jeudi 19 novembre 2015

StackOverflowError always occurs while I iterate items over model.freqItemsets with smaller MinSupport (0.0001)

I use FPGrowth to generate frequent pattern. The program works well when I set MinSupport to 0.001.

StackOverflowError always occurs while I iterate items over model.freqItemsets with MinSupport 0.0001. How should I iterate freqItemsets without this problem?

Spark Version: 1.5.2 with standalone deploy mode

Dateset: 1,735,402 tractions and ~600k distinct items.

Source code:

object AppAR {
  def main(args: Array[String]) {
    val conf = new SparkConf().setAppName("AR Model Training")

    val sc = new SparkContext(conf)

    val data = sc.textFile("hdfs://itrihd34:8020/tmp/tmp_gohappy_ar/*")

    val trans:RDD[Array[String]] = data.map(l => l.trim.split('\t'))

    val fpg = new FPGrowth().setMinSupport(0.0001).setNumPartitions(10)

    val model = fpg.run(trans)

    //-- !!! always StackOverflowError
//    model.freqItemsets.foreach { itemset => println(itemset.items.mkString("[", ",", "]") + ", " + itemset.freq) }

    //-- !!! always StackOverflowError
//    println("#freqItemsets:  " +  model.freqItemsets.count())

    println("#trans: " + trans.count())

    println("done")
  }
}

Error message:

15/11/19 12:40:33 INFO DAGScheduler: Job 2 failed: foreach at AppAR.scala:22, took 27.730936 s Exception in thread "main" org.apache.spark.SparkException: Job aborted due to stage failure: Task 3 in stage 3.0 failed 4 times, most recent failure: Lost task 3.3 in stage 3.0 (TID 30, 140.96.83.41): java.lang.StackOverflowError at java.lang.Exception.(Exception.java:102) at java.lang.ReflectiveOperationException.(ReflectiveOperationException.java:89) at java.lang.reflect.InvocationTargetException.(InvocationTargetException.java:72) at sun.reflect.GeneratedMethodAccessor2.invoke(Unknown Source) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at java.io.ObjectStreamClass.invokeWriteObject(ObjectStreamClass.java:988) at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1495) at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1431) at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1177) at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1547) at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1508) at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1431) at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1177) at java.io.ObjectOutputStream.writeObject(ObjectOutputStream.java:347) at scala.collection.mutable.HashMap$$anonfun$writeObject$1.apply(HashMap.scala:137) at scala.collection.mutable.HashMap$$anonfun$writeObject$1.apply(HashMap.scala:135) at scala.collection.mutable.HashTable$class.foreachEntry(HashTable.scala:226) at scala.collection.mutable.HashMap.foreachEntry(HashMap.scala:39) at scala.collection.mutable.HashTable$class.serializeTo(HashTable.scala:124) at scala.collection.mutable.HashMap.serializeTo(HashMap.scala:39) at scala.collection.mutable.HashMap.writeObject(HashMap.scala:135) at sun.reflect.GeneratedMethodAccessor2.invoke(Unknown Source) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at java.io.ObjectStreamClass.invokeWriteObject(ObjectStreamClass.java:988) at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1495) at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1431) at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1177) at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1547) at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1508) at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1431) at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1177) at java.io.ObjectOutputStream.writeObject(ObjectOutputStream.java:347) at scala.collection.mutable.HashMap$$anonfun$writeObject$1.apply(HashMap.scala:137) at scala.collection.mutable.HashMap$$anonfun$writeObject$1.apply(HashMap.scala:135) at scala.collection.mutable.HashTable$class.foreachEntry(HashTable.scala:226) at scala.collection.mutable.HashMap.foreachEntry(HashMap.scala:39) at scala.collection.mutable.HashTable$class.serializeTo(HashTable.scala:124) at scala.collection.mutable.HashMap.serializeTo(HashMap.scala:39) at scala.collection.mutable.HashMap.writeObject(HashMap.scala:135) at sun.reflect.GeneratedMethodAccessor2.invoke(Unknown Source) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at java.io.ObjectStreamClass.invokeWriteObject(ObjectStreamClass.java:988) at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1495) at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1431) at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1177) at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1547) at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1508) at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1431) at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1177) at java.io.ObjectOutputStream.writeObject(ObjectOutputStream.java:347) at scala.collection.mutable.HashMap$$anonfun$writeObject$1.apply(HashMap.scala:137) at scala.collection.mutable.HashMap$$anonfun$writeObject$1.apply(HashMap.scala:135) at scala.collection.mutable.HashTable$class.foreachEntry(HashTable.scala:226) at scala.collection.mutable.HashMap.foreachEntry(HashMap.scala:39) at scala.collection.mutable.HashTable$class.serializeTo(HashTable.scala:124) at scala.collection.mutable.HashMap.serializeTo(HashMap.scala:39) at scala.collection.mutable.HashMap.writeObject(HashMap.scala:135) at sun.reflect.GeneratedMethodAccessor2.invoke(Unknown Source) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at java.io.ObjectStreamClass.invokeWriteObject(ObjectStreamClass.java:988) at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1495) at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1431) at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1177) at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1547) at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1508) at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1431) at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1177) at java.io.ObjectOutputStream.writeObject(ObjectOutputStream.java:347) at scala.collection.mutable.HashMap$$anonfun$writeObject$1.apply(HashMap.scala:137) at scala.collection.mutable.HashMap$$anonfun$writeObject$1.apply(HashMap.scala:135) at scala.collection.mutable.HashTable$class.foreachEntry(HashTable.scala:226) at scala.collection.mutable.HashMap.foreachEntry(HashMap.scala:39) at scala.collection.mutable.HashTable$class.serializeTo(HashTable.scala:124) at scala.collection.mutable.HashMap.serializeTo(HashMap.scala:39) at scala.collection.mutable.HashMap.writeObject(HashMap.scala:135) at sun.reflect.GeneratedMethodAccessor2.invoke(Unknown Source) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ...

Aucun commentaire:

Enregistrer un commentaire