I use FPGrowth to generate frequent pattern. The program works well when I set MinSupport to 0.001.
StackOverflowError always occurs while I iterate items over model.freqItemsets with MinSupport 0.0001. How should I iterate freqItemsets without this problem?
Spark Version: 1.5.2 with standalone deploy mode
Dateset: 1,735,402 tractions and ~600k distinct items.
Source code:
object AppAR {
def main(args: Array[String]) {
val conf = new SparkConf().setAppName("AR Model Training")
val sc = new SparkContext(conf)
val data = sc.textFile("hdfs://itrihd34:8020/tmp/tmp_gohappy_ar/*")
val trans:RDD[Array[String]] = data.map(l => l.trim.split('\t'))
val fpg = new FPGrowth().setMinSupport(0.0001).setNumPartitions(10)
val model = fpg.run(trans)
//-- !!! always StackOverflowError
// model.freqItemsets.foreach { itemset => println(itemset.items.mkString("[", ",", "]") + ", " + itemset.freq) }
//-- !!! always StackOverflowError
// println("#freqItemsets: " + model.freqItemsets.count())
println("#trans: " + trans.count())
println("done")
}
}
Error message:
15/11/19 12:40:33 INFO DAGScheduler: Job 2 failed: foreach at AppAR.scala:22, took 27.730936 s Exception in thread "main" org.apache.spark.SparkException: Job aborted due to stage failure: Task 3 in stage 3.0 failed 4 times, most recent failure: Lost task 3.3 in stage 3.0 (TID 30, 140.96.83.41): java.lang.StackOverflowError at java.lang.Exception.(Exception.java:102) at java.lang.ReflectiveOperationException.(ReflectiveOperationException.java:89) at java.lang.reflect.InvocationTargetException.(InvocationTargetException.java:72) at sun.reflect.GeneratedMethodAccessor2.invoke(Unknown Source) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at java.io.ObjectStreamClass.invokeWriteObject(ObjectStreamClass.java:988) at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1495) at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1431) at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1177) at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1547) at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1508) at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1431) at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1177) at java.io.ObjectOutputStream.writeObject(ObjectOutputStream.java:347) at scala.collection.mutable.HashMap$$anonfun$writeObject$1.apply(HashMap.scala:137) at scala.collection.mutable.HashMap$$anonfun$writeObject$1.apply(HashMap.scala:135) at scala.collection.mutable.HashTable$class.foreachEntry(HashTable.scala:226) at scala.collection.mutable.HashMap.foreachEntry(HashMap.scala:39) at scala.collection.mutable.HashTable$class.serializeTo(HashTable.scala:124) at scala.collection.mutable.HashMap.serializeTo(HashMap.scala:39) at scala.collection.mutable.HashMap.writeObject(HashMap.scala:135) at sun.reflect.GeneratedMethodAccessor2.invoke(Unknown Source) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at java.io.ObjectStreamClass.invokeWriteObject(ObjectStreamClass.java:988) at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1495) at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1431) at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1177) at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1547) at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1508) at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1431) at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1177) at java.io.ObjectOutputStream.writeObject(ObjectOutputStream.java:347) at scala.collection.mutable.HashMap$$anonfun$writeObject$1.apply(HashMap.scala:137) at scala.collection.mutable.HashMap$$anonfun$writeObject$1.apply(HashMap.scala:135) at scala.collection.mutable.HashTable$class.foreachEntry(HashTable.scala:226) at scala.collection.mutable.HashMap.foreachEntry(HashMap.scala:39) at scala.collection.mutable.HashTable$class.serializeTo(HashTable.scala:124) at scala.collection.mutable.HashMap.serializeTo(HashMap.scala:39) at scala.collection.mutable.HashMap.writeObject(HashMap.scala:135) at sun.reflect.GeneratedMethodAccessor2.invoke(Unknown Source) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at java.io.ObjectStreamClass.invokeWriteObject(ObjectStreamClass.java:988) at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1495) at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1431) at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1177) at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1547) at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1508) at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1431) at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1177) at java.io.ObjectOutputStream.writeObject(ObjectOutputStream.java:347) at scala.collection.mutable.HashMap$$anonfun$writeObject$1.apply(HashMap.scala:137) at scala.collection.mutable.HashMap$$anonfun$writeObject$1.apply(HashMap.scala:135) at scala.collection.mutable.HashTable$class.foreachEntry(HashTable.scala:226) at scala.collection.mutable.HashMap.foreachEntry(HashMap.scala:39) at scala.collection.mutable.HashTable$class.serializeTo(HashTable.scala:124) at scala.collection.mutable.HashMap.serializeTo(HashMap.scala:39) at scala.collection.mutable.HashMap.writeObject(HashMap.scala:135) at sun.reflect.GeneratedMethodAccessor2.invoke(Unknown Source) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at java.io.ObjectStreamClass.invokeWriteObject(ObjectStreamClass.java:988) at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1495) at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1431) at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1177) at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1547) at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1508) at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1431) at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1177) at java.io.ObjectOutputStream.writeObject(ObjectOutputStream.java:347) at scala.collection.mutable.HashMap$$anonfun$writeObject$1.apply(HashMap.scala:137) at scala.collection.mutable.HashMap$$anonfun$writeObject$1.apply(HashMap.scala:135) at scala.collection.mutable.HashTable$class.foreachEntry(HashTable.scala:226) at scala.collection.mutable.HashMap.foreachEntry(HashMap.scala:39) at scala.collection.mutable.HashTable$class.serializeTo(HashTable.scala:124) at scala.collection.mutable.HashMap.serializeTo(HashMap.scala:39) at scala.collection.mutable.HashMap.writeObject(HashMap.scala:135) at sun.reflect.GeneratedMethodAccessor2.invoke(Unknown Source) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ...
Aucun commentaire:
Enregistrer un commentaire