Toolkit for Apache Spark ML covering feature clean-up, a feature importance calculation suite, information gain selection, distributed SMOTE, model selection and training, hyperparameter optimization and selection, and model interpretability.
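For orientation, the kind of workflow the toolkit automates can be sketched with only stock Spark ML APIs: assemble features, pick a model, sweep a hyperparameter grid, and select by a binary-classification metric. The column names, toy data, and grid values below are illustrative assumptions, not the toolkit's own API.

import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.classification.LogisticRegression
import org.apache.spark.ml.evaluation.BinaryClassificationEvaluator
import org.apache.spark.ml.feature.VectorAssembler
import org.apache.spark.ml.tuning.{CrossValidator, ParamGridBuilder}
import org.apache.spark.sql.SparkSession

object ManualBaselineSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("manual-baseline").getOrCreate()
    import spark.implicits._

    // Toy training data: two numeric features and a binary label (assumed schema).
    val training = Seq(
      (1.0, 0.5, 1.0), (0.2, 1.5, 0.0), (2.3, 0.1, 1.0), (0.1, 2.2, 0.0),
      (1.8, 0.4, 1.0), (0.3, 1.9, 0.0), (2.0, 0.2, 1.0), (0.4, 2.5, 0.0)
    ).toDF("f1", "f2", "label")

    // Assemble raw columns into the single feature vector Spark ML expects.
    val assembler = new VectorAssembler()
      .setInputCols(Array("f1", "f2"))
      .setOutputCol("features")

    val lr = new LogisticRegression().setLabelCol("label").setFeaturesCol("features")
    val pipeline = new Pipeline().setStages(Array(assembler, lr))

    // Hand-rolled hyperparameter grid; automated search is what the toolkit provides.
    val grid = new ParamGridBuilder()
      .addGrid(lr.regParam, Array(0.01, 0.1))
      .addGrid(lr.elasticNetParam, Array(0.0, 0.5))
      .build()

    // Cross-validated model selection on area under ROC.
    val cv = new CrossValidator()
      .setEstimator(pipeline)
      .setEvaluator(new BinaryClassificationEvaluator().setMetricName("areaUnderROC"))
      .setEstimatorParamMaps(grid)
      .setNumFolds(3)

    val model = cv.fit(training)
    println(s"Best average AUC: ${model.avgMetrics.max}")

    spark.stop()
  }
}

The toolkit's purpose, per the description above, is to replace this hand-written boilerplate with automated feature handling, model selection, and hyperparameter search.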
Fix example notebook so it works out of the box #4
Cmd 3: the High Level Process diagram has a minor misspelling: infernece -> inference
at org.apache.spark.scheduler.DAGScheduler$$anonfun$cleanUpAfterSchedulerStop$1.apply(DAGScheduler.scala:1102)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$cleanUpAfterSchedulerStop$1.apply(DAGScheduler.scala:1100)
at scala.collection.mutable.HashSet.foreach(HashSet.scala:78)
at org.apache.spark.scheduler.DAGScheduler.cleanUpAfterSchedulerStop(DAGScheduler.scala:1100)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onStop(DAGScheduler.scala:2592)
at org.apache.spark.util.EventLoop.stop(EventLoop.scala:84)
at org.apache.spark.scheduler.DAGScheduler.stop(DAGScheduler.scala:2503)
at org.apache.spark.SparkContext$$anonfun$stop$6.apply$mcV$sp(SparkContext.scala:2107)
at org.apache.spark.util.Utils$.tryLogNonFatalError(Utils.scala:1506)
at org.apache.spark.SparkContext.stop(SparkContext.scala:2106)
at org.apache.spark.TaskFailedListener$$anon$1$$anonfun$run$1.apply$mcV$sp(SparkParallelismTracker.scala:131)
at org.apache.spark.TaskFailedListener$$anon$1$$anonfun$run$1.apply(SparkParallelismTracker.scala:131)
at org.apache.spark.TaskFailedListener$$anon$1$$anonfun$run$1.apply(SparkParallelismTracker.scala:131)
at scala.util.DynamicVariable.withValue(DynamicVariable.scala:58)
at org.apache.spark.TaskFailedListener$$anon$1.run(SparkParallelismTracker.scala:130)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:893)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2243)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2265)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2284)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2309)
at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:961)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:379)
at org.apache.spark.rdd.RDD.collect(RDD.scala:960)
at org.apache.spark.RangePartitioner$.sketch(Partitioner.scala:309)
at org.apache.spark.RangePartitioner.<init>(Partitioner.scala:171)
at org.apache.spark.RangePartitioner.<init>(Partitioner.scala:151)
at org.apache.spark.rdd.OrderedRDDFunctions$$anonfun$sortByKey$1.apply(OrderedRDDFunctions.scala:62)
at org.apache.spark.rdd.OrderedRDDFunctions$$anonfun$sortByKey$1.apply(OrderedRDDFunctions.scala:61)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:379)
at org.apache.spark.rdd.OrderedRDDFunctions.sortByKey(OrderedRDDFunctions.scala:61)
at org.apache.spark.mllib.evaluation.BinaryClassificationMetrics.x$4$lzycompute(BinaryClassificationMetrics.scala:155)
at org.apache.spark.mllib.evaluation.BinaryClassificationMetrics.x$4(BinaryClassificationMetrics.scala:146)
at org.apache.spark.mllib.evaluation.BinaryClassificationMetrics.confusions$lzycompute(BinaryClassificationMetrics.scala:148)
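Reading the trace: the upper frames show the scheduler being shut down (cleanUpAfterSchedulerStop, reached via TaskFailedListener in SparkParallelismTracker), while the lower frames show a BinaryClassificationMetrics evaluation launching a sortByKey/collect job that can no longer run because the SparkContext has already stopped. A minimal sketch of how such a job gets triggered, using only the standard Spark MLlib API (the toy scores/labels and names are assumptions, not code from the toolkit):

import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
import org.apache.spark.sql.SparkSession

object MetricsAfterStopSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("metrics-after-stop").getOrCreate()
    val sc = spark.sparkContext

    // (score, label) pairs, the input BinaryClassificationMetrics expects.
    val scoreAndLabels = sc.parallelize(
      Seq((0.9, 1.0), (0.2, 0.0), (0.7, 1.0), (0.1, 0.0)))

    // Constructing the metrics object is lazy; the sortByKey/collect job seen in
    // the stack trace above is only launched when a metric is actually requested.
    val metrics = new BinaryClassificationMetrics(scoreAndLabels)

    // If the SparkContext has already been stopped (e.g. by a task-failure
    // listener, as in the trace), this call fails with an error like the one above.
    println(s"AUC = ${metrics.areaUnderROC()}")

    spark.stop()
  }
}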
The current example does not work: https://github.com/databrickslabs/automl-toolkit/blob/master/demos/AutoMLPresentationDemo.dbc
Issues: