I'm working on a Spark ML sentiment analysis platform that uses a mobile feedback app as its data source. The feedback documents are stored in MongoDB, and I want to read them with Spark. I tried to connect Spark to MongoDB, but I keep getting this error:
Exception in thread "main" org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 1 times, most recent failure: Lost task 0.0 in stage 0.0 (TID 0, localhost, executor driver): java.lang.NoSuchMethodError: org.apache.spark.sql.catalyst.analysis.TypeCoercion$.findTightestCommonTypeOfTwo()Lscala/Function2;
at com.stratio.datasource.mongodb.schema.MongodbSchema.com$stratio$datasource$mongodb$schema$MongodbSchema$$compatibleType(MongodbSchema.scala:85)
at com.stratio.datasource.mongodb.schema.MongodbSchema$$anonfun$3.apply(MongodbSchema.scala:46)
at com.stratio.datasource.mongodb.schema.MongodbSchema$$anonfun$3.apply(MongodbSchema.scala:46)
at org.apache.spark.util.collection.ExternalSorter$$anonfun$5.apply(ExternalSorter.scala:189)
at org.apache.spark.util.collection.ExternalSorter$$anonfun$5.apply(ExternalSorter.scala:188)
at org.apache.spark.util.collection.AppendOnlyMap.changeValue(AppendOnlyMap.scala:150)
at org.apache.spark.util.collection.SizeTrackingAppendOnlyMap.changeValue(SizeTrackingAppendOnlyMap.scala:32)
at org.apache.spark.util.collection.ExternalSorter.insertAll(ExternalSorter.scala:194)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:63)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53)
at org.apache.spark.scheduler.Task.run(Task.scala:109)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1599)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1587)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1586)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1586)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:831)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1820)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1769)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1758)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:642)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2027)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2124)
at org.apache.spark.rdd.RDD$$anonfun$aggregate$1.apply(RDD.scala:1118)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
at org.apache.spark.rdd.RDD.aggregate(RDD.scala:1111)
at com.stratio.datasource.mongodb.schema.MongodbSchema.schema(MongodbSchema.scala:46)
at com.stratio.datasource.mongodb.MongodbRelation.com$stratio$datasource$mongodb$MongodbRelation$$lazySchema$lzycompute(MongodbRelation.scala:63)
at com.stratio.datasource.mongodb.MongodbRelation.com$stratio$datasource$mongodb$MongodbRelation$$lazySchema(MongodbRelation.scala:60)
at com.stratio.datasource.mongodb.MongodbRelation$$anonfun$1.apply(MongodbRelation.scala:65)
at com.stratio.datasource.mongodb.MongodbRelation$$anonfun$1.apply(MongodbRelation.scala:65)
at scala.Option.getOrElse(Option.scala:121)
at com.stratio.datasource.mongodb.MongodbRelation.(MongodbRelation.scala:65)
at com.stratio.datasource.mongodb.DefaultSource.createRelation(DefaultSource.scala:36)
at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:340)
at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:239)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:227)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:164)
at com.spml.MongoDbSparkCon.main(MongoDbSparkCon.java:36)
Caused by: java.lang.NoSuchMethodError: org.apache.spark.sql.catalyst.analysis.TypeCoercion$.findTightestCommonTypeOfTwo()Lscala/Function2;
at com.stratio.datasource.mongodb.schema.MongodbSchema.com$stratio$datasource$mongodb$schema$MongodbSchema$$compatibleType(MongodbSchema.scala:85)
at com.stratio.datasource.mongodb.schema.MongodbSchema$$anonfun$3.apply(MongodbSchema.scala:46)
at com.stratio.datasource.mongodb.schema.MongodbSchema$$anonfun$3.apply(MongodbSchema.scala:46)
at org.apache.spark.util.collection.ExternalSorter$$anonfun$5.apply(ExternalSorter.scala:189)
at org.apache.spark.util.collection.ExternalSorter$$anonfun$5.apply(ExternalSorter.scala:188)
at org.apache.spark.util.collection.AppendOnlyMap.changeValue(AppendOnlyMap.scala:150)
at org.apache.spark.util.collection.SizeTrackingAppendOnlyMap.changeValue(SizeTrackingAppendOnlyMap.scala:32)
at org.apache.spark.util.collection.ExternalSorter.insertAll(ExternalSorter.scala:194)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:63)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53)
at org.apache.spark.scheduler.Task.run(Task.scala:109)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
2018-05-28 18:54:24 INFO DAGScheduler:54 - Job 0 failed: aggregate at MongodbSchema.scala:46, took 0,855790 s
My Spark code (I'm using Java):
import java.util.HashMap;
import java.util.Map;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;

public class MongoDbSparkCon {
    public static void main(String[] args) {
        JavaSparkContext sc = new JavaSparkContext("local[*]", "test spark-mongodb java");
        SQLContext sqlContext = new SQLContext(sc);

        // Connection options for the Stratio spark-mongodb data source
        Map<String, String> options = new HashMap<>();
        options.put("host", "localhost:27017");
        options.put("database", "feedbackuib");
        options.put("collection", "Feedback");

        Dataset<Row> df = sqlContext.read().format("com.stratio.datasource.mongodb").options(options).load();
        df.registerTempTable("Feedback");
        sqlContext.sql("SELECT * FROM Feedback");
        df.show();
    }
}
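For what it's worth, I'd also be fine switching to the official MongoDB Spark connector (mongo-spark-connector) if the Stratio one is the problem. Based on its documentation, the equivalent read would look roughly like this. This is only a sketch, untested on my setup; the class name MongoSparkOfficial is just a placeholder, and the connector artifact would still have to match my Spark version:

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class MongoSparkOfficial {
    public static void main(String[] args) {
        // spark.mongodb.input.uri points at <host>:<port>/<database>.<collection>
        SparkSession spark = SparkSession.builder()
                .master("local[*]")
                .appName("test spark-mongodb java")
                .config("spark.mongodb.input.uri", "mongodb://localhost:27017/feedbackuib.Feedback")
                .getOrCreate();

        // The official connector registers this data source for DataFrame reads
        Dataset<Row> feedback = spark.read()
                .format("com.mongodb.spark.sql.DefaultSource")
                .load();
        feedback.show();
    }
}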
My pom.xml:
Any help would be appreciated!