val savepath = sys.env("HDFS_TMP")
val installation_ID = sys.env("TMP_II_INSTALLATION")
val hostname = sys.env("TMP_HOSTNAME")
val databasename = sys.env("SEPPARAMDB")
val user = sys.env.get("II_USER")
val password = sys.env.get("II_PASSWORD")
val usernameAndPasswordStatement = (user, password) match {
  case (Some(u), Some(p)) => s""", user "$u", password "$p""""
  case _ => ""
}
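For reference, with II_USER and II_PASSWORD both set (using the values from the ENV section below), the fragment should evaluate in the REPL to something like:

scala> usernameAndPasswordStatement
res0: String = , user "actian", password "actian"

This string is appended verbatim to the OPTIONS clause of the CREATE TEMPORARY TABLE statements in the UNLOAD section.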
scala> sqlContext.sql("select distinct vwload_reg02_unload_tbl.col_float4,vwload_reg02_unload_tbl.col_int,vwload_reg02_unload_tbl.col_nvarchar from vwload_reg02_unload_tbl").write.format("com.databricks.spark.csv").save(s"$savepath/vwload_reg02_unload_tbl_13.csv")
[Stage 0:> (0 + 4) / 9]16/05/20 13:58:39 WARN TaskSetManager: Lost task 7.0 in stage 0.0 (TID 0, uksl-kelch01-cent6-clu2.actian.com): java.lang.ClassCastException: org.apache.spark.sql.catalyst.expressions.MutableInt cannot be cast to org.apache.spark.sql.catalyst.expressions.MutableFloat
at org.apache.spark.sql.catalyst.expressions.SpecificMutableRow.getFloat(SpecificMutableRow.scala:261)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificUnsafeProjection.apply(Unknown Source)
at org.apache.spark.sql.execution.aggregate.TungstenAggregationIterator.processInputs(TungstenAggregationIterator.scala:374)
at org.apache.spark.sql.execution.aggregate.TungstenAggregationIterator.start(TungstenAggregationIterator.scala:622)
at org.apache.spark.sql.execution.aggregate.TungstenAggregate$$anonfun$doExecute$1.org$apache$spark$sql$execution$aggregate$TungstenAggregate$$anonfun$$executePartition$1(TungstenAggregate.scala:110)
at org.apache.spark.sql.execution.aggregate.TungstenAggregate$$anonfun$doExecute$1$$anonfun$2.apply(TungstenAggregate.scala:119)
at org.apache.spark.sql.execution.aggregate.TungstenAggregate$$anonfun$doExecute$1$$anonfun$2.apply(TungstenAggregate.scala:119)
at org.apache.spark.rdd.MapPartitionsWithPreparationRDD.compute(MapPartitionsWithPreparationRDD.scala:64)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:264)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:264)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
16/05/20 13:58:39 WARN TaskSetManager: Lost task 7.1 in stage 0.0 (TID 5, uksl-kelch01-cent6-clu2.actian.com): java.lang.Exception: Connection to Vector end point has been closed or amount of data communicated does not match the message length
at com.actian.spark_vector.datastream.reader.DataStreamReader$$anonfun$readByteBuffer$1.apply(DataStreamReader.scala:96)
at com.actian.spark_vector.datastream.reader.DataStreamReader$$anonfun$readByteBuffer$1.apply(DataStreamReader.scala:88)
at com.actian.spark_vector.util.ResourceUtil$.closeResourceOnFailure(ResourceUtil.scala:39)
at com.actian.spark_vector.datastream.reader.DataStreamReader$.readByteBuffer(DataStreamReader.scala:88)
at com.actian.spark_vector.datastream.reader.DataStreamReader$.readByteBufferWithLength(DataStreamReader.scala:106)
at com.actian.spark_vector.datastream.reader.DataStreamReader$.readWithByteBuffer(DataStreamReader.scala:113)
at com.actian.spark_vector.datastream.DataStreamConnector.readExternalInsertConnectionHeader(DataStreamConnector.scala:120)
at com.actian.spark_vector.datastream.reader.DataStreamReader$$anonfun$read$1.apply(DataStreamReader.scala:48)
at com.actian.spark_vector.datastream.reader.DataStreamReader$$anonfun$read$1.apply(DataStreamReader.scala:47)
at com.actian.spark_vector.datastream.DataStreamConnector$$anonfun$newConnection$1.apply(DataStreamConnector.scala:109)
at com.actian.spark_vector.util.ResourceUtil$.closeResourceOnFailure(ResourceUtil.scala:39)
at com.actian.spark_vector.datastream.DataStreamConnector.newConnection(DataStreamConnector.scala:109)
at com.actian.spark_vector.datastream.reader.DataStreamReader.read(DataStreamReader.scala:47)
at com.actian.spark_vector.vector.Vector$$anonfun$unloadVector$1$$anonfun$7.apply(Vector.scala:124)
at com.actian.spark_vector.vector.Vector$$anonfun$unloadVector$1$$anonfun$7.apply(Vector.scala:124)
at com.actian.spark_vector.datastream.reader.ScanRDD.compute(ScanRDD.scala:44)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:264)
at org.apache.spark.rdd.MapPartitionsWithPreparationRDD.compute(MapPartitionsWithPreparationRDD.scala:63)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:264)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:264)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
16/05/20 13:58:41 ERROR TaskSetManager: Task 7 in stage 0.0 failed 4 times; aborting job
16/05/20 13:58:41 WARN ExecutorAllocationManager: No stages are running, but numRunningTasks != 0
org.apache.spark.SparkException: Job aborted due to stage failure: Task 7 in stage 0.0 failed 4 times, most recent failure: Lost task 7.3 in stage 0.0 (TID 10, uksl-kelch01-cent6-clu2.actian.com): java.lang.Exception: Connection to Vector end point has been closed or amount of data communicated does not match the message length
at com.actian.spark_vector.datastream.reader.DataStreamReader$$anonfun$readByteBuffer$1.apply(DataStreamReader.scala:96)
at com.actian.spark_vector.datastream.reader.DataStreamReader$$anonfun$readByteBuffer$1.apply(DataStreamReader.scala:88)
at com.actian.spark_vector.util.ResourceUtil$.closeResourceOnFailure(ResourceUtil.scala:39)
at com.actian.spark_vector.datastream.reader.DataStreamReader$.readByteBuffer(DataStreamReader.scala:88)
at com.actian.spark_vector.datastream.reader.DataStreamReader$.readByteBufferWithLength(DataStreamReader.scala:106)
at com.actian.spark_vector.datastream.reader.DataStreamReader$.readWithByteBuffer(DataStreamReader.scala:113)
at com.actian.spark_vector.datastream.DataStreamConnector.readExternalInsertConnectionHeader(DataStreamConnector.scala:120)
at com.actian.spark_vector.datastream.reader.DataStreamReader$$anonfun$read$1.apply(DataStreamReader.scala:48)
at com.actian.spark_vector.datastream.reader.DataStreamReader$$anonfun$read$1.apply(DataStreamReader.scala:47)
at com.actian.spark_vector.datastream.DataStreamConnector$$anonfun$newConnection$1.apply(DataStreamConnector.scala:109)
at com.actian.spark_vector.util.ResourceUtil$.closeResourceOnFailure(ResourceUtil.scala:39)
at com.actian.spark_vector.datastream.DataStreamConnector.newConnection(DataStreamConnector.scala:109)
at com.actian.spark_vector.datastream.reader.DataStreamReader.read(DataStreamReader.scala:47)
at com.actian.spark_vector.vector.Vector$$anonfun$unloadVector$1$$anonfun$7.apply(Vector.scala:124)
at com.actian.spark_vector.vector.Vector$$anonfun$unloadVector$1$$anonfun$7.apply(Vector.scala:124)
at com.actian.spark_vector.datastream.reader.ScanRDD.compute(ScanRDD.scala:44)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:264)
at org.apache.spark.rdd.MapPartitionsWithPreparationRDD.compute(MapPartitionsWithPreparationRDD.scala:63)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:264)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:264)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1294)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1282)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1281)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1281)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:697)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:697)
at scala.Option.foreach(Option.scala:236)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:697)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1507)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1469)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1458)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:567)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1824)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1837)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1914)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1.apply$mcV$sp(PairRDDFunctions.scala:1124)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1.apply(PairRDDFunctions.scala:1065)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1.apply(PairRDDFunctions.scala:1065)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:306)
at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopDataset(PairRDDFunctions.scala:1065)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$4.apply$mcV$sp(PairRDDFunctions.scala:989)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$4.apply(PairRDDFunctions.scala:965)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$4.apply(PairRDDFunctions.scala:965)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:306)
at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFunctions.scala:965)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$1.apply$mcV$sp(PairRDDFunctions.scala:897)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$1.apply(PairRDDFunctions.scala:897)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$1.apply(PairRDDFunctions.scala:897)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:306)
at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFunctions.scala:896)
at org.apache.spark.rdd.RDD$$anonfun$saveAsTextFile$1.apply$mcV$sp(RDD.scala:1426)
at org.apache.spark.rdd.RDD$$anonfun$saveAsTextFile$1.apply(RDD.scala:1405)
at org.apache.spark.rdd.RDD$$anonfun$saveAsTextFile$1.apply(RDD.scala:1405)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:306)
at org.apache.spark.rdd.RDD.saveAsTextFile(RDD.scala:1405)
at com.databricks.spark.csv.package$CsvSchemaRDD.saveAsCsvFile(package.scala:169)
at com.databricks.spark.csv.DefaultSource.createRelation(DefaultSource.scala:165)
at org.apache.spark.sql.execution.datasources.ResolvedDataSource$.apply(ResolvedDataSource.scala:170)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:146)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:137)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:22)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:27)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:29)
at $iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:31)
at $iwC$$iwC$$iwC$$iwC.<init>(<console>:33)
at $iwC$$iwC$$iwC.<init>(<console>:35)
at $iwC$$iwC.<init>(<console>:37)
at $iwC.<init>(<console>:39)
at <init>(<console>:41)
at .<init>(<console>:45)
at .<clinit>(<console>)
at .<init>(<console>:7)
at .<clinit>(<console>)
at $print(<console>)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)
at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1340)
at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:840)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:871)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:819)
at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:857)
at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:902)
at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:814)
at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:657)
at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:665)
at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$loop(SparkILoop.scala:670)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:997)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:945)
at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1059)
at org.apache.spark.repl.Main$.main(Main.scala:31)
at org.apache.spark.repl.Main.main(Main.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:672)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:180)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:205)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:120)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.Exception: Connection to Vector end point has been closed or amount of data communicated does not match the message length
at com.actian.spark_vector.datastream.reader.DataStreamReader$$anonfun$readByteBuffer$1.apply(DataStreamReader.scala:96)
at com.actian.spark_vector.datastream.reader.DataStreamReader$$anonfun$readByteBuffer$1.apply(DataStreamReader.scala:88)
at com.actian.spark_vector.util.ResourceUtil$.closeResourceOnFailure(ResourceUtil.scala:39)
at com.actian.spark_vector.datastream.reader.DataStreamReader$.readByteBuffer(DataStreamReader.scala:88)
at com.actian.spark_vector.datastream.reader.DataStreamReader$.readByteBufferWithLength(DataStreamReader.scala:106)
at com.actian.spark_vector.datastream.reader.DataStreamReader$.readWithByteBuffer(DataStreamReader.scala:113)
at com.actian.spark_vector.datastream.DataStreamConnector.readExternalInsertConnectionHeader(DataStreamConnector.scala:120)
at com.actian.spark_vector.datastream.reader.DataStreamReader$$anonfun$read$1.apply(DataStreamReader.scala:48)
at com.actian.spark_vector.datastream.reader.DataStreamReader$$anonfun$read$1.apply(DataStreamReader.scala:47)
at com.actian.spark_vector.datastream.DataStreamConnector$$anonfun$newConnection$1.apply(DataStreamConnector.scala:109)
at com.actian.spark_vector.util.ResourceUtil$.closeResourceOnFailure(ResourceUtil.scala:39)
at com.actian.spark_vector.datastream.DataStreamConnector.newConnection(DataStreamConnector.scala:109)
at com.actian.spark_vector.datastream.reader.DataStreamReader.read(DataStreamReader.scala:47)
at com.actian.spark_vector.vector.Vector$$anonfun$unloadVector$1$$anonfun$7.apply(Vector.scala:124)
at com.actian.spark_vector.vector.Vector$$anonfun$unloadVector$1$$anonfun$7.apply(Vector.scala:124)
at com.actian.spark_vector.datastream.reader.ScanRDD.compute(ScanRDD.scala:44)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:264)
at org.apache.spark.rdd.MapPartitionsWithPreparationRDD.compute(MapPartitionsWithPreparationRDD.scala:63)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:264)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:264)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
Reproduced on 4.2.3H with the latest patch and the daily Spark loader 0.2 jar.
The failing unload query:
select distinct vwload_reg02_unload_tbl.col_float4,vwload_reg02_unload_tbl.col_int,vwload_reg02_unload_tbl.col_nvarchar from vwload_reg02_unload_tbl
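The first failure is a ClassCastException (MutableInt cannot be cast to MutableFloat) raised while projecting col_float4, which suggests the connector is handing Spark an int column where the schema promises a float. A hedged narrowing step, not part of the original run, would be to project the columns one at a time:

// Hypothetical narrowing step: run the distinct over each column
// separately; a failure on a single column isolates the bad type mapping.
val cols = Seq("col_float4", "col_int", "col_nvarchar")
cols.foreach { c =>
  sqlContext.sql(s"select distinct $c from vwload_reg02_unload_tbl").show(5)
}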
ENV:
export SPARK_MASTER=yarn
export HDFS_TMP=$(iigetenv II_HDFSDATA)
export SPARK_LOADER_JAR=/home/actian/kelch01/Spark/unloader/spark_vector_loader-assembly-0.2.jar
export SEPPARAMDB=vwtestdb0
export TMP_II_HDFSDATA=$(ingprenv II_HDFSDATA)
export TMP_II_INSTALLATION=$(ingprenv II_INSTALLATION)
export TMP_HOSTNAME=$HOSTNAME
export ING_TST=/home/actian/kelch01/svn_main_checkout/
export II_USER=actian
export II_PASSWORD=actian
LOAD:
create table vwload_reg02_unload_tbl (col_int int, col_float4 float4, col_money money, col_decimal382 decimal(38,2), col_decimal102 decimal(10,2), col_char20 char(20), col_varchar20 varchar(20), col_nchar20 nchar(20), col_nvarchar nvarchar(20), col_ansidate ansidate, col_timestamp timestamp)\g
Load the attached data file vwload_reg02_data.txt:
vwload --nullvalue NULL --fdelim \t --table vwload_reg02_unload_tbl $SEPPARAMDB vwload_reg02_data.txt
create table vwload_reg02_unload_tbl2 as select * from vwload_reg02_unload_tbl\g
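A hypothetical sanity check after the load (not in the original steps), run from the terminal monitor:

select count(*) from vwload_reg02_unload_tbl\g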
UNLOAD:
hadoop fs -rm -r -skipTrash $(ingprenv HDFS_TMP)/*.csv
spark-shell --master $SPARK_MASTER --driver-java-options '-Duser.timezone=GMT' --conf "spark.executor.memory=10G" --conf "spark.driver.memory=10G" --jars $SPARK_LOADER_JAR
val savepath = sys.env("HDFS_TMP")
val installation_ID = sys.env("TMP_II_INSTALLATION")
val hostname = sys.env("TMP_HOSTNAME")
val databasename = sys.env("SEPPARAMDB")
val user = sys.env.get("II_USER")
val password = sys.env.get("II_PASSWORD")
val usernameAndPasswordStatement = (user, password) match {
  case (Some(u), Some(p)) => s""", user "$u", password "$p""""
  case _ => ""
}
sqlContext.sql(s"""CREATE TEMPORARY TABLE vwload_reg02_unload_tbl USING com.actian.spark_vector.sql.DefaultSource OPTIONS ( host "$hostname", instance "$installation_ID", database "$databasename", table "vwload_reg02_unload_tbl" $usernameAndPasswordStatement)""")
sqlContext.sql(s"""CREATE TEMPORARY TABLE vwload_reg02_unload_tbl2 USING com.actian.spark_vector.sql.DefaultSource OPTIONS ( host "$hostname", instance "$installation_ID", database "$databasename", table "vwload_reg02_unload_tbl2" $usernameAndPasswordStatement)""")
scala> sqlContext.sql("select distinct vwload_reg02_unload_tbl.col_float4,vwload_reg02_unload_tbl.col_int,vwload_reg02_unload_tbl.col_nvarchar from vwload_reg02_unload_tbl").write.format("com.databricks.spark.csv").save(s"$savepath/vwload_reg02_unload_tbl_13.csv") [Stage 0:> (0 + 4) / 9]16/05/20 13:58:39 WARN TaskSetManager: Lost task 7.0 in stage 0.0 (TID 0, uksl-kelch01-cent6-clu2.actian.com): java.lang.ClassCastException: org.apache.spark.sql.catalyst.expressions.MutableInt cannot be cast to org.apache.spark.sql.catalyst.expressions.MutableFloat at org.apache.spark.sql.catalyst.expressions.SpecificMutableRow.getFloat(SpecificMutableRow.scala:261) at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificUnsafeProjection.apply(Unknown Source) at org.apache.spark.sql.execution.aggregate.TungstenAggregationIterator.processInputs(TungstenAggregationIterator.scala:374) at org.apache.spark.sql.execution.aggregate.TungstenAggregationIterator.start(TungstenAggregationIterator.scala:622) at org.apache.spark.sql.execution.aggregate.TungstenAggregate$$anonfun$doExecute$1.org$apache$spark$sql$execution$aggregate$TungstenAggregate$$anonfun$$executePartition$1(TungstenAggregate.scala:110) at org.apache.spark.sql.execution.aggregate.TungstenAggregate$$anonfun$doExecute$1$$anonfun$2.apply(TungstenAggregate.scala:119) at org.apache.spark.sql.execution.aggregate.TungstenAggregate$$anonfun$doExecute$1$$anonfun$2.apply(TungstenAggregate.scala:119) at org.apache.spark.rdd.MapPartitionsWithPreparationRDD.compute(MapPartitionsWithPreparationRDD.scala:64) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297) at org.apache.spark.rdd.RDD.iterator(RDD.scala:264) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297) at org.apache.spark.rdd.RDD.iterator(RDD.scala:264) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41) at org.apache.spark.scheduler.Task.run(Task.scala:88) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:745)