hortonworks-spark / spark-atlas-connector

A Spark Atlas connector to track data lineage in Apache Atlas
Apache License 2.0

java.lang.IllegalStateException: SparkContext has been shutdown #320

Open cometta opened 2 years ago

cometta commented 2 years ago

May I know if this library is still under development? I use spark-atlas-connector-assembly-3.2.1.7.2.15.0-147.jar with PySpark, and when I run it I get the error below. Can anyone advise, please? The error is not clear; how can I enable more verbose error messages?
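
On the verbosity question, one way to get more detail is to raise the log level for the connector's own package; a minimal sketch from PySpark, assuming Spark 3.2's bundled log4j 1.x (the package name com.hortonworks.spark.atlas is taken from the stack trace below):

    from pyspark.sql import SparkSession

    spark = SparkSession.builder.getOrCreate()

    # reach the JVM-side log4j 1.x API through py4j and enable DEBUG
    # logging for the connector package only, leaving the root level alone
    log4j = spark.sparkContext._jvm.org.apache.log4j
    log4j.LogManager.getLogger("com.hortonworks.spark.atlas") \
         .setLevel(log4j.Level.DEBUG)

The same effect should be available without code by adding log4j.logger.com.hortonworks.spark.atlas=DEBUG to Spark's conf/log4j.properties.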

22/06/21 19:33:07 WARN SparkExecutionPlanProcessor: Caught exception during parsing event
java.lang.IllegalStateException: SparkContext has been shutdown
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2206)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2235)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2254)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2279)
    at org.apache.spark.rdd.RDD.$anonfun$collect$1(RDD.scala:1030)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:414)
    at org.apache.spark.rdd.RDD.collect(RDD.scala:1029)
    at org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:394)
    at org.apache.spark.sql.Dataset.collectFromPlan(Dataset.scala:3715)
    at org.apache.spark.sql.Dataset.$anonfun$collect$1(Dataset.scala:2971)
    at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3706)
    at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
    at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
    at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
    at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
    at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
    at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3704)
    at org.apache.spark.sql.Dataset.collect(Dataset.scala:2971)
    at org.apache.spark.sql.delta.PartitionFiltering.$anonfun$filesForScan$2(PartitionFiltering.scala:38)
    at org.apache.spark.sql.delta.util.DeltaProgressReporter.withJobDescription(DeltaProgressReporter.scala:53)
    at org.apache.spark.sql.delta.util.DeltaProgressReporter.withStatusCode(DeltaProgressReporter.scala:32)
    at org.apache.spark.sql.delta.util.DeltaProgressReporter.withStatusCode$(DeltaProgressReporter.scala:27)
    at org.apache.spark.sql.delta.Snapshot.withStatusCode(Snapshot.scala:56)
    at org.apache.spark.sql.delta.PartitionFiltering.filesForScan(PartitionFiltering.scala:38)
    at org.apache.spark.sql.delta.PartitionFiltering.filesForScan$(PartitionFiltering.scala:27)
    at org.apache.spark.sql.delta.Snapshot.filesForScan(Snapshot.scala:56)
    at org.apache.spark.sql.delta.files.TahoeLogFileIndex.inputFiles(TahoeFileIndex.scala:190)
    at org.apache.spark.sql.execution.datasources.HadoopFsRelation.inputFiles(HadoopFsRelation.scala:76)
    at com.hortonworks.spark.atlas.sql.CommandsHarvester$.$anonfun$discoverInputsEntities$3(CommandsHarvester.scala:502)
    at scala.Option.getOrElse(Option.scala:189)
    at com.hortonworks.spark.atlas.sql.CommandsHarvester$.$anonfun$discoverInputsEntities$1(CommandsHarvester.scala:502)
    at scala.collection.TraversableLike.$anonfun$flatMap$1(TraversableLike.scala:293)
    at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
    at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
    at scala.collection.TraversableLike.flatMap(TraversableLike.scala:293)
    at scala.collection.TraversableLike.flatMap$(TraversableLike.scala:290)
    at scala.collection.AbstractTraversable.flatMap(Traversable.scala:108)
    at com.hortonworks.spark.atlas.sql.CommandsHarvester$.com$hortonworks$spark$atlas$sql$CommandsHarvester$$discoverInputsEntities(CommandsHarvester.scala:497)
    at com.hortonworks.spark.atlas.sql.CommandsHarvester$SaveIntoDataSourceHarvester$.harvest(CommandsHarvester.scala:181)
    at com.hortonworks.spark.atlas.sql.SparkExecutionPlanProcessor.$anonfun$process$3(SparkExecutionPlanProcessor.scala:120)
    at scala.collection.TraversableLike.$anonfun$flatMap$1(TraversableLike.scala:293)
    at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
    at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
    at scala.collection.TraversableLike.flatMap(TraversableLike.scala:293)
    at scala.collection.TraversableLike.flatMap$(TraversableLike.scala:290)
    at scala.collection.AbstractTraversable.flatMap(Traversable.scala:108)
    at com.hortonworks.spark.atlas.sql.SparkExecutionPlanProcessor.process(SparkExecutionPlanProcessor.scala:101)
    at com.hortonworks.spark.atlas.sql.SparkExecutionPlanProcessor.process(SparkExecutionPlanProcessor.scala:67)
    at com.hortonworks.spark.atlas.AbstractEventProcessor.eventProcess(AbstractEventProcessor.scala:97)
    at com.hortonworks.spark.atlas.AbstractEventProcessor$$anon$1.run(AbstractEventProcessor.scala:46)
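
Reading the trace, the connector's background event processor (AbstractEventProcessor) is still harvesting lineage for a Delta write after the driver has already stopped the SparkContext: TahoeLogFileIndex.inputFiles triggers a collect(), and SparkContext.runJob throws because the context is gone. A minimal workaround sketch, assuming the script calls spark.stop() immediately after its last write; the delay length and output path here are assumptions, not tested values:

    import time
    from pyspark.sql import SparkSession

    spark = SparkSession.builder.getOrCreate()

    df = spark.range(100)
    # hypothetical Delta write; any output that SAC harvests will do
    df.write.format("delta").mode("overwrite").save("/tmp/sac_demo")

    # give SAC's asynchronous SparkExecutionPlanProcessor time to drain
    # its event queue before the context goes away; 30s is a guess
    time.sleep(30)
    spark.stop()

Because the processor runs on its own thread, anything that keeps the session alive until the event queue drains should avoid the IllegalStateException.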