Open abhisagarwal opened 6 months ago
Hi @abhisagarwal, This is actually an issue with the JDBC driver rather than the Spark Connector. The problem was addressed with this PR https://github.com/snowflakedb/snowflake-jdbc/pull/1608. That was fixed in the JDBC driver starting with version 3.14.4.
facing the below error while trying to access Snowflake from Spark
DF-SYS-01 org.apache.spark.SparkException: Job aborted due to stage failure: Task 2 in stage 0.0 failed 1 times, most recent failure: Lost task 2.0 in stage 0.0 (TID 2) (vm-b7c76241 executor 1): java.lang.NullPointerException at net.snowflake.client.jdbc.telemetryOOB.TelemetryService$TELEMETRY_SERVER_DEPLOYMENT.access$000(TelemetryService.java:217) at net.snowflake.client.jdbc.telemetryOOB.TelemetryService.getServerDeploymentName(TelemetryService.java:256) at net.snowflake.client.jdbc.telemetryOOB.TelemetryEvent$Builder.(TelemetryEvent.java:126)
at net.snowflake.client.jdbc.telemetryOOB.TelemetryEvent$LogBuilder.(TelemetryEvent.java:61)
at net.snowflake.client.jdbc.SnowflakeSQLLoggedException.sendOutOfBandTelemetryMessage(SnowflakeSQLLoggedException.java:53)
at net.snowflake.client.jdbc.SnowflakeSQLLoggedException.sendTelemetryData(SnowflakeSQLLoggedException.java:220)
at net.snowflake.client.jdbc.SnowflakeSQLLoggedException.(SnowflakeSQLLoggedException.java:245)
at net.snowflake.client.jdbc.SnowflakeChunkDownloader.getNextChunkToConsume(SnowflakeChunkDownloader.java:602)
at net.snowflake.client.core.SFArrowResultSet.fetchNextRowUnsorted(SFArrowResultSet.java:232)
at net.snowflake.client.core.SFArrowResultSet.fetchNextRow(SFArrowResultSet.java:209)
at net.snowflake.client.core.SFArrowResultSet.next(SFArrowResultSet.java:344)
at net.snowflake.client.jdbc.SnowflakeResultSetV1.next(SnowflakeResultSetV1.java:92)
at net.snowflake.spark.snowflake.io.ResultIterator.hasNext(SnowflakeResultSetRDD.scala:152)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:764)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:140)
at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:52)
at org.apache.spark.scheduler.Task.run(Task.scala:136)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:548)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1504)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:551)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:750)
Driver stacktrace: at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2682) at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2618) at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2617) at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62) at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55) at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49) at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2617) at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1190) at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1190) at scala.Option.foreach(Option.scala:407) at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1190) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2870) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2812) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2801) at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49) at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:958) at org.apache.spark.SparkContext.runJob(SparkContext.scala:2398) at org.apache.spark.SparkContext.runJob(SparkContext.scala:2419) at org.apache.spark.SparkContext.runJob(SparkContext.scala:2438) at org.apache.spark.SparkContext.runJob(SparkContext.scala:2463) at org.apache.spark.rdd.RDD.$anonfun$collect$1(RDD.scala:1028) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112) at org.apache.spark.rdd.RDD.withScope(RDD.scala:407) at org.apache.spark.rdd.RDD.collect(RDD.scala:1027) at org.apache.spark.RangePartitioner$.sketch(Partitioner.scala:304) at org.apache.spark.RangePartitioner.(Partitioner.scala:171)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec$.prepareShuffleDependency(ShuffleExchangeExec.scala:293)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.shuffleDependency$lzycompute(ShuffleExchangeExec.scala:173)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.shuffleDependency(ShuffleExchangeExec.scala:167)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.doExecute(ShuffleExchangeExec.scala:189)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:230)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:268)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:265)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:226)
at org.apache.spark.sql.execution.InputAdapter.inputRDD(WholeStageCodegenExec.scala:531)
at org.apache.spark.sql.execution.InputRDDCodegen.inputRDDs(WholeStageCodegenExec.scala:459)
at org.apache.spark.sql.execution.InputRDDCodegen.inputRDDs$(WholeStageCodegenExec.scala:458)
at org.apache.spark.sql.execution.InputAdapter.inputRDDs(WholeStageCodegenExec.scala:502)
at org.apache.spark.sql.execution.SortExec.inputRDDs(SortExec.scala:157)
at org.apache.spark.sql.execution.ProjectExec.inputRDDs(basicPhysicalOperators.scala:52)
at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:755)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:230)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:268)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:265)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:226)
at com.microsoft.dataflow.spark.SequenceExec.doExecute(SequenceExec.scala:39)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:230)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:268)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:265)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:226)
at org.apache.spark.sql.execution.InputAdapter.inputRDD(WholeStageCodegenExec.scala:531)
at org.apache.spark.sql.execution.InputRDDCodegen.inputRDDs(WholeStageCodegenExec.scala:459)
at org.apache.spark.sql.execution.InputRDDCodegen.inputRDDs$(WholeStageCodegenExec.scala:458)
at org.apache.spark.sql.execution.InputAdapter.inputRDDs(WholeStageCodegenExec.scala:502)
at org.apache.spark.sql.execution.FilterExec.inputRDDs(basicPhysicalOperators.scala:239)
at org.apache.spark.sql.execution.ProjectExec.inputRDDs(basicPhysicalOperators.scala:52)
at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:755)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:230)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:268)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:265)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:226)
at org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec.doExecute(ObjectHashAggregateExec.scala:91)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:230)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:268)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:265)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:226)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.inputRDD$lzycompute(ShuffleExchangeExec.scala:135)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.inputRDD(ShuffleExchangeExec.scala:135)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.shuffleDependency$lzycompute(ShuffleExchangeExec.scala:169)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.shuffleDependency(ShuffleExchangeExec.scala:167)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.doExecute(ShuffleExchangeExec.scala:189)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:230)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:268)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:265)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:226)
at org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec.doExecute(ObjectHashAggregateExec.scala:91)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:230)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:268)