databricks / spark-sql-perf


dataGen error #126

Closed hahasdnu1029 closed 6 years ago

hahasdnu1029 commented 6 years ago

Spark 2.2.0

```
Caused by: java.lang.RuntimeException: Error while encoding: java.lang.ArrayIndexOutOfBoundsException: 0
if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 0, cs_sold_date_sk), StringType), true) AS cs_sold_date_sk#939
[... the same encoder expression repeats for the remaining 33 columns of catalog_sales, cs_sold_time_sk#940 through cs_net_profit#972 ...]
	at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.toRow(ExpressionEncoder.scala:290)
	at org.apache.spark.sql.SparkSession$$anonfun$3.apply(SparkSession.scala:573)
	at org.apache.spark.sql.SparkSession$$anonfun$3.apply(SparkSession.scala:573)
	at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)
	at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)
	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIterator.processNext(Unknown Source)
	at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
	at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$8$$anon$1.hasNext(WholeStageCodegenExec.scala:395)
	at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
	at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:125)
	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96)
	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53)
	at org.apache.spark.scheduler.Task.run(Task.scala:108)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
	at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.ArrayIndexOutOfBoundsException: 0
	at org.apache.spark.sql.catalyst.expressions.GenericRow.get(rows.scala:173)
	at org.apache.spark.sql.Row$class.isNullAt(Row.scala:191)
	at org.apache.spark.sql.catalyst.expressions.GenericRow.isNullAt(rows.scala:165)
	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificUnsafeProjection.evalIfCondExpr$(Unknown Source)
	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificUnsafeProjection.apply_0$(Unknown Source)
	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificUnsafeProjection.apply(Unknown Source)
	at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.toRow(ExpressionEncoder.scala:287)
	... 16 more
```

juliuszsompolski commented 6 years ago

Hi @hahasdnu1029, could you provide more reproduction information: what exactly are you doing when you get this error?

hahasdnu1029 commented 6 years ago

Hi @juliuszsompolski, I use spark-sql-perf to test Spark SQL as follows, running in spark-shell (Spark 2.2.0, Scala 2.11.8, Java 1.8).

First, start the shell:

```
./spark-shell --jars /home/sparktest/spark-sql-perf.jar --num-executors 20 --executor-cores 2 --executor-memory 8G --master spark://hw080:7077

Spark context available as 'sc' (master = spark://hw080:7077, app id = app-20180224104430-0002).
Spark session available as 'spark'.
Welcome to
      ____              __
     / __/__  ___ _____/ /__
    _\ \/ _ \/ _ `/ __/  '_/
   /___/ .__/\_,_/_/ /_/\_\   version 2.2.0
      /_/

Using Scala version 2.11.8 (Java HotSpot(TM) 64-Bit Server VM, Java 1.8.0_161)
Type in expressions to have them evaluated.
Type :help for more information.
```

Then create the TPCDSTables helper and kick off data generation:

```scala
scala> val sqlContext = new org.apache.spark.sql.SQLContext(sc)
warning: there was one deprecation warning; re-run with -deprecation for details
sqlContext: org.apache.spark.sql.SQLContext = org.apache.spark.sql.SQLContext@1caff0db

scala> import sqlContext.implicits._
import sqlContext.implicits._

scala> import com.databricks.spark.sql.perf.tpcds.TPCDSTables
import com.databricks.spark.sql.perf.tpcds.TPCDSTables

scala> val tables = new TPCDSTables(sqlContext, "/home/sparktest/tpcds-kit/tools", "1", false, false)
tables: com.databricks.spark.sql.perf.tpcds.TPCDSTables = com.databricks.spark.sql.perf.tpcds.TPCDSTables@5901757

scala> tables.genData("hdfs://hw080:9000/tpctest", "parquet", true, true, true, false, "", 100)
```
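For readers following along, those positional arguments are easy to misread. Matching them against the named-argument example in the spark-sql-perf README (treat the names as a reading aid; they may differ in other versions of the library), the two calls above correspond roughly to:

```scala
// Named-argument form of the calls above; parameter names follow the
// spark-sql-perf README and are a sketch, not a verified signature.
val tables = new TPCDSTables(sqlContext,
  dsdgenDir = "/home/sparktest/tpcds-kit/tools", // path to a compiled tpcds-kit
  scaleFactor = "1",                             // scale factor in GB
  useDoubleForDecimal = false,                   // keep DECIMAL columns as DECIMAL
  useStringForDate = false)                      // keep DATE columns as DATE

tables.genData(
  location = "hdfs://hw080:9000/tpctest",  // root directory for the generated data
  format = "parquet",                      // storage format
  overwrite = true,                        // replace data already at the location
  partitionTables = true,                  // create the partitioned fact tables
  clusterByPartitionColumns = true,        // shuffle so partitions coalesce into single files
  filterOutNullPartitionValues = false,    // keep rows whose partition key is NULL
  tableFilter = "",                        // "" means generate all tables
  numPartitions = 100)                     // number of dsdgen tasks to run
```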

The job starts:

```
Pre-clustering with partitioning columns with query
SELECT cs_sold_date_sk,cs_sold_time_sk,cs_ship_date_sk,cs_bill_customer_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_bill_addr_sk,cs_ship_customer_sk,cs_ship_cdemo_sk,cs_ship_hdemo_sk,cs_ship_addr_sk,cs_call_center_sk,cs_catalog_page_sk,cs_ship_mode_sk,cs_warehouse_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,cs_wholesale_cost,cs_list_price,cs_sales_price,cs_ext_discount_amt,cs_ext_sales_price,cs_ext_wholesale_cost,cs_ext_list_price,cs_ext_tax,cs_coupon_amt,cs_ext_ship_cost,cs_net_paid,cs_net_paid_inc_tax,cs_net_paid_inc_ship,cs_net_paid_inc_ship_tax,cs_net_profit FROM catalog_sales_text
DISTRIBUTE BY cs_sold_date_sk.
Generating table catalog_sales in database to hdfs://hw080:9000/tpctest/catalog_sales with save mode Overwrite.
```

Tasks then start failing with the same encoder error as in my first comment:

```
18/02/24 10:50:49 WARN TaskSetManager: Lost task 9.0 in stage 2.0 (TID 61, 172.18.11.80, executor 5): java.lang.RuntimeException: Error while encoding: java.lang.ArrayIndexOutOfBoundsException: 0
if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 0, cs_sold_date_sk), StringType), true) AS cs_sold_date_sk#313
[... the same encoder expression repeats for the remaining 33 columns (cs_sold_time_sk#314 through cs_net_profit#346), followed by the same executor-side stack trace as above ...]
```

The job is then aborted:

```
18/02/24 10:50:49 ERROR TaskSetManager: Task 0 in stage 2.0 failed 4 times; aborting job
18/02/24 10:50:49 ERROR FileFormatWriter: Aborting job null.
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 2.0 failed 4 times, most recent failure: Lost task 0.3 in stage 2.0 (TID 100, 172.18.11.80, executor 4): java.lang.RuntimeException: Error while encoding: java.lang.ArrayIndexOutOfBoundsException: 0
[... the same per-column encoder expressions and executor-side stack trace repeat here ...]
```

```
Driver stacktrace:
	at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1499)
	at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1486)
	at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:814)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1714)
	at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:630)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2022)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:173)
	at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:145)
	at org.apache.spark.sql.execution.datasources.DataSource.writeInFileFormat(DataSource.scala:438)
	at org.apache.spark.sql.execution.datasources.DataSource.write(DataSource.scala:474)
	at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:610)
	at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:233)
	at com.databricks.spark.sql.perf.Tables$Table.genData(Tables.scala:227)
	at com.databricks.spark.sql.perf.Tables$$anonfun$genData$2.apply(Tables.scala:294)
	at scala.collection.immutable.List.foreach(List.scala:381)
	at com.databricks.spark.sql.perf.Tables.genData(Tables.scala:292)
	[... REPL and SparkSubmit frames elided ...]
Caused by: java.lang.RuntimeException: Error while encoding: java.lang.ArrayIndexOutOfBoundsException: 0
[... the same per-column encoder expressions and stack trace repeat; the pasted log is truncated here ...]
```
org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificUnsafeProjection.apply_0$(Unknown Source) at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificUnsafeProjection.apply(Unknown Source) at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.toRow(ExpressionEncoder.scala:287) ... 16 more 18/02/24 10:50:49 WARN TaskSetManager: Lost task 18.2 in stage 2.0 (TID 116, 172.18.11.80, executor 4): TaskKilled (stage cancelled) 18/02/24 10:50:49 WARN TaskSetManager: Lost task 11.3 in stage 2.0 (TID 117, 172.18.11.83, executor 0): TaskKilled (stage cancelled) 18/02/24 10:50:49 WARN TaskSetManager: Lost task 1.2 in stage 2.0 (TID 101, 172.18.11.80, executor 4): TaskKilled (stage cancelled) 18/02/24 10:50:49 WARN TaskSetManager: Lost task 9.3 in stage 2.0 (TID 104, 172.18.11.83, executor 2): TaskKilled (stage cancelled) 18/02/24 10:50:49 WARN TaskSetManager: Lost task 13.2 in stage 2.0 (TID 105, 172.18.11.83, executor 2): TaskKilled (stage cancelled) 18/02/24 10:50:49 WARN TaskSetManager: Lost task 15.1 in stage 2.0 (TID 108, 172.18.11.80, executor 3): TaskKilled (stage cancelled) 18/02/24 10:50:49 WARN TaskSetManager: Lost task 17.2 in stage 2.0 (TID 111, 172.18.11.83, executor 1): TaskKilled (stage cancelled) 18/02/24 10:50:49 WARN TaskSetManager: Lost task 12.2 in stage 2.0 (TID 106, 172.18.11.80, executor 5): TaskKilled (stage cancelled) 18/02/24 10:50:49 WARN TaskSetManager: Lost task 19.2 in stage 2.0 (TID 109, 172.18.11.80, executor 5): TaskKilled (stage cancelled) 18/02/24 10:50:49 WARN TaskSetManager: Lost task 8.2 in stage 2.0 (TID 107, 172.18.11.166, executor 7): TaskKilled (stage cancelled) 18/02/24 10:50:49 WARN TaskSetManager: Lost task 3.3 in stage 2.0 (TID 114, 172.18.11.166, executor 6): TaskKilled (stage cancelled) 18/02/24 10:50:49 WARN TaskSetManager: Lost task 20.2 in stage 2.0 (TID 113, 172.18.11.166, executor 6): TaskKilled (stage cancelled) 18/02/24 10:50:49 WARN TaskSetManager: Lost task 4.3 in stage 2.0 (TID 110, 172.18.11.166, executor 7): TaskKilled (stage cancelled) 18/02/24 10:50:49 WARN TaskSetManager: Lost task 16.3 in stage 2.0 (TID 115, 172.18.11.166, executor 8): TaskKilled (stage cancelled) 18/02/24 10:50:49 WARN TaskSetManager: Lost task 6.2 in stage 2.0 (TID 112, 172.18.11.166, executor 8): TaskKilled (stage cancelled) org.apache.spark.SparkException: Job aborted. 
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply$mcV$sp(FileFormatWriter.scala:215) at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:173) at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:173) at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:65) at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:173) at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:145) at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:58) at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:56) at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:74) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117) at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135) at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116) at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:92) at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:92) at org.apache.spark.sql.execution.datasources.DataSource.writeInFileFormat(DataSource.scala:438) at org.apache.spark.sql.execution.datasources.DataSource.write(DataSource.scala:474) at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:48) at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:58) at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:56) at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:74) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117) at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135) at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116) at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:92) at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:92) at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:610) at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:233) at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:217) at com.databricks.spark.sql.perf.Tables$Table.genData(Tables.scala:227) at com.databricks.spark.sql.perf.Tables$$anonfun$genData$2.apply(Tables.scala:294) at com.databricks.spark.sql.perf.Tables$$anonfun$genData$2.apply(Tables.scala:292) at scala.collection.immutable.List.foreach(List.scala:381) at com.databricks.spark.sql.perf.Tables.genData(Tables.scala:292) ... 
52 elided Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 2.0 failed 4 times, most recent failure: Lost task 0.3 in stage 2.0 (TID 100, 172.18.11.80, execu tor 4): java.lang.RuntimeException: Error while encoding: java.lang.ArrayIndexOutOfBoundsException: 0if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 0, cs_sold_date_sk), StringType), true) AS cs_sold_date_sk#313if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 1, cs_sold_time_sk), StringType), true) AS cs_sold_time_sk#314if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 2, cs_ship_date_sk), StringType), true) AS cs_ship_date_sk#315if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 3, cs_bill_customer_sk), StringType), true) AS cs_bill_customer_sk#316if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 4, cs_bill_cdemo_sk), StringType), true) AS cs_bill_cdemo_sk#317if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 5, cs_bill_hdemo_sk), StringType), true) AS cs_bill_hdemo_sk#318if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 6, cs_bill_addr_sk), StringType), true) AS cs_bill_addr_sk#319if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 7, cs_ship_customer_sk), StringType), true) AS cs_ship_customer_sk#320if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 8, cs_ship_cdemo_sk), StringType), true) AS cs_ship_cdemo_sk#321if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 9, cs_ship_hdemo_sk), 
StringType), true) AS cs_ship_hdemo_sk#322if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 10, cs_ship_addr_sk), StringType), true) AS cs_ship_addr_sk#323if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 11, cs_call_center_sk), StringType), true) AS cs_call_center_sk#324if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 12, cs_catalog_page_sk), StringType), true) AS cs_catalog_page_sk#325if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 13, cs_ship_mode_sk), StringType), true) AS cs_ship_mode_sk#326if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 14, cs_warehouse_sk), StringType), true) AS cs_warehouse_sk#327if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 15, cs_item_sk), StringType), true) AS cs_item_sk#328if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 16, cs_promo_sk), StringType), true) AS cs_promo_sk#329if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 17, cs_order_number), StringType), true) AS cs_order_number#330if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 18, cs_quantity), StringType), true) AS cs_quantity#331if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 19, cs_wholesale_cost), StringType), true) AS cs_wholesale_cost#332if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, 
org.apache.spark.sql.Row, true]), 20, cs_list_price), StringType), true) AS cs_list_price#333if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 21, cs_sales_price), StringType), true) AS cs_sales_price#334if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 22, cs_ext_discount_amt), StringType), true) AS cs_ext_discount_amt#335if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 23, cs_ext_sales_price), StringType), true) AS cs_ext_sales_price#336if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 24, cs_ext_wholesale_cost), StringType), true) AS cs_ext_wholesale_cost#337if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 25, cs_ext_list_price), StringType), true) AS cs_ext_list_price#338if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 26, cs_ext_tax), StringType), true) AS cs_ext_tax#339if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 27, cs_coupon_amt), StringType), true) AS cs_coupon_amt#340if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 28, cs_ext_ship_cost), StringType), true) AS cs_ext_ship_cost#341if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 29, cs_net_paid), StringType), true) AS cs_net_paid#342if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 30, cs_net_paid_inc_tax), StringType), true) AS cs_net_paid_inc_tax#343if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, 
validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 31, cs_net_paid_inc_ship), StringType), true) AS cs_net_paid_inc_ship#344if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 32, cs_net_paid_inc_ship_tax), StringType), true) AS cs_net_paid_inc_ship_tax#345if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 33, cs_net_profit), StringType), true) AS cs_net_profit#346 at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.toRow(ExpressionEncoder.scala:290) at org.apache.spark.sql.SparkSession$$anonfun$3.apply(SparkSession.scala:573) at org.apache.spark.sql.SparkSession$$anonfun$3.apply(SparkSession.scala:573) at scala.collection.Iterator$$anon$11.next(Iterator.scala:409) at scala.collection.Iterator$$anon$11.next(Iterator.scala:409) at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIterator.processNext(Unknown Source) at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43) at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$8$$anon$1.hasNext(WholeStageCodegenExec.scala:395) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408) at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:125) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53) at org.apache.spark.scheduler.Task.run(Task.scala:108) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) Caused by: java.lang.ArrayIndexOutOfBoundsException: 0 at org.apache.spark.sql.catalyst.expressions.GenericRow.get(rows.scala:173) at org.apache.spark.sql.Row$class.isNullAt(Row.scala:191) at org.apache.spark.sql.catalyst.expressions.GenericRow.isNullAt(rows.scala:165) at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificUnsafeProjection.evalIfCondExpr$(Unknown Source) at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificUnsafeProjection.apply_0$(Unknown Source) at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificUnsafeProjection.apply(Unknown Source) at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.toRow(ExpressionEncoder.scala:287) ... 16 more

Driver stacktrace: at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1499) at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1487) at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1486) at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1486) at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814) at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814) at scala.Option.foreach(Option.scala:257) at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:814) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1714) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1669) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1658) at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48) at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:630) at org.apache.spark.SparkContext.runJob(SparkContext.scala:2022) at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply$mcV$sp(FileFormatWriter.scala:188) ... 90 more Caused by: java.lang.RuntimeException: Error while encoding: java.lang.ArrayIndexOutOfBoundsException: 0 if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 0, cs_sold_date_sk), StringType), true) AS cs_sold_date_sk#313if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 1, cs_sold_time_sk), StringType), true) AS cs_sold_time_sk#314if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 2, cs_ship_date_sk), StringType), true) AS cs_ship_date_sk#315if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 3, cs_bill_customer_sk), StringType), true) AS cs_bill_customer_sk#316if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 4, cs_bill_cdemo_sk), StringType), true) AS cs_bill_cdemo_sk#317if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 5, 
cs_bill_hdemo_sk), StringType), true) AS cs_bill_hdemo_sk#318if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 6, cs_bill_addr_sk), StringType), true) AS cs_bill_addr_sk#319if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 7, cs_ship_customer_sk), StringType), true) AS cs_ship_customer_sk#320if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 8, cs_ship_cdemo_sk), StringType), true) AS cs_ship_cdemo_sk#321if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 9, cs_ship_hdemo_sk), StringType), true) AS cs_ship_hdemo_sk#322if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 10, cs_ship_addr_sk), StringType), true) AS cs_ship_addr_sk#323if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 11, cs_call_center_sk), StringType), true) AS cs_call_center_sk#324if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 12, cs_catalog_page_sk), StringType), true) AS cs_catalog_page_sk#325if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 13, cs_ship_mode_sk), StringType), true) AS cs_ship_mode_sk#326if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 14, cs_warehouse_sk), StringType), true) AS cs_warehouse_sk#327if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 15, cs_item_sk), StringType), true) AS cs_item_sk#328if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex 
ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 16, cs_promo_sk), StringType), true) AS cs_promo_sk#329if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 17, cs_order_number), StringType), true) AS cs_order_number#330if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 18, cs_quantity), StringType), true) AS cs_quantity#331if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 19, cs_wholesale_cost), StringType), true) AS cs_wholesale_cost#332if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 20, cs_list_price), StringType), true) AS cs_list_price#333if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 21, cs_sales_price), StringType), true) AS cs_sales_price#334if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 22, cs_ext_discount_amt), StringType), true) AS cs_ext_discount_amt#335if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 23, cs_ext_sales_price), StringType), true) AS cs_ext_sales_price#336if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 24, cs_ext_wholesale_cost), StringType), true) AS cs_ext_wholesale_cost#337if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 25, cs_ext_list_price), StringType), true) AS cs_ext_list_price#338if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 26, cs_ext_tax), StringType), true) AS cs_ext_tax#339if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, 
StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 27, cs_coupon_amt), StringType), true) AS cs_coupon_amt#340if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 28, cs_ext_ship_cost), StringType), true) AS cs_ext_ship_cost#341if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 29, cs_net_paid), StringType), true) AS cs_net_paid#342if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 30, cs_net_paid_inc_tax), StringType), true) AS cs_net_paid_inc_tax#343if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 31, cs_net_paid_inc_ship), StringType), true) AS cs_net_paid_inc_ship#344if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 32, cs_net_paid_inc_ship_tax), StringType), true) AS cs_net_paid_inc_ship_tax#345if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getex ternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 33, cs_net_profit), StringType), true) AS cs_net_profit#346 at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.toRow(ExpressionEncoder.scala:290) at org.apache.spark.sql.SparkSession$$anonfun$3.apply(SparkSession.scala:573) at org.apache.spark.sql.SparkSession$$anonfun$3.apply(SparkSession.scala:573) at scala.collection.Iterator$$anon$11.next(Iterator.scala:409) at scala.collection.Iterator$$anon$11.next(Iterator.scala:409) at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIterator.processNext(Unknown Source) at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43) at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$8$$anon$1.hasNext(WholeStageCodegenExec.scala:395) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408) at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:125) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53) at org.apache.spark.scheduler.Task.run(Task.scala:108) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) Caused by: 
java.lang.ArrayIndexOutOfBoundsException: 0 at org.apache.spark.sql.catalyst.expressions.GenericRow.get(rows.scala:173) at org.apache.spark.sql.Row$class.isNullAt(Row.scala:191) at org.apache.spark.sql.catalyst.expressions.GenericRow.isNullAt(rows.scala:165) at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificUnsafeProjection.evalIfCondExpr$(Unknown Source) at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificUnsafeProjection.apply_0$(Unknown Source) at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificUnsafeProjection.apply(Unknown Source) at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.toRow(ExpressionEncoder.scala:287) ... 16 more

scala>

This is all the information!

juliuszsompolski commented 6 years ago

Hi @hahasdnu1029,

For /home/sparktest/tpcds-kit/tools, do you use the toolkit from https://github.com/databricks/tpcds-kit? The README (https://github.com/databricks/spark-sql-perf/blob/master/README.md) says:

We have a fork of dsdgen that you will need. The fork includes changes to generate TPCDS data to stdout, so that this library can pipe them directly to Spark, without intermediate files. Therefore, this library will not work with the vanilla TPCDS kit.
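A quick way to tell which kit you have is to invoke dsdgen by hand and watch stdout. This is an untested sketch: the path is yours to adjust, and the flag set (-table, -filter, -scale) is an assumption based on the fork's -filter option.

import scala.sys.process._

// Run dsdgen from its own directory (it reads tpcds.idx from the cwd).
// The databricks fork streams rows to stdout with `-filter Y`; the vanilla
// kit ignores stdout and writes .dat files into the tools directory instead.
val toolsDir = new java.io.File("/home/sparktest/tpcds-kit/tools") // adjust
val out = Process(Seq("./dsdgen", "-table", "call_center", "-filter", "Y", "-scale", "1"), toolsDir).!!
println(out.split("\n").take(3).mkString("\n")) // expect pipe-separated rows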

hahasdnu1029 commented 6 years ago

Hi @juliuszsompolski, I used it.

juliuszsompolski commented 6 years ago

Hi @hahasdnu1029 Hmm... I see you're running a cluster with 20 executors. Do you have tpcds-kit on all the executors, or just on the driver? Unfortunately this doesn't seem to be mentioned in the README, but dsdgen has to be present under the same path on all executors. I will fix that in the README.
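Something like this rough sketch (path assumed; run from spark-shell) can confirm the binary is visible from every executor:

// Spread many small tasks so every executor runs at least one, then report
// per host whether dsdgen exists at the expected path.
val dsdgenPath = "/home/sparktest/tpcds-kit/tools/dsdgen" // adjust to your path
val report = sc.parallelize(1 to 200, 200).map { _ =>
  (java.net.InetAddress.getLocalHost.getHostName, new java.io.File(dsdgenPath).exists)
}.distinct().collect()
report.foreach { case (host, ok) => println(s"$host -> dsdgen present: $ok") }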

hahasdnu1029 commented 6 years ago

I put tpcds-kit on all the cluster machines, not just the driver, and placed it in the same directory.

juliuszsompolski commented 6 years ago

Hi @hahasdnu1029 I tried, and unfortunately I can't reproduce it on my local laptop, and I don't currently have a bigger Spark cluster available to test on. Could you try two things: 1) run it in local mode, on the driver only, saving to /tmp instead of HDFS, and see if you get the same error; 2) run it with whole-stage codegen disabled (spark.conf.set("spark.sql.codegen.wholeStage", "false")). With no codegen we may get more details from the stack trace (right now it's blended into the GeneratedClass). A sketch of both checks is below.
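A minimal sketch of the two checks in spark-shell (the config key is real; the rerun notes are assumptions about your setup):

// 1) Relaunch locally, e.g. spark-shell --master local[*], and point the
//    output at file:///tmp/tpcds instead of HDFS when you rerun genData.
// 2) Disable whole-stage codegen so the failing expression surfaces in a
//    plain interpreted stack trace instead of inside GeneratedClass:
spark.conf.set("spark.sql.codegen.wholeStage", "false")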

gengliangwang commented 6 years ago

I hit the same issue. I was following the commands in the README. It turned out that I was using https://github.com/gregrahn/tpcds-kit.git instead of https://github.com/databricks/tpcds-kit.git. I have just created a PR for this: https://github.com/databricks/tpcds-kit/pull/4. @hahasdnu1029 Using https://github.com/databricks/tpcds-kit.git should resolve your issue.

juliuszsompolski commented 6 years ago

@hahasdnu1029 Could you check that you haven't fallen into the same pitfall as @gengliangwang by following the README from https://github.com/databricks/tpcds-kit.git and accidentally getting https://github.com/gregrahn/tpcds-kit.git instead?
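One quick check (sketch; the clone path is an assumption) is to print the clone's remote from spark-shell and make sure it points at databricks/tpcds-kit:

import scala.sys.process._

// The remote should be https://github.com/databricks/tpcds-kit.git,
// not https://github.com/gregrahn/tpcds-kit.git.
val remotes = Seq("git", "-C", "/home/sparktest/tpcds-kit", "remote", "-v").!!
println(remotes)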

hahasdnu1029 commented 6 years ago

Hi @gengliangwang @juliuszsompolski

Thanks, I'll try these approaches.

juliuszsompolski commented 6 years ago

@Koprvhdix This will not work. In https://github.com/databricks/tpcds-kit/ the option is -filter; see https://github.com/databricks/tpcds-kit/commit/8572a1bd0f679804244369b6777a78a0abdabe65. In https://github.com/gregrahn/tpcds-kit.git the -filter option does not work correctly, because it's missing https://github.com/databricks/tpcds-kit/commit/90b8e13a96577cab6788ba644fc0c12642132f18

Koprvhdix commented 6 years ago

@juliuszsompolski I see, I cloned the wrong tpcds-kit. Sorry.

manikantabandaru commented 6 years ago

I am also facing the same problem as above. I used the correct git repo (https://github.com/databricks/tpcds-kit/) but I am still facing the issue; I executed it in a Windows environment:

...unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 32, cs_net_paid_inc_ship_tax), StringType), true) AS cs_net_paid_inc_ship_tax#345
if (assertnotnull(input[0, org.apache.spark.sql.Row, true]).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 33, cs_net_profit), StringType), true) AS cs_net_profit#346
at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.toRow(ExpressionEncoder.scala:290)
at org.apache.spark.sql.SparkSession$$anonfun$3.apply(SparkSession.scala:582)
at org.apache.spark.sql.SparkSession$$anonfun$3.apply(SparkSession.scala:582)
[same executor-side stack trace as in the original report, except Executor.scala:338]
Caused by: java.lang.ArrayIndexOutOfBoundsException: 0
at org.apache.spark.sql.catalyst.expressions.GenericRow.get(rows.scala:173)
at org.apache.spark.sql.Row$class.isNullAt(Row.scala:191)
at org.apache.spark.sql.catalyst.expressions.GenericRow.isNullAt(rows.scala:165)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificUnsafeProjection.evalIfCondExpr$(Unknown Source)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificUnsafeProjection.apply_0$(Unknown Source)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificUnsafeProjection.apply(Unknown Source)
at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.toRow(ExpressionEncoder.scala:287)
... 16 more

juliuszsompolski commented 6 years ago

@manikantabandaru Hm. Sorry, I have too little data to debug it. You could run something like

import com.databricks.spark.sql.perf.tpcds.DSDGEN
// Point at the directory containing the dsdgen binary.
val dsdgen = new DSDGEN("/path/to/your/dsdgen")
// Args: SparkContext, table name, number of partitions, scale factor (GB, as a string).
val rdd = dsdgen.generate(spark.sparkContext, "store", 1, "1")
// collect() so the rows print on the driver rather than inside the executors.
rdd.collect().foreach { row => println(row) }

(I don't guarantee it compiles, see https://github.com/databricks/spark-sql-perf/blob/master/src/main/scala/com/databricks/spark/sql/perf/tpcds/TPCDSTables.scala#L27 to troubleshoot)

That RDD[String] should contain generated TPCDS data as PSV (Pipe Separated Values).

If there's an error instead, it may hint at what's happening. If it's empty and instead you're seeing PSV .dat files being generated in the dir of your dsdgen, then the wrong tpcds-kit has sneaked in.
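As a small follow-up to the snippet above (sketch, reusing the rdd defined there): pull a few rows to the driver and print a hint if nothing comes back.

// A correct kit yields pipe-separated rows such as "1|AAAAAAAABAAAAAAA|1997-03-13|...";
// an empty result plus stray .dat files next to dsdgen points at the vanilla kit.
val sample = rdd.take(5)
if (sample.isEmpty) println("No rows arrived over stdout; check the dsdgen dir for .dat files.")
else sample.foreach(println)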