-- Target table of the reproduction: two columns, partitioned on the `bz` date column.
create table bigdata.tmp_test (
    bz  date,
    amt double
)
partitioned by (`bz`);
-- Unpartitioned source table with the same two columns (bz date, amt double).
create table bigdata.tmp_test2 (
    bz  date,
    amt double
);
-- Seed the source table with four rows: two distinct dates, each listed twice,
-- with a NULL double in every amt. The REPARTITION(1) hint and the `select *`
-- are kept exactly as written because they are part of the reproduction.
insert into bigdata.tmp_test2
select /*+ REPARTITION(1) */
    *
from values
    (date'2024-11-20', cast(null as double)),
    (date'2024-11-21', cast(null as double)),
    (date'2024-11-20', cast(null as double)),
    (date'2024-11-21', cast(null as double))
    as tab(`bz`, amt);
-- Copy every source row into the partitioned table. This is the write whose
-- output path (.../bigdata.db/tmp_test) appears in the error reported below.
insert into bigdata.tmp_test (`bz`, `amt`)
select
    `bz`,
    `amt`
from bigdata.tmp_test2;
Error raised by the final `insert into bigdata.tmp_test` statement:
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 4.0 failed 4 times, most recent failure: Lost task 0.3 in stage 4.0 (TID 7) (emr-worker-6.cluster-193258 executor 3): org.apache.spark.SparkException: Task failed while writing rows to staging path: hdfs://emr-cluster/user/hive/warehouse/bigdata.db/tmp_test/_temporary/0/_temporary/attempt_202411221316176936724271216282837_0004_m_000000_7, output path: hdfs://emr-cluster/user/hive/warehouse/bigdata.db/tmp_test
at org.apache.spark.sql.execution.VeloxColumnarWriteFilesRDD.compute(VeloxColumnarWriteFilesExec.scala:234)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:367)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:331)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:92)
at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:161)
at org.apache.spark.scheduler.Task.run(Task.scala:139)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:554)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1529)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:557)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.gluten.exception.GlutenException: org.apache.gluten.exception.GlutenException: Exception: VeloxRuntimeError
Error Source: RUNTIME
Error Code: INVALID_STATE
Reason: An unsupported nested encoding was found.
Retriable: False
Expression: vec.valueVector() == nullptr || vec.wrappedVector()->isFlatEncoding()
Context: Operator: TableWrite[2] 2
Function: exportFlattenedVector
File: /opt/gluten/ep/build-velox/build/velox_ep/velox/vector/arrow/Bridge.cpp
Line: 1081
Stack trace:
# 0 _ZN8facebook5velox7process10StackTraceC1Ei
# 1 _ZN8facebook5velox14VeloxExceptionC1EPKcmS3_St17basic_string_viewIcSt11char_traitsIcEES7_S7_S7_bNS1_4TypeES7_
# 2 _ZN8facebook5velox6detail14veloxCheckFailINS0_17VeloxRuntimeErrorEPKcEEvRKNS1_18VeloxCheckFailArgsET0_
# 3 _ZN8facebook5velox12_GLOBAL__N_121exportFlattenedVectorERKNS0_10BaseVectorERKNS1_9SelectionERK12ArrowOptionsR10ArrowArrayPNS0_6memory10MemoryPoolERNS1_24VeloxToArrowBridgeHolderE
# 4 _ZN8facebook5velox12_GLOBAL__N_117exportToArrowImplERKNS0_10BaseVectorERKNS1_9SelectionERK12ArrowOptionsR10ArrowArrayPNS0_6memory10MemoryPoolE
# 5 _ZN8facebook5velox12_GLOBAL__N_117exportToArrowImplERKNS0_10BaseVectorERKNS1_9SelectionERK12ArrowOptionsR10ArrowArrayPNS0_6memory10MemoryPoolE
# 6 _ZN8facebook5velox13exportToArrowERKSt10shared_ptrINS0_10BaseVectorEER10ArrowArrayPNS0_6memory10MemoryPoolERK12ArrowOptions
# 7 _ZN8facebook5velox7parquet6Writer5writeERKSt10shared_ptrINS0_10BaseVectorEE
# 8 _ZN8facebook5velox9connector4hive12HiveDataSink5writeEmSt10shared_ptrINS0_9RowVectorEE
# 9 _ZN8facebook5velox9connector4hive12HiveDataSink10appendDataESt10shared_ptrINS0_9RowVectorEE
# 10 _ZN8facebook5velox4exec11TableWriter8addInputESt10shared_ptrINS0_9RowVectorEE
# 11 _ZZN8facebook5velox4exec6Driver11runInternalERSt10shared_ptrIS2_ERS3_INS1_13BlockingStateEERS3_INS0_9RowVectorEEENKUlvE4_clEv
# 12 _ZN8facebook5velox4exec6Driver11runInternalERSt10shared_ptrIS2_ERS3_INS1_13BlockingStateEERS3_INS0_9RowVectorEE
# 13 _ZN8facebook5velox4exec6Driver4nextEPN5folly10SemiFutureINS3_4UnitEEE
# 14 _ZN8facebook5velox4exec4Task4nextEPN5folly10SemiFutureINS3_4UnitEEE
# 15 _ZN6gluten24WholeStageResultIterator4nextEv
# 16 Java_org_apache_gluten_vectorized_ColumnarBatchOutIterator_nativeHasNext
# 17 0x00007f0919018427
at org.apache.gluten.iterator.ClosableIterator.hasNext(ClosableIterator.java:41)
at scala.collection.convert.Wrappers$JIteratorWrapper.hasNext(Wrappers.scala:45)
at org.apache.gluten.iterator.IteratorsV1$InvocationFlowProtection.hasNext(IteratorsV1.scala:159)
at org.apache.gluten.iterator.IteratorsV1$IteratorCompleter.hasNext(IteratorsV1.scala:71)
at org.apache.gluten.iterator.IteratorsV1$PayloadCloser.hasNext(IteratorsV1.scala:37)
at org.apache.gluten.iterator.IteratorsV1$LifeTimeAccumulator.hasNext(IteratorsV1.scala:100)
at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
at org.apache.spark.sql.execution.VeloxColumnarWriteFilesRDD.$anonfun$compute$2(VeloxColumnarWriteFilesExec.scala:204)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1563)
at org.apache.spark.sql.execution.VeloxColumnarWriteFilesRDD.compute(VeloxColumnarWriteFilesExec.scala:199)
... 11 more
Backend
VL (Velox)
Bug description
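Inserting the rows of bigdata.tmp_test2 into the partitioned table bigdata.tmp_test fails during the write with a VeloxRuntimeError (INVALID_STATE): "An unsupported nested encoding was found." The reproduction SQL and the full stack trace are given above.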
Spark version
Spark-3.4.x
Spark configurations
No response
System information
Backend: Velox
Backend Branch: HEAD
Backend Revision: c1c40d997551d83ccaaaa9c2e26cabbc949a425a
Backend Revision Time: 2024-11-15 21:00:05 +0000
GCC Version: GCC: (GNU) 11.2.1 20220127 (Red Hat 11.2.1-9)
Gluten Branch:
Gluten Build Time: 2024-11-17T05:43:21Z
Gluten Repo URL:
Gluten Revision:
Gluten Revision Time:
Gluten Version: 1.3.0-SNAPSHOT
Hadoop Version: 2.7.4
Java Version: 1.8
Scala Version: 2.12.15
Spark Version: 3.4.4
Relevant logs
No response