-- First table: contains a row whose c1 is NULL, which is required to
-- trigger the reported crash ("value at 0 is null") after the FULL JOIN.
CREATE OR REPLACE TEMP VIEW t1 AS VALUES (1, 2), (null, 7) AS (c1, c2);
-- Second table. NOTE: this must create t2 (the original repro mistakenly
-- re-created t1, so the join below could never run).
CREATE OR REPLACE TEMP VIEW t2 AS VALUES (2, 3) AS (c1, c2);
-- JOIN and EXPLODE: the FULL JOIN produces a row with a NULL c1;
-- projecting it through EXPLODE(ARRAY(c1)) is what hits the assertion.
SELECT EXPLODE(ARRAY(c1)) FROM t1 FULL JOIN t2 USING (c1);
Failure trace:
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2454)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2403)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2402)
at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2402)
...
Caused by: java.lang.AssertionError: value at 0 is null
at ai.rapids.cudf.HostColumnVectorCore.assertsForGet(HostColumnVectorCore.java:230)
at ai.rapids.cudf.HostColumnVectorCore.getInt(HostColumnVectorCore.java:256)
at com.nvidia.spark.rapids.RapidsHostColumnVectorCore.getInt(RapidsHostColumnVectorCore.java:109)
at org.apache.spark.sql.vectorized.ColumnarBatchRow.getInt(ColumnarBatch.java:202)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificUnsafeProjection.apply(Unknown Source)
Note: Apache Spark versions < 3.4 do not crash on CPU, but they return incorrect results. This was fixed in Apache Spark 3.4.
Any tests added for this need to be skipped for Spark versions < 3.4.
(Repro SQL and failure trace are shown above.)
Note: Apache Spark versions < 3.4 do not crash on CPU, but they return incorrect results. This was fixed in Apache Spark 3.4.
Any tests added for this need to be skipped for Spark versions < 3.4.