Status: Open — reported by NvTimLiu, opened 1 year ago
IllegalArgumentException
----------------------------- Captured stdout call -----------------------------
### CPU RUN ###
### GPU RUN ###
std_input_path = 'hdfs:/input-path/integration_tests/src/test/resources'
filename = 'date.csv'
schema = StructType([StructField('ts', TimestampType(), True)])
@allow_non_gpu('FileSourceScanExec')
@pytest.mark.skipif(is_before_spark_340(), reason='enableDateTimeParsingFallback is supported from Spark3.4.0')
@pytest.mark.parametrize('filename,schema',[("date.csv", _date_schema), ("date.csv", _ts_schema,),
("ts.csv", _ts_schema)])
def test_csv_datetime_parsing_fallback_cpu_fallback(std_input_path, filename, schema):
data_path = std_input_path + "/" + filename
assert_gpu_fallback_collect(
lambda spark : spark.read.schema(schema).option('enableDateTimeParsingFallback', "true").csv(data_path),
'FileSourceScanExec',
> conf=_enable_all_types_conf)
:576:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
:430: in assert_gpu_fallback_collect
from_gpu, gpu_df = with_gpu_session(bring_back, conf=conf)
:133: in with_gpu_session
return with_spark_session(func, conf=copy)
:100: in with_spark_session
ret = func(_spark)
:208: in bring_back
return (df.collect(), df)
:1217: in collect
sock_info = self._jdf.collectToPython()
:1323: in __call__
answer, self.gateway_client, self.target_id, self.name)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
a = ('xro251702', <py4j.clientserver.JavaClient object at 0x7f1cedc63410>, 'o251701', 'collectToPython')
kw = {}, converted = IllegalArgumentException()
def deco(*a: Any, **kw: Any) -> Any:
try:
return f(*a, **kw)
except Py4JJavaError as e:
converted = convert_exception(e.java_exception)
if not isinstance(converted, UnknownException):
# Hide where the exception came from that shows a non-Pythonic
# JVM exception message.
> raise converted from None
CPU/GPU output not equal
=================================== FAILURES ===================================
spark_tmp_path = '/tmp/pyspark_tests//ip-172-31-8-237-main-10407-1322606390/'
metadata_column = 'file_path'
@pytest.mark.skipif(is_before_spark_330(), reason='Hidden file metadata columns are a new feature of Spark 330')
@allow_non_gpu(any = True)
@pytest.mark.parametrize('metadata_column', ["file_path", "file_name", "file_size", "file_modification_time"])
def test_csv_scan_with_hidden_metadata_fallback(spark_tmp_path, metadata_column):
data_path = spark_tmp_path + "/hidden_metadata.csv"
with_cpu_session(lambda spark : spark.range(10) \
.selectExpr("id") \
.write \
.mode("overwrite") \
.csv(data_path))
def do_csv_scan(spark):
df = spark.read.csv(data_path).selectExpr("_c0", "_metadata.{}".format(metadata_column))
return df
assert_cpu_and_gpu_are_equal_collect_with_capture(
do_csv_scan,
exist_classes= "FileSourceScanExec",
> non_exist_classes= "GpuBatchScanExec")
:504:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
:378: in assert_cpu_and_gpu_are_equal_collect_with_capture
from_cpu, cpu_df = with_cpu_session(bring_back, conf=conf)
:116: in with_cpu_session
return with_spark_session(func, conf=copy)
:100: in with_spark_session
ret = func(_spark)
:207: in bring_back
df = limit_func(spark)
:498: in do_csv_scan
df = spark.read.csv(data_path).selectExpr("_c0", "_metadata.{}".format(metadata_column))
:3077: in selectExpr
jdf = self._jdf.selectExpr(self._jseq(expr))
:1323: in __call__
answer, self.gateway_client, self.target_id, self.name)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
a = ('xro248260', <py4j.clientserver.JavaClient object at 0x7f1cedc63410>, 'o248257', 'selectExpr')
kw = {}, converted = AnalysisException()
def deco(*a: Any, **kw: Any) -> Any:
try:
return f(*a, **kw)
except Py4JJavaError as e:
converted = convert_exception(e.java_exception)
if not isinstance(converted, UnknownException):
# Hide where the exception came from that shows a non-Pythonic
# JVM exception message.
> raise converted from None
Describe the bug: delta_lake_test FAILED with org.apache.spark.sql.delta.ColumnMappingUnsupportedException: "The column mapping mode `id` is not supported for this Delta version. Please upgrade if you want to use this mode."
The test FAILED with the Spark 3.2.x and 3.3.x shims, and was SKIPPED on the other Spark shims' pytests.
Related PR: https://github.com/NVIDIA/spark-rapids/pull/9279/