However, the same query returns a result when using CPU execution.
What you expected to happen:
The query should not raise an error when using GPU execution.
Minimal Complete Verifiable Example:
# Minimal reproducer: register three one-row tables twice each (gpu=False and
# gpu=True), then run the same CASE ... LEFT JOIN query against both sets and
# print the results for comparison.
import pandas as pd
import dask.dataframe as dd
from dask_sql import Context
# One context holds both the gpu=False and the gpu=True registration of each table.
c = Context()
# t0: a single row with one float column.
df0 = pd.DataFrame({
'c0': [3231.0000],
})
t0 = dd.from_pandas(df0, npartitions=1)
# The same single-partition dask dataframe is registered under two names,
# differing only in the gpu flag.
c.create_table('t0', t0, gpu=False)
c.create_table('t0_gpu', t0, gpu=True)
# t1: a single row with one float column.
df1 = pd.DataFrame({
'c0': [1537.0000],
})
t1 = dd.from_pandas(df1, npartitions=1)
c.create_table('t1', t1, gpu=False)
c.create_table('t1_gpu', t1, gpu=True)
# t2: a single row with two string columns. c0 is the empty string and c1 is
# the literal text "DATE '2008-11-25'" (a Python str, not a date value).
df2 = pd.DataFrame({
'c0': [''],
'c1': ["DATE '2008-11-25'"],
})
t2 = dd.from_pandas(df2, npartitions=1)
c.create_table('t2', t2, gpu=False)
c.create_table('t2_gpu', t2, gpu=True)
# Query against the gpu=False tables: the CASE compares the literal '1' with
# t2.c1 and yields t2.c0 on a match, otherwise ''.
print('CPU Result:')
result1= c.sql("SELECT (CASE '1' WHEN t2.c1 THEN t2.c0 ELSE '' END ) FROM t1, t0 LEFT JOIN t2 ON (true)").compute()
print(result1)
# Identical query against the gpu=True tables. Per the traceback in this
# report, this c.sql(...) call raises
# "ValueError: Metadata inference failed in `where`".
print('GPU Result:')
result2= c.sql("SELECT (CASE '1' WHEN t2_gpu.c1 THEN t2_gpu.c0 ELSE '' END ) FROM t1_gpu, t0_gpu LEFT JOIN t2_gpu ON (true)").compute()
print(result2)
Result:
INFO:numba.cuda.cudadrv.driver:init
CPU Result:
CASE Utf8("1") WHEN t2.c1 THEN t2.c0 ELSE Utf8("") END
0
GPU Result:
Traceback (most recent call last):
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask/dataframe/utils.py", line 193, in raise_on_meta_error
yield
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask/dataframe/core.py", line 6793, in _emulate
return func(*_extract_meta(args, True), **_extract_meta(kwargs, True))
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask/utils.py", line 1105, in __call__
return getattr(__obj, self.method)(*args, **kwargs)
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/nvtx/nvtx.py", line 101, in inner
result = func(*args, **kwargs)
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/cudf/core/series.py", line 3504, in where
result_col = super().where(cond, other, inplace)
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/nvtx/nvtx.py", line 101, in inner
result = func(*args, **kwargs)
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/cudf/core/single_column_frame.py", line 434, in where
input_col, other = _check_and_cast_columns_with_other(
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/cudf/core/_internals/where.py", line 84, in _check_and_cast_columns_with_other
if _is_non_decimal_numeric_dtype(source_dtype) and _can_cast(
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/cudf/utils/dtypes.py", line 680, in _can_cast
return np.can_cast(from_dtype, to_dtype)
File "<__array_function__ internals>", line 200, in can_cast
TypeError: did not understand one of the types; 'None' not accepted
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/tmp/test.py", line 34, in <module>
result2= c.sql("SELECT (CASE '1' WHEN t2_gpu.c1 THEN t2_gpu.c0 ELSE '' END ) FROM t1_gpu, t0_gpu LEFT JOIN t2_gpu ON (true)").compute()
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask_sql/context.py", line 513, in sql
return self._compute_table_from_rel(rel, return_futures)
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask_sql/context.py", line 839, in _compute_table_from_rel
dc = RelConverter.convert(rel, context=self)
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask_sql/physical/rel/convert.py", line 61, in convert
df = plugin_instance.convert(rel, context=context)
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask_sql/physical/rel/logical/project.py", line 57, in convert
new_columns[random_name] = RexConverter.convert(
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask_sql/physical/rex/convert.py", line 74, in convert
df = plugin_instance.convert(rel, rex, dc, context=context)
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask_sql/physical/rex/core/call.py", line 1129, in convert
return operation(*operands, **kwargs)
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask_sql/physical/rex/core/call.py", line 77, in __call__
return self.f(*operands, **kwargs)
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask_sql/physical/rex/core/call.py", line 221, in case
return then.where(where, other=other)
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask/dataframe/core.py", line 3349, in where
return map_partitions(M.where, self, cond, other, enforce_metadata=False)
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask/dataframe/core.py", line 6863, in map_partitions
meta = _get_meta_map_partitions(args, dfs, func, kwargs, meta, parent_meta)
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask/dataframe/core.py", line 6974, in _get_meta_map_partitions
meta = _emulate(func, *args, udf=True, **kwargs)
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask/dataframe/core.py", line 6792, in _emulate
with raise_on_meta_error(funcname(func), udf=udf), check_numeric_only_deprecation():
File "/opt/conda/envs/rapids/lib/python3.10/contextlib.py", line 153, in __exit__
self.gen.throw(typ, value, traceback)
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask/dataframe/utils.py", line 214, in raise_on_meta_error
raise ValueError(msg) from e
ValueError: Metadata inference failed in `where`.
You have supplied a custom function and Dask is unable to
determine the type of output that that function returns.
To resolve this please provide a meta= keyword.
The docstring of the Dask function you ran should have more information.
Original error is below:
------------------------
TypeError("did not understand one of the types; 'None' not accepted")
Traceback:
---------
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask/dataframe/utils.py", line 193, in raise_on_meta_error
yield
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask/dataframe/core.py", line 6793, in _emulate
return func(*_extract_meta(args, True), **_extract_meta(kwargs, True))
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/dask/utils.py", line 1105, in __call__
return getattr(__obj, self.method)(*args, **kwargs)
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/nvtx/nvtx.py", line 101, in inner
result = func(*args, **kwargs)
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/cudf/core/series.py", line 3504, in where
result_col = super().where(cond, other, inplace)
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/nvtx/nvtx.py", line 101, in inner
result = func(*args, **kwargs)
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/cudf/core/single_column_frame.py", line 434, in where
input_col, other = _check_and_cast_columns_with_other(
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/cudf/core/_internals/where.py", line 84, in _check_and_cast_columns_with_other
if _is_non_decimal_numeric_dtype(source_dtype) and _can_cast(
File "/opt/conda/envs/rapids/lib/python3.10/site-packages/cudf/utils/dtypes.py", line 680, in _can_cast
return np.can_cast(from_dtype, to_dtype)
File "<__array_function__ internals>", line 200, in can_cast
What happened:
A query of the form `SELECT (CASE 'literal' WHEN column THEN column ELSE 'literal' END) FROM table JOIN table` raises an error when using GPU execution.
- dask-sql version: 2023.6.0
- Python version: Python 3.10.11
- Operating System: Ubuntu 22.04
- Install method (conda, pip, source): Docker deploy by https://hub.docker.com/layers/rapidsai/rapidsai-dev/23.06-cuda11.8-devel-ubuntu22.04-py3.10/images/sha256-cfbb61fdf7227b090a435a2e758114f3f1c31872ed8dbd96e5e564bb5fd184a7?context=explore
- © Githubissues.
- Githubissues is a development platform for aggregating issues.
However, the same query returns a result when using CPU execution.
What you expected to happen:
The query should not raise an error when using GPU execution.
Minimal Complete Verifiable Example:
Result:
Anything else we need to know?:
Environment: