When reading data with dask-deltatable while a distributed client is active, the following error occurs:
<dask.highlevelgraph.HighLevelGraph object at 0x13fac3fd0>
0. read-delta-table-f039f9f4fa4ebf9b2dd57eb38dcfa70e
>.
Traceback (most recent call last):
File "/Users/jbennet/mambaforge/envs/dask-deltatable/lib/python3.9/site-packages/distributed/protocol/pickle.py", line 63, in dumps
result = pickle.dumps(x, **dump_kwargs)
TypeError: cannot pickle 'builtins.RawDeltaTable' object
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/Users/jbennet/mambaforge/envs/dask-deltatable/lib/python3.9/site-packages/distributed/protocol/pickle.py", line 68, in dumps
pickler.dump(x)
TypeError: cannot pickle 'builtins.RawDeltaTable' object
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/Users/jbennet/mambaforge/envs/dask-deltatable/lib/python3.9/site-packages/distributed/protocol/pickle.py", line 81, in dumps
result = cloudpickle.dumps(x, **dump_kwargs)
File "/Users/jbennet/mambaforge/envs/dask-deltatable/lib/python3.9/site-packages/cloudpickle/cloudpickle_fast.py", line 73, in dumps
cp.dump(obj)
File "/Users/jbennet/mambaforge/envs/dask-deltatable/lib/python3.9/site-packages/cloudpickle/cloudpickle_fast.py", line 632, in dump
return Pickler.dump(self, obj)
TypeError: cannot pickle 'builtins.RawDeltaTable' object
Traceback (most recent call last):
File "/Users/jbennet/mambaforge/envs/dask-deltatable/lib/python3.9/site-packages/distributed/protocol/pickle.py", line 63, in dumps
result = pickle.dumps(x, **dump_kwargs)
TypeError: cannot pickle 'builtins.RawDeltaTable' object
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/Users/jbennet/mambaforge/envs/dask-deltatable/lib/python3.9/site-packages/distributed/protocol/pickle.py", line 68, in dumps
pickler.dump(x)
TypeError: cannot pickle 'builtins.RawDeltaTable' object
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/Users/jbennet/mambaforge/envs/dask-deltatable/lib/python3.9/site-packages/distributed/protocol/serialize.py", line 350, in serialize
header, frames = dumps(x, context=context) if wants_context else dumps(x)
File "/Users/jbennet/mambaforge/envs/dask-deltatable/lib/python3.9/site-packages/distributed/protocol/serialize.py", line 73, in pickle_dumps
frames[0] = pickle.dumps(
File "/Users/jbennet/mambaforge/envs/dask-deltatable/lib/python3.9/site-packages/distributed/protocol/pickle.py", line 81, in dumps
result = cloudpickle.dumps(x, **dump_kwargs)
File "/Users/jbennet/mambaforge/envs/dask-deltatable/lib/python3.9/site-packages/cloudpickle/cloudpickle_fast.py", line 73, in dumps
cp.dump(obj)
File "/Users/jbennet/mambaforge/envs/dask-deltatable/lib/python3.9/site-packages/cloudpickle/cloudpickle_fast.py", line 632, in dump
return Pickler.dump(self, obj)
TypeError: cannot pickle 'builtins.RawDeltaTable' object
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/Users/jbennet/src/dask-deltatable/t6.py", line 25, in <module>
res = ddf_read.compute()
File "/Users/jbennet/mambaforge/envs/dask-deltatable/lib/python3.9/site-packages/dask/base.py", line 310, in compute
(result,) = compute(self, traverse=False, **kwargs)
File "/Users/jbennet/mambaforge/envs/dask-deltatable/lib/python3.9/site-packages/dask/base.py", line 595, in compute
results = schedule(dsk, keys, **kwargs)
File "/Users/jbennet/mambaforge/envs/dask-deltatable/lib/python3.9/site-packages/distributed/client.py", line 3207, in get
futures = self._graph_to_futures(
File "/Users/jbennet/mambaforge/envs/dask-deltatable/lib/python3.9/site-packages/distributed/client.py", line 3106, in _graph_to_futures
header, frames = serialize(ToPickle(dsk), on_error="raise")
File "/Users/jbennet/mambaforge/envs/dask-deltatable/lib/python3.9/site-packages/distributed/protocol/serialize.py", line 372, in serialize
raise TypeError(msg, str(x)[:10000]) from exc
TypeError: ('Could not serialize object of type HighLevelGraph', '<ToPickle: HighLevelGraph with 1 layers.\n<dask.highlevelgraph.HighLevelGraph object at 0x13fac3fd0>\n 0. read-delta-table-f039f9f4fa4ebf9b2dd57eb38dcfa70e\n>')
Reproducer:
import os
import shutil
import pandas as pd
import numpy as np
import dask.dataframe as dd
import dask_deltatable as ddt
from distributed import Client
if __name__ == "__main__":
    # Build a small pandas frame with one int, one float and one string column.
    df = pd.DataFrame({
        "i1": np.random.randint(1, 10000, size=100),
        "f1": np.random.random(100),
        "s1": np.random.choice(["Apple", "Banana", "Watermelon", "Mango"], size=100),
    })
    ddf = dd.from_pandas(df, npartitions=10)

    # Remove any table left over from a previous run before writing.
    data_path = "data/t6_data"
    if os.path.exists(data_path):
        shutil.rmtree(data_path)
    ddt.to_deltalake(data_path, ddf)

    # The client is created after the write and before the read.
    client = Client(processes=True)
    try:
        ddf_read = ddt.read_deltalake(data_path)
        # This compute() call is the frame in the traceback where the
        # "Could not serialize object of type HighLevelGraph" error surfaces.
        res = ddf_read.compute()
        print(f"\n{res}")
    finally:
        # Always shut the client down, even when compute() raises.
        client.close()
When reading data with dask-deltatable and a distributed client, an error happens:
Reproducer: