Writing a Dask DataFrame that contains a categorical column to Delta Lake currently fails. Minimal reproduction:
import pandas as pd
import numpy as np
import dask.dataframe as dd
from dask_deltatable.write import to_deltalake
if __name__ == "__main__":
    # Minimal reproduction: build a small frame with an integer, a float and
    # a categorical column, then try to write it out with to_deltalake.
    n_rows = 100
    fruits = ["Apple", "Banana", "Watermelon", "Mango"]

    # Columns are generated in the order i1, f1, c1.
    ints = np.random.randint(1, 10000, size=n_rows)
    floats = np.random.random(n_rows)
    # "c1" is the categorical column; Arrow sees it as a dictionary type.
    categories = pd.Series(np.random.choice(fruits, size=n_rows), dtype="category")
    frame = pd.DataFrame({"i1": ints, "f1": floats, "c1": categories})

    ddf = dd.from_pandas(frame, npartitions=10)
    # The write is expected to fail with a Delta Lake schema error.
    to_deltalake("t4_data", ddf).compute()
This raises:
Traceback (most recent call last):
File "/Users/jbennet/src/dask-deltatable/t4.py", line 14, in <module>
to_deltalake("t4_data", ddf).compute()
File "/Users/jbennet/mambaforge/envs/dask-deltatable/lib/python3.9/site-packages/dask/base.py", line 310, in compute
(result,) = compute(self, traverse=False, **kwargs)
File "/Users/jbennet/mambaforge/envs/dask-deltatable/lib/python3.9/site-packages/dask/base.py", line 595, in compute
results = schedule(dsk, keys, **kwargs)
File "/Users/jbennet/mambaforge/envs/dask-deltatable/lib/python3.9/site-packages/dask/threaded.py", line 89, in get
results = get_async(
File "/Users/jbennet/mambaforge/envs/dask-deltatable/lib/python3.9/site-packages/dask/local.py", line 511, in get_async
raise_exception(exc, tb)
File "/Users/jbennet/mambaforge/envs/dask-deltatable/lib/python3.9/site-packages/dask/local.py", line 319, in reraise
raise exc
File "/Users/jbennet/mambaforge/envs/dask-deltatable/lib/python3.9/site-packages/dask/local.py", line 224, in execute_task
result = _execute_task(task, data)
File "/Users/jbennet/mambaforge/envs/dask-deltatable/lib/python3.9/site-packages/dask/core.py", line 121, in _execute_task
return func(*(_execute_task(a, cache) for a in args))
File "/Users/jbennet/src/dask-deltatable/dask_deltatable/write.py", line 180, in _commit
_write_new_deltalake(
Exception: Schema error: Invalid data type for Delta Lake: Dictionary(Int8, Utf8)
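PyArrow supports writing categorical columns (it represents them as a dictionary type, here `Dictionary(Int8, Utf8)`); delta-rs does not accept that type and rejects the schema.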
We can't currently write categoricals to DeltaLake.
This raises:
Pyarrow supports writing categories; delta-rs does not.