holoviz / holoviews

With Holoviews, your data visualizes itself.
https://holoviews.org
BSD 3-Clause "New" or "Revised" License
2.66k stars 396 forks source link

HoloViews kdims go missing when sent to a separate process #6244

Closed ahuang11 closed 1 month ago

ahuang11 commented 1 month ago

This works with the default scheduler (the custom kdim is preserved):

import holoviews as hv
import dask

hv.extension("bokeh")

@dask.delayed
def test_save(hv_obj):
    print(hv_obj.kdims)
    hv.save(hv_obj, "test.html")

test_save(hv.Curve([0, 1, 2], kdims=["Yaxis"])).compute()

# outputs
[Dimension('Yaxis')]

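This results in an error when `scheduler="processes"` is used (the kdim is reset to the default `x`, and saving then fails with a `KeyError`):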

import holoviews as hv
import dask

hv.extension("bokeh")

@dask.delayed
def test_save(hv_obj):
    print(hv_obj.kdims)
    hv.save(hv_obj, "test.html")

test_save(hv.Curve([0, 1, 2], kdims=["Yaxis"])).compute(scheduler="processes")

# outputs
[Dimension('x')]
File ~/miniforge3/envs/holoviews/lib/python3.10/site-packages/dask/base.py:375, in DaskMethodsMixin.compute(self, **kwargs)
    351 def compute(self, **kwargs):
    352     """Compute this dask collection
    353 
    354     This turns a lazy Dask collection into its in-memory equivalent.
   (...)
    373     dask.compute
    374     """
--> 375     (result,) = compute(self, traverse=False, **kwargs)
    376     return result

File ~/miniforge3/envs/holoviews/lib/python3.10/site-packages/dask/base.py:661, in compute(traverse, optimize_graph, scheduler, get, *args, **kwargs)
    658     postcomputes.append(x.__dask_postcompute__())
    660 with shorten_traceback():
--> 661     results = schedule(dsk, keys, **kwargs)
    663 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])

File ~/miniforge3/envs/holoviews/lib/python3.10/site-packages/dask/multiprocessing.py:112, in reraise(exc, tb)
    110 def reraise(exc, tb=None):
    111     exc = remote_exception(exc, tb)
--> 112     raise exc

KeyError: 'x'

ahuang11 commented 1 month ago

The reproducer can be simplified to a serialize/deserialize round trip with `dask.distributed`:

import holoviews as hv
from dask.distributed.protocol import serialize, deserialize

curve = hv.Curve([0, 1, 2], kdims=["X-AXIS"])

print(type(curve))
print(curve.kdims, "BEFORE")
curve = deserialize(*serialize(curve))
print(curve.kdims, "AFTER")

And it is not specific to dask; the same setup with `concurrent.futures.ProcessPoolExecutor`:

import holoviews as hv
import concurrent.futures

hv.extension("bokeh")

def test_save(hv_obj):
    print(hv_obj.kdims)
    print(hv_obj.kdims)
    hv.save(hv_obj, "test.html")

if __name__ == "__main__":
    curve = hv.Curve([0, 1, 2], kdims=["Yaxis"])
    with concurrent.futures.ProcessPoolExecutor() as executor:
        future = executor.submit(test_save, curve)
        result = future.result()

Or, simplest of all, a plain pickle round trip:

import pickle
import holoviews as hv
hv.extension("bokeh")

curve = hv.Curve([0, 1, 2], kdims=["XAXIS"])
pickle.loads(pickle.dumps(curve))