dask-contrib / dask-histogram

Histograms with task scheduling.
https://dask-histogram.readthedocs.io
BSD 3-Clause "New" or "Revised" License
23 stars 4 forks source link

Return empty histogram on `compute()` if there are no staged fills #115

Closed douglasdavis closed 10 months ago

NJManganelli commented 11 months ago

Thanks for opening this @douglasdavis

For posterity, the Slack post made earlier: "Some coffea2023/dask_awkward/dask_histogram testing. If compute is called on a collection of dask objects including any un-filled histograms, it gives the semi-cryptic error message in [1]. It seems like a pruning step, such as removing histograms failing isinstance(.dask, dask.highlevelgraph.HighLevelGraph), would work for these, but during development I'm frequently creating histograms which aren't always filled; it could be that sometimes a histogram is only filled for certain datasets or under certain conditions. Delegating this all to the user is one choice, but I'd argue in favor of prompting a warning (or an error demotable to a warning) but returning a concrete empty histogram." [1]

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
Cell In[22], line 1
----> 1 dask.compute(res_ee)

File ~/tchizh/lib/python3.11/site-packages/dask/base.py:611, in compute(traverse, optimize_graph, scheduler, get, *args, **kwargs)
    566 def compute(
    567     *args, traverse=True, optimize_graph=True, scheduler=None, get=None, **kwargs
    568 ):
    569     """Compute several dask collections at once.
    570 
    571     Parameters
   (...)
    608     ({'a': 45, 'b': 4.5, 'c': 1},)
    609     """
--> 611     collections, repack = unpack_collections(*args, traverse=traverse)
    612     if not collections:
    613         return args

File ~/tchizh/lib/python3.11/site-packages/dask/base.py:502, in unpack_collections(traverse, *args)
    499     return tok
    501 out = uuid.uuid4().hex
--> 502 repack_dsk[out] = (tuple, [_unpack(i) for i in args])
    504 def repack(results):
    505     dsk = repack_dsk.copy()

File ~/tchizh/lib/python3.11/site-packages/dask/base.py:502, in <listcomp>(.0)
    499     return tok
    501 out = uuid.uuid4().hex
--> 502 repack_dsk[out] = (tuple, [_unpack(i) for i in args])
    504 def repack(results):
    505     dsk = repack_dsk.copy()

File ~/tchizh/lib/python3.11/site-packages/dask/base.py:479, in unpack_collections.<locals>._unpack(expr)
    477     tsk = (typ, [_unpack(i) for i in expr])
    478 elif typ in (dict, OrderedDict):
--> 479     tsk = (typ, [[_unpack(k), _unpack(v)] for k, v in expr.items()])
    480 elif dataclasses.is_dataclass(expr) and not isinstance(expr, type):
    481     tsk = (
    482         apply,
    483         typ,
   (...)
    491         ),
    492     )

File ~/tchizh/lib/python3.11/site-packages/dask/base.py:479, in <listcomp>(.0)
    477     tsk = (typ, [_unpack(i) for i in expr])
    478 elif typ in (dict, OrderedDict):
--> 479     tsk = (typ, [[_unpack(k), _unpack(v)] for k, v in expr.items()])
    480 elif dataclasses.is_dataclass(expr) and not isinstance(expr, type):
    481     tsk = (
    482         apply,
    483         typ,
   (...)
    491         ),
    492     )

File ~/tchizh/lib/python3.11/site-packages/dask/base.py:463, in unpack_collections.<locals>._unpack(expr)
    462 def _unpack(expr):
--> 463     if is_dask_collection(expr):
    464         tok = tokenize(expr)
    465         if tok not in repack_dsk:

File ~/tchizh/lib/python3.11/site-packages/dask/base.py:214, in is_dask_collection(x)
    193 """Returns ``True`` if ``x`` is a dask collection.
    194 
    195 Parameters
   (...)
    211 
    212 """
    213 try:
--> 214     return x.__dask_graph__() is not None
    215 except (AttributeError, TypeError):
    216     return False

File ~/tchizh/lib/python3.11/site-packages/dask_histogram/boost.py:116, in Histogram.__dask_graph__(self)
    115 def __dask_graph__(self) -> HighLevelGraph:
--> 116     return self.dask

File ~/tchizh/lib/python3.11/site-packages/dask_histogram/boost.py:180, in Histogram.dask(self)
    177 @property
    178 def dask(self) -> HighLevelGraph:
    179     if self._dask is None:
--> 180         raise RuntimeError(
    181             "The dask graph should never be None when it's requested."
    182         )
    183     return self._dask

RuntimeError: The dask graph should never be None when it's requested.