CoffeaTeam / coffea

Basic tools and wrappers for enabling not-too-alien syntax when running columnar Collider HEP analysis.
https://coffeateam.github.io/coffea/
BSD 3-Clause "New" or "Revised" License
132 stars 126 forks source link

Issue with ak.flatten() for NanoEventsFactory with delayed=True #1199

Open rkansal47 opened 1 week ago

rkansal47 commented 1 week ago

Describe the bug

Error:

TypeError: cannot compare unknown lengths against known values

This error occurred while calling

    ak.flatten(
        Array-instance
        axis = 3
    )

with coffea 2024.10.0 and awkward 2.6.9.

To reproduce (minimal reproducible example):

events_delayed = nanoevents.NanoEventsFactory.from_root(
    {
        "root://cmseos.fnal.gov///store/user/lpcpfnano/cmantill/v2_3/2017/HH/GluGluToHHTobbVV_node_cHHH0_TuneCP5_13TeV-powheg-pythia8/GluGluToHHTobbVV_node_cHHH0/220808_163755/0000/nano_mc2017_1-1.root": "Events"
    },
    schemaclass=nanoevents.NanoAODSchema,
    delayed=True,
).events()

higgs = events_delayed.GenPart[events_delayed.GenPart.hasFlags(["fromHardProcess", "isLastCopy"]) * (events_delayed.GenPart.pdgId == 25)]
ak.flatten(higgs.children.children, axis=3)

The same code runs fine with delayed=False; the error only occurs with delayed=True.

Full stack trace:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[22], line 10
      1 events_delayed = nanoevents.NanoEventsFactory.from_root(
      2     {
      3         "root://cmseos.fnal.gov///store/user/lpcpfnano/cmantill/v2_3/2017/HH/GluGluToHHTobbVV_node_cHHH0_TuneCP5_13TeV-powheg-pythia8/GluGluToHHTobbVV_node_cHHH0/220808_163755/0000/nano_mc2017_1-1.root": "Events"
   (...)
      6     delayed=True,
      7 ).events()
      9 higgs = events_delayed.GenPart[events_delayed.GenPart.hasFlags(["fromHardProcess", "isLastCopy"]) * (events_delayed.GenPart.pdgId == 25)]
---> 10 ak.flatten(higgs.children.children, axis=3)

File ~/mambaforge/envs/python311/lib/python3.11/site-packages/awkward/_dispatch.py:51, in named_high_level_function.<locals>.dispatch(*args, **kwargs)
     49     continue
     50 else:
---> 51     result = custom_impl(dispatch, array_likes, args, kwargs)
     53     # Future proof the implementation by permitting the `__awkward_function__` to return `NotImplemented`
     54     # This may later be used to signal that another overload should be used.
     55     if result is NotImplemented:

File ~/mambaforge/envs/python311/lib/python3.11/site-packages/dask_awkward/lib/core.py:1693, in Array.__awkward_function__(self, func, array_likes, args, kwargs)
   1691     except AttributeError:
   1692         return NotImplemented
-> 1693 return fn(*args, **kwargs)

File ~/mambaforge/envs/python311/lib/python3.11/site-packages/dask_awkward/lib/structure.py:445, in flatten(array, axis, highlevel, behavior, attrs)
    443 if not highlevel:
    444     raise ValueError("Only highlevel=True is supported")
--> 445 return map_partitions(
    446     _FlattenFn(axis=axis, highlevel=highlevel, behavior=behavior, attrs=attrs),
    447     array,
    448     label="flatten",
    449     output_divisions=None,
    450 )

File ~/mambaforge/envs/python311/lib/python3.11/site-packages/dask_awkward/lib/core.py:2186, in map_partitions(base_fn, label, token, meta, output_divisions, traverse, *args, **kwargs)
   2178         arg_lens_for_repackers.append(1)
   2180 fn = ArgsKwargsPackedFunction(
   2181     base_fn,
   2182     arg_repackers,
   2183     kwarg_repacker,
   2184     arg_lens_for_repackers,
   2185 )
-> 2186 return _map_partitions(
   2187     fn,
   2188     *arg_flat_deps_expanded,
   2189     *kwarg_flat_deps,
   2190     label=label,
   2191     token=token,
   2192     meta=meta,
   2193     output_divisions=output_divisions,
   2194 )

File ~/mambaforge/envs/python311/lib/python3.11/site-packages/dask_awkward/lib/core.py:2022, in _map_partitions(fn, label, token, meta, output_divisions, *args, **kwargs)
   2014 lay = partitionwise_layer(
   2015     fn,
   2016     name,
   2017     *args,
   2018     **kwargs,
   2019 )
   2021 if meta is None:
-> 2022     meta = map_meta(fn, *args, **kwargs)
   2024 hlg = HighLevelGraph.from_collections(
   2025     name,
   2026     lay,
   2027     dependencies=deps,
   2028 )
   2030 if len(dak_arrays) == 0:

File ~/mambaforge/envs/python311/lib/python3.11/site-packages/dask_awkward/lib/core.py:2538, in map_meta(fn, *deps)
   2534 def map_meta(fn: Callable | ArgsKwargsPackedFunction, *deps: Any) -> ak.Array | None:
   2535     # NOTE: fn is assumed to be a *packed* function
   2536     #       as defined up in map_partitions. be careful!
   2537     try:
-> 2538         meta = fn(*to_meta(deps))
   2539         return meta
   2540     except Exception as err:
   2541         # if compute-unknown-meta is False then we don't care about
   2542         # this failure and we return None.

File ~/mambaforge/envs/python311/lib/python3.11/site-packages/dask_awkward/lib/core.py:1982, in ArgsKwargsPackedFunction.__call__(self, *args_deps_expanded)
   1980     len_args += n_args
   1981 kwargs = self.kwarg_repacker(args_deps_expanded[len_args:])[0]
-> 1982 return self.fn(*args, **kwargs)

File ~/mambaforge/envs/python311/lib/python3.11/site-packages/dask_awkward/lib/structure.py:432, in _FlattenFn.__call__(self, array)
    431 def __call__(self, array: ak.Array) -> ak.Array:
--> 432     return ak.flatten(array, **self.kwargs)

File ~/mambaforge/envs/python311/lib/python3.11/site-packages/awkward/_dispatch.py:64, in named_high_level_function.<locals>.dispatch(*args, **kwargs)
     62 # Failed to find a custom overload, so resume the original function
     63 try:
---> 64     next(gen_or_result)
     65 except StopIteration as err:
     66     return err.value

File ~/mambaforge/envs/python311/lib/python3.11/site-packages/awkward/operations/ak_flatten.py:172, in flatten(array, axis, highlevel, behavior, attrs)
    169 yield (array,)
    171 # Implementation
--> 172 return _impl(array, axis, highlevel, behavior, attrs)

File ~/mambaforge/envs/python311/lib/python3.11/site-packages/awkward/operations/ak_flatten.py:236, in _impl(array, axis, highlevel, behavior, attrs)
    234     out = apply(layout)
    235 else:
--> 236     out = ak._do.flatten(layout, axis)
    237 return ctx.wrap(out, highlevel=highlevel)

File ~/mambaforge/envs/python311/lib/python3.11/site-packages/awkward/_do.py:196, in flatten(layout, axis)
    195 def flatten(layout: Content, axis: int = 1) -> Content:
--> 196     offsets, flattened = layout._offsets_and_flattened(axis, 1)
    197     return flattened

File ~/mambaforge/envs/python311/lib/python3.11/site-packages/awkward/contents/listoffsetarray.py:724, in ListOffsetArray._offsets_and_flattened(self, axis, depth)
    721     return (listoffsetarray.offsets, content)
    723 else:
--> 724     inneroffsets, flattened = self._content._offsets_and_flattened(
    725         axis, depth + 1
    726     )
    727     offsets = Index64.zeros(
    728         0,
    729         nplike=self._backend.index_nplike,
    730         dtype=np.int64,
    731     )
    733     if inneroffsets.length is not unknown_length and inneroffsets.length == 0:

File ~/mambaforge/envs/python311/lib/python3.11/site-packages/awkward/contents/listoffsetarray.py:724, in ListOffsetArray._offsets_and_flattened(self, axis, depth)
    721     return (listoffsetarray.offsets, content)
    723 else:
--> 724     inneroffsets, flattened = self._content._offsets_and_flattened(
    725         axis, depth + 1
    726     )
    727     offsets = Index64.zeros(
    728         0,
    729         nplike=self._backend.index_nplike,
    730         dtype=np.int64,
    731     )
    733     if inneroffsets.length is not unknown_length and inneroffsets.length == 0:

File ~/mambaforge/envs/python311/lib/python3.11/site-packages/awkward/contents/indexedoptionarray.py:595, in IndexedOptionArray._offsets_and_flattened(self, axis, depth)
    591 next = self._content._carry(nextcarry, False)
    593 offsets, flattened = next._offsets_and_flattened(axis, depth)
--> 595 if offsets.length == 0:
    596     return (
    597         offsets,
    598         ak.contents.IndexedOptionArray(
    599             outindex, flattened, parameters=self._parameters
    600         ),
    601     )
    603 else:

File ~/mambaforge/envs/python311/lib/python3.11/site-packages/awkward/_nplikes/shape.py:70, in UnknownLength.__eq__(self, other)
     68     return True
     69 else:
---> 70     raise TypeError("cannot compare unknown lengths against known values")

TypeError: cannot compare unknown lengths against known values

This error occurred while calling

    ak.flatten(
        Array-instance
        axis = 3
    )
rkansal47 commented 1 week ago

Following up on our discussion, @lgray: I saved the higgs.children.children[:10] awkward array to JSON [1] and loaded it with dask_awkward; the same flatten operation then worked correctly:

a = dak.from_json(Path("./higgs_children.json"), line_delimited=False)[0]
ak.flatten(a, axis=3).compute()  # works

[1] higgs_children.json