scikit-hep / awkward

Manipulate JSON-like data with NumPy-like idioms.
https://awkward-array.org
BSD 3-Clause "New" or "Revised" License
843 stars 89 forks source link

ak.from_rdataframe exception in local tests #2195

Closed jpivarski closed 1 year ago

jpivarski commented 1 year ago

Version of Awkward Array

HEAD

Description and code to reproduce

@ianna, is this related to a recent PR?

tests/test_1473_from_rdataframe.py F

=========================================== FAILURES ===========================================
________________________________ test_to_from_data_frame_large _________________________________

    def test_to_from_data_frame_large():
        # Note, with n = 30 (14348907) this test takes ~40 sec to run on my laptop
        n = 6
        assert 2 * (n // 2) == n
        rows = 3 ** (n // 2)
        cols = n

        arr = np.zeros((rows, cols), dtype=np.int64)
        shape = (rows,)

        source = np.array([-1, 0, 1], dtype=np.int64)[:, None]

        for col in range(n // 2):
            shape = (
                -1,
                3,
                shape[-1] // 3,
            )
            col_view = arr[:, col]
            col_view.shape = shape
            col_view[:] = source

        ak_array_in = ak.from_numpy(arr, regulararray=True)

        data_frame = ak.to_rdataframe({"x": ak_array_in})

>       ak_array_out = ak.from_rdataframe(
            data_frame,
            columns=("x",),
        )

ak_array_in = <Array [[-1, -1, -1, 0, 0, 0], ..., [1, 1, 1, ..., 0, 0]] type='27 * 6 * int64'>
arr        = array([[-1, -1, -1,  0,  0,  0],
       [-1, -1,  0,  0,  0,  0],
       [-1, -1,  1,  0,  0,  0],
       [-1,  0, -1,...0,  1,  0,  0,  0],
       [ 1,  1, -1,  0,  0,  0],
       [ 1,  1,  0,  0,  0,  0],
       [ 1,  1,  1,  0,  0,  0]])
col        = 2
col_view   = array([[[-1],
        [ 0],
        [ 1]],

       [[-1],
        [ 0],
        [ 1]],

       [[-1],
        [ 0],
  ...,
        [ 0],
        [ 1]],

       [[-1],
        [ 0],
        [ 1]],

       [[-1],
        [ 0],
        [ 1]]])
cols       = 6
data_frame = <cppyy.gbl.ROOT.RDataFrame object at 0x56223fcbbc90>
n          = 6
rows       = 27
shape      = (-1, 3, 1)
source     = array([[-1],
       [ 0],
       [ 1]])

tests/test_1473_from_rdataframe.py:42: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
src/awkward/operations/ak_from_rdataframe.py:30: in from_rdataframe
    return _impl(rdf, columns, offsets_type)
        columns    = ('x',)
        offsets_type = 'int64_t'
        rdf        = <cppyy.gbl.ROOT.RDataFrame object at 0x56223fcbbc90>
src/awkward/operations/ak_from_rdataframe.py:50: in _impl
    out = ak._connect.rdataframe.from_rdataframe.from_rdataframe(
        awkward    = <module 'awkward' from '/home/jpivarski/irishep/awkward/src/awkward/__init__.py'>
        columns    = ('x',)
        data_frame = <cppyy.gbl.ROOT.RDataFrame object at 0x56223fcbbc90>
        offsets_type = 'int64_t'
        project    = False
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

data_frame = <cppyy.gbl.ROOT.RDataFrame object at 0x56223fcbbc90>, columns = ('x',)
offsets_type = 'int64_t'

    def from_rdataframe(data_frame, columns, offsets_type="int64_t"):
        def cpp_builder_type(depth, data_type):
            if depth == 1:
                return f"awkward::LayoutBuilder::Numpy<{data_type}>>"
            else:
                return (
                    "awkward::LayoutBuilder::ListOffset<int64_t, "
                    + cpp_builder_type(depth - 1, data_type)
                    + ">"
                )

        def cpp_fill_offsets_and_flatten(depth):
            if depth == 1:
                return (
                    "\nfor (auto const& it : vec1) {\n" + "  builder1.append(it);\n" + "}\n"
                )
            else:
                return (
                    f"for (auto const& vec{depth - 1} : vec{depth}) "
                    + "{\n"
                    + f"  auto& builder{depth - 1} = builder{depth}.begin_list();\n"
                    + "  "
                    + cpp_fill_offsets_and_flatten(depth - 1)
                    + "\n"
                    + f"  builder{depth}.end_list();\n"
                    + "}\n"
                )

        def cpp_fill_function(depth):
            if depth == 1:
                return (
                    "template<class BUILDER, typename PRIMITIVE>\n"
                    + "void\n"
                    + "fill_from(BUILDER& builder, ROOT::RDF::RResultPtr<std::vector<PRIMITIVE>>& result) {"
                    + "  for (auto const& it : result) {\n"
                    + "    builder.append(it);\n"
                    + "  }\n"
                    + "}\n"
                )
            else:
                return (
                    "template<class BUILDER, typename PRIMITIVE>\n"
                    + "void\n"
                    + f"fill_offsets_and_flatten{depth}(BUILDER& builder{depth}, ROOT::RDF::RResultPtr<std::vector<PRIMITIVE>>& result) "
                    + "{\n"
                    + f"  for (auto const& vec{depth - 1} : result) "
                    + "{\n"
                    + f"  auto& builder{depth - 1} = builder{depth}.begin_list();\n"
                    + "  "
                    + cpp_fill_offsets_and_flatten(depth - 1)
                    + "\n"
                    + f"  builder{depth}.end_list();\n"
                    + "}\n"
                    + "}\n"
                )

        def form_dtype(form):
            if isinstance(form, ak.forms.NumpyForm) and form.inner_shape == ():
                return primitive_to_dtype(form.primitive)
            elif isinstance(form, ak.forms.ListOffsetForm):
                return form_dtype(form.content)

        # Register Take action for each column
        # 'Take' is a lazy action:
        column_types = {}
        result_ptrs = {}
        contents = {}

        # Important note: This loop is separate from the next one
        # in order not to trigger the additional RDataFrame
        # Event loops
        for col in columns:
            column_types[col] = data_frame.GetColumnType(col)
            result_ptrs[col] = data_frame.Take[column_types[col]](col)

        for col in columns:
            if ROOT.awkward.is_awkward_type[column_types[col]]():  # Retrieve Awkward arrays

                # ROOT::RDF::RResultPtr<T>::begin Returns an iterator to the beginning of
                # the contained object if this makes sense, throw a compilation error otherwise.
                #
                # Does not trigger event loop and execution of all actions booked in
                # the associated RLoopManager.
                lookup = result_ptrs[col].begin().lookup()
                generator = lookup[col].generator
                layout = generator.tolayout(lookup[col], 0, ())
                contents[col] = layout

            else:  # Convert the C++ vectors to Awkward arrays
>               form_str = ROOT.awkward.type_to_form[column_types[col], offsets_type](0)
E               TypeError: Could not instantiate type_to_form<ROOT::VecOps::RVec<int64_t>,int64_t>:
E                 Failed to instantiate "type_to_form<ROOT::VecOps::RVec<int64_t>,int64_t>(int)"

col        = 'x'
column_types = {'x': 'ROOT::VecOps::RVec<int64_t>'}
columns    = ('x',)
contents   = {}
cpp_builder_type = <function from_rdataframe.<locals>.cpp_builder_type at 0x7f9688100af0>
cpp_fill_function = <function from_rdataframe.<locals>.cpp_fill_function at 0x7f96900cb670>
cpp_fill_offsets_and_flatten = <function from_rdataframe.<locals>.cpp_fill_offsets_and_flatten at 0x7f96900cb8b0>
data_frame = <cppyy.gbl.ROOT.RDataFrame object at 0x56223fcbbc90>
form_dtype = <function from_rdataframe.<locals>.form_dtype at 0x7f96900cb550>
offsets_type = 'int64_t'
result_ptrs = {'x': <cppyy.gbl.ROOT.RDF.RResultPtr<vector<ROOT::VecOps::RVec<long> > > object at 0x56223feee230>}

src/awkward/_connect/rdataframe/from_rdataframe.py:155: TypeError
jpivarski commented 1 year ago

(Wrong issue, sorry for the label switch-arounds!)

ianna commented 1 year ago

Oh, is it failing on Windows? I'll double check.

jpivarski commented 1 year ago

This was on Linux, in a test on my local computer. I was in the midst of something with time pressure when I posted this. Let me re-test now, and see if I need to update ROOT or something. (If updating ROOT fixes it, it's still a bug; but let me just see if that's why the CI tests pass.)

jpivarski commented 1 year ago

Could there have been a change in `awkward-cpp`? I just reinstalled from main from scratch (rebuilding the files with `nox -s prepare`) and it's all fine now: it works in sequential mode and in parallel. I did not update my ROOT version, so that's not it.

Maybe there's something intermittent? If it comes up again, we can revisit this, but you can't debug an error that you can't reproduce, so I'll close this now.