scikit-hep / uproot5

ROOT I/O in pure Python and NumPy.
https://uproot.readthedocs.io
BSD 3-Clause "New" or "Revised" License
239 stars 76 forks source link

Reading TNtuple into an array #1270

Open AndiZm opened 3 months ago

AndiZm commented 3 months ago

Tested with uproot version 5.3.10

When trying to read a TNtuple as an array (in this example using `library="np"`, but a similar error occurs when using e.g. `library="pd"` or when not specifying a library at all), we get an error.

This is a minimal example of the code that triggers it:

import uproot
file = uproot.open('file.root')
tree = file["Pairs"]
opd = tree["OpdEtcBr"]
arr = opd.array(library="np")

We are getting a DeserializationError:

---------------------------------------------------------------------------
DeserializationError                      Traceback (most recent call last)
Input In [13], in <cell line: 1>()
----> 1 arr = opd.array(library="np")

File ~\Anaconda3\lib\site-packages\uproot\behaviors\TBranch.py:1819, in TBranch.array(self, interpretation, entry_start, entry_stop, decompression_executor, interpretation_executor, array_cache, library, ak_add_doc)
   1816                 ranges_or_baskets.append((branch, basket_num, range_or_basket))
   1818 interp_options = {"ak_add_doc": ak_add_doc}
-> 1819 _ranges_or_baskets_to_arrays(
   1820     self,
   1821     ranges_or_baskets,
   1822     branchid_interpretation,
   1823     entry_start,
   1824     entry_stop,
   1825     decompression_executor,
   1826     interpretation_executor,
   1827     library,
   1828     arrays,
   1829     False,
   1830     interp_options,
   1831 )
   1833 _fix_asgrouped(
   1834     arrays,
   1835     expression_context,
   (...)
   1839     ak_add_doc,
   1840 )
   1842 if array_cache is not None:

File ~\Anaconda3\lib\site-packages\uproot\behaviors\TBranch.py:3105, in _ranges_or_baskets_to_arrays(hasbranches, ranges_or_baskets, branchid_interpretation, entry_start, entry_stop, decompression_executor, interpretation_executor, library, arrays, update_ranges_or_baskets, interp_options)
   3102     pass
   3104 elif isinstance(obj, tuple) and len(obj) == 3:
-> 3105     uproot.source.futures.delayed_raise(*obj)
   3107 else:
   3108     raise AssertionError(obj)

File ~\Anaconda3\lib\site-packages\uproot\source\futures.py:38, in delayed_raise(exception_class, exception_value, traceback)
     34 def delayed_raise(exception_class, exception_value, traceback):
     35     """
     36     Raise an exception from a background thread on the main thread.
     37     """
---> 38     raise exception_value.with_traceback(traceback)

File ~\Anaconda3\lib\site-packages\uproot\behaviors\TBranch.py:3054, in _ranges_or_baskets_to_arrays.<locals>.basket_to_array(basket)
   3051         context = dict(branch.context)
   3052         context["forth"] = forth_context[branch.cache_key]
-> 3054         basket_arrays[basket.basket_num] = interpretation.basket_array(
   3055             basket.data,
   3056             basket.byte_offsets,
   3057             basket,
   3058             branch,
   3059             context,
   3060             basket.member("fKeylen"),
   3061             library,
   3062             interp_options,
   3063         )
   3064         if basket.num_entries != len(basket_arrays[basket.basket_num]):
   3065             raise ValueError(
   3066                 f"""basket {basket.basket_num} in tree/branch {branch.object_path} has the wrong number of entries """
   3067                 f"""(expected {basket.num_entries}, obtained {len(basket_arrays[basket.basket_num])}) when interpreted as {interpretation}
   3068 in file {branch.file.file_path}"""
   3069             )

File ~\Anaconda3\lib\site-packages\uproot\interpretation\objects.py:174, in AsObjects.basket_array(self, data, byte_offsets, basket, branch, context, cursor_offset, library, options)
    162     output = self.basket_array_forth(
    163         data,
    164         byte_offsets,
   (...)
    170         options,
    171     )
    173 else:
--> 174     output = ObjectArray(
    175         self._model, branch, context, byte_offsets, data, cursor_offset
    176     ).to_numpy()
    178 self.hook_after_basket_array(
    179     data=data,
    180     byte_offsets=byte_offsets,
   (...)
    187     options=options,
    188 )
    190 return output

File ~\Anaconda3\lib\site-packages\uproot\interpretation\objects.py:993, in ObjectArray.to_numpy(self)
    991 output = numpy.empty(len(self), dtype=numpy.dtype(object))
    992 for i in range(len(self)):
--> 993     output[i] = self[i]
    994 return output

File ~\Anaconda3\lib\site-packages\uproot\interpretation\objects.py:1008, in ObjectArray.__getitem__(self, where)
   1004     chunk = uproot.source.chunk.Chunk.wrap(self._branch.file.source, data)
   1005     cursor = uproot.source.cursor.Cursor(
   1006         0, origin=-(byte_start + self._cursor_offset)
   1007     )
-> 1008     return self._model.read(
   1009         chunk,
   1010         cursor,
   1011         self._context,
   1012         self._branch.file,
   1013         self._detached_file,
   1014         self._branch,
   1015     )
   1017 elif isinstance(where, slice):
   1018     return ObjectArray(
   1019         self._model,
   1020         self._branch,
   (...)
   1024         self._cursor_offset,
   1025     )

File ~\Anaconda3\lib\site-packages\uproot\model.py:1362, in DispatchByVersion.read(cls, chunk, cursor, context, file, selffile, parent, concrete)
   1354             raise ValueError(
   1355                 f"""Unknown version {version} for class {classname_decode(cls.__name__)[0]} that cannot be skipped """
   1356                 """because its number of bytes is unknown.
   1357 """
   1358             )
   1360         # versioned_cls.read starts with numbytes_version again because move=False (above)
   1361         temp_var = cls.postprocess(
-> 1362             versioned_cls.read(
   1363                 chunk, cursor, context, file, selffile, parent, concrete=concrete
   1364             ),
   1365             chunk,
   1366             cursor,
   1367             context,
   1368             file,
   1369         )
   1371         if forth_obj is not None:
   1372             forth_obj.pop_active_node()

File ~\Anaconda3\lib\site-packages\uproot\model.py:854, in Model.read(cls, chunk, cursor, context, file, selffile, parent, concrete)
    852     forth_obj.add_node(forth_stash)
    853     forth_obj.push_active_node(forth_stash)
--> 854 self.read_members(chunk, cursor, context, file)
    855 if forth_obj is not None:
    856     forth_obj.pop_active_node()

File <dynamic>:19, in read_members(self, chunk, cursor, context, file)

File ~\Anaconda3\lib\site-packages\uproot\model.py:1362, in DispatchByVersion.read(cls, chunk, cursor, context, file, selffile, parent, concrete)
   1354             raise ValueError(
   1355                 f"""Unknown version {version} for class {classname_decode(cls.__name__)[0]} that cannot be skipped """
   1356                 """because its number of bytes is unknown.
   1357 """
   1358             )
   1360         # versioned_cls.read starts with numbytes_version again because move=False (above)
   1361         temp_var = cls.postprocess(
-> 1362             versioned_cls.read(
   1363                 chunk, cursor, context, file, selffile, parent, concrete=concrete
   1364             ),
   1365             chunk,
   1366             cursor,
   1367             context,
   1368             file,
   1369         )
   1371         if forth_obj is not None:
   1372             forth_obj.pop_active_node()

File ~\Anaconda3\lib\site-packages\uproot\model.py:854, in Model.read(cls, chunk, cursor, context, file, selffile, parent, concrete)
    852     forth_obj.add_node(forth_stash)
    853     forth_obj.push_active_node(forth_stash)
--> 854 self.read_members(chunk, cursor, context, file)
    855 if forth_obj is not None:
    856     forth_obj.pop_active_node()

File ~\Anaconda3\lib\site-packages\uproot\models\TTree.py:770, in Model_TTree_v20.read_members(self, chunk, cursor, context, file)
    766     cursor.skip(1)
    767 self._members["fClusterSize"] = cursor.array(
    768     chunk, self.member("fNClusterRange"), tmp, context
    769 )
--> 770 self._members["fIOFeatures"] = file.class_named("ROOT::TIOFeatures").read(
    771     chunk, cursor, context, file, self._file, self.concrete
    772 )
    773 self._members["fBranches"] = file.class_named("TObjArray").read(
    774     chunk, cursor, context, file, self._file, self.concrete
    775 )
    776 self._members["fLeaves"] = file.class_named("TObjArray").read(
    777     chunk, cursor, context, file, self._file, self.concrete
    778 )

File ~\Anaconda3\lib\site-packages\uproot\model.py:862, in Model.read(cls, chunk, cursor, context, file, selffile, parent, concrete)
    856         forth_obj.pop_active_node()
    858     self.hook_after_read_members(
    859         chunk=chunk, cursor=cursor, context=context, file=file
    860     )
--> 862 self.check_numbytes(chunk, cursor, context)
    864 self.hook_before_postprocess(
    865     chunk=chunk, cursor=cursor, context=context, file=file
    866 )
    868 out = self.postprocess(chunk, cursor, context, file)

File ~\Anaconda3\lib\site-packages\uproot\model.py:936, in Model.check_numbytes(self, chunk, cursor, context)
    916 """
    917 Args:
    918     chunk (:doc:`uproot.source.chunk.Chunk`): Buffer of contiguous data
   (...)
    932 likely.
    933 """
    934 import uproot.deserialization
--> 936 uproot.deserialization.numbytes_check(
    937     chunk,
    938     self._cursor,
    939     cursor,
    940     self._num_bytes,
    941     self.classname,
    942     context,
    943     getattr(self._file, "file_path", None),
    944 )

File ~\Anaconda3\lib\site-packages\uproot\deserialization.py:175, in numbytes_check(chunk, start_cursor, stop_cursor, num_bytes, classname, context, file_path)
    173 observed = stop_cursor.displacement(start_cursor)
    174 if observed != num_bytes:
--> 175     raise DeserializationError(
    176         f"""expected {num_bytes} bytes but cursor moved by {observed} bytes (through {classname})""",
    177         chunk,
    178         stop_cursor,
    179         context,
    180         file_path,
    181     )

DeserializationError: while reading

    TNtuple version 2 as uproot.dynamic.Model_TNtuple_v2 (236286 bytes)
        TTree version 20 as uproot.models.TTree.Model_TTree_v20 (236276 bytes)
            (base): <TNamed 'OPDs' title='OPDs etc for LST01-LST02' at 0x015aef2dcd30>
            (base): <TAttLine (version 2) at 0x015aef2dcc40>
            (base): <TAttFill (version 2) at 0x015aef2dc670>
            (base): <TAttMarker (version 2) at 0x015aef2dcbb0>
            fEntries: 7212
            fTotBytes: 0
            fZipBytes: 0
            fSavedBytes: 0
            fFlushedBytes: 0
            fWeight: 1.0
            fTimerInterval: 0
            fScanField: 25
            fUpdate: 0
            fDefaultEntryOffsetLen: 1000
            fNClusterRange: 0
            fMaxEntries: 1000000000000
            fMaxEntryLoop: 1000000000000
            fMaxVirtualSize: 0
            fAutoSave: -300000000
            fAutoFlush: -30000000
            fEstimate: 1000000
            fClusterRangeEnd: array([], dtype='>i8')
            fClusterSize: array([], dtype='>i8')
            ROOT::TIOFeatures version 0 as uproot.models.TTree.Model_ROOT_3a3a_TIOFeatures (11 bytes)
                fIOBits: 194
Members for ROOT::TIOFeatures: fIOBits?

expected 11 bytes but cursor moved by 15 bytes (through ROOT::TIOFeatures)
in file file.root
in object /Pairs;2