NCAS-CMS / pyfive

A pure Python HDF5 file reader
BSD 3-Clause "New" or "Revised" License
1 stars 1 forks source link

Need to avoid a horrible stack dump from asking for chunk addresses from an unchunked file #1

Open bnlawrence opened 6 months ago

bnlawrence commented 6 months ago

Currently we get a horrible stack dump for the following code:

import pyfive
f = pyfive.File('zero.nc')
v = f['var']
d = v._dataobjects
d._get_chunk_addresses()

which gives:

File ~/Repositories/pyfive/pyfive/dataobjects.py:743, in DatasetDataObject._get_chunk_addresses(self)
    739 if self._zchunk_index == {}:
    741     self._get_chunk_params()
--> 743     self.chunk_btree = BTreeV1RawDataChunks(
    744         self.fh, self._chunk_address, self._chunk_dims)
    746     count = np.prod(self.shape)
    747     itemsize = np.dtype(self.dtype).itemsize

File ~/Repositories/pyfive/pyfive/btree.py:129, in BTreeV1RawDataChunks.__init__(self, fh, offset, dims)
    127 """ initalize. """
    128 self.dims = dims
--> 129 super().__init__(fh, offset)

File ~/Repositories/pyfive/pyfive/btree.py:25, in AbstractBTree.__init__(self, fh, offset)
     22 self.depth = None
     23 self.all_nodes = {}
---> 25 self._read_root_node()
     26 self._read_children()

File ~/Repositories/pyfive/pyfive/btree.py:39, in AbstractBTree._read_root_node(self)
     38 def _read_root_node(self):
---> 39     root_node = self._read_node(self.offset, None)
     40     self._add_node(root_node)
     41     self.depth = root_node['node_level']

File ~/Repositories/pyfive/pyfive/btree.py:133, in BTreeV1RawDataChunks._read_node(self, offset, node_level)
    131 def _read_node(self, offset, node_level):
    132     """ Return a single node in the b-tree located at a give offset. """
--> 133     node = self._read_node_header(offset, node_level)
    134     keys = []
    135     addresses = []

File ~/Repositories/pyfive/pyfive/btree.py:81, in BTreeV1._read_node_header(self, offset, node_level)
     79 def _read_node_header(self, offset, node_level):
     80     """ Return a single node header in the b-tree located at a give offset. """
---> 81     self.fh.seek(offset)
     82     node = _unpack_struct_from_file(self.B_LINK_NODE, self.fh)
     83     assert node['signature'] == b'TREE'

TypeError: 'NoneType' object cannot be interpreted as an integer

for a file with the following signature:

netcdf zero {
dimensions:
    y = 3 ;
    x = 4 ;
variables:
    float var(y, x) ;
        var:_Storage = "contiguous" ;
        var:_Endianness = "little" ;

// global attributes:
        :_NCProperties = "version=2,netcdf=4.9.2,hdf5=1.14.3" ;
        :_SuperblockVersion = 2 ;
        :_IsNetcdf4 = 1 ;
        :_Format = "netCDF-4" ;
}
bnlawrence commented 6 months ago

In fixing this, we might want to change the name of this method to _load_chunk_indexes to better represent what it does. We might also ask: do we need to go to the b-tree, given that it seems the b-tree is already loaded at this point?