icesat2py / icepyx

Python tools for obtaining and working with ICESat-2 data
https://icepyx.readthedocs.io/en/latest/
BSD 3-Clause "New" or "Revised" License

read-in: the index error on ATL08 #376

Closed: liuh886 closed this issue 11 months ago

liuh886 commented 2 years ago

I got an index error on ATL08 similar to previously reported cases, but I am using icepyx version 0.6.3:

Here is a minimal reproduction. I ran it on two PCs with the same result:

>>> region_a = ipx.Query('ATL08', any_spatial_extent, any_date_range)
>>> region_a.order_vars.wanted
>>> region_a.order_vars.append(keyword_list=['land', 'land_segments','terrain'])
>>> region_a.order_granules(Coverage=region_a.order_vars.wanted)
>>> region_a.download_granules(path)
>>> pattern = "processed_ATL{product:2}_{datetime:%Y%m%d%H%M%S}_{rgt:4}{cycle:2}{orbitsegment:2}_{version:3}_{revision:2}.h5"
>>> reader = ipx.Read(data_source=path, product="ATL08", filename_pattern=pattern)
You have 1 files matching the filename pattern to be read in.

>>> reader.vars.wanted
>>> reader.vars.append(var_list=['h_te_best_fit'])
>>> ds = reader.load()

---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
Input In [191], in <cell line: 1>()
----> 1 ds = reader.load()

File ~\.conda\envs\snowdepth\lib\site-packages\icepyx\core\read.py:542, in Read.load(self)
    535 # DevNote: I'd originally hoped to rely on intake-xarray in order to not have to iterate through the files myself,
    536 # by providing a generalized url/source in building the catalog.
    537 # However, this led to errors when I tried to combine two identical datasets because the single dimension was equal.
    538 # In these situations, xarray recommends manually controlling the merge/concat process yourself.
    539 # While unlikely to be a broad issue, I've heard of multiple matching timestamps causing issues for combining multiple IS2 datasets.
    540 for file in self._filelist:
    541     all_dss.append(
--> 542         self._build_single_file_dataset(file, groups_list)
    543     )  # wanted_groups, vgrp.keys()))
    545 if len(all_dss) == 1:
    546     return all_dss[0]

File ~\.conda\envs\snowdepth\lib\site-packages\icepyx\core\read.py:682, in Read._build_single_file_dataset(self, file, groups_list)
    680 grp_path = wanted_groups_list[0]
    681 wanted_groups_list = wanted_groups_list[1:]
--> 682 ds = self._read_single_grp(file, grp_path)
    683 is2ds, ds = Read._add_vars_to_ds(
    684     is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict
    685 )
    687 # if there are any deeper nested variables, get those so they have actual coordinates and add them

File ~\.conda\envs\snowdepth\lib\site-packages\icepyx\core\read.py:602, in Read._read_single_grp(self, file, grp_path)
    598 try:
    599     grpcat = is2cat.build_catalog(
    600         file, self._pattern, self._source_type, grp_paths=grp_path
    601     )
--> 602     ds = grpcat[self._source_type].read()
    604 # NOTE: could also do this with h5py, but then would have to read in each variable in the group separately
    605 except ValueError:

File ~\.conda\envs\snowdepth\lib\site-packages\intake_xarray\base.py:39, in DataSourceMixin.read(self)
     37 def read(self):
     38     """Return a version of the xarray with all the data in memory"""
---> 39     self._load_metadata()
     40     return self._ds.load()

File ~\.conda\envs\snowdepth\lib\site-packages\intake\source\base.py:236, in DataSourceBase._load_metadata(self)
    234 """load metadata only if needed"""
    235 if self._schema is None:
--> 236     self._schema = self._get_schema()
    237     self.dtype = self._schema.dtype
    238     self.shape = self._schema.shape

File ~\.conda\envs\snowdepth\lib\site-packages\intake_xarray\base.py:18, in DataSourceMixin._get_schema(self)
     15 self.urlpath = self._get_cache(self.urlpath)[0]
     17 if self._ds is None:
---> 18     self._open_dataset()
     20     metadata = {
     21         'dims': dict(self._ds.dims),
     22         'data_vars': {k: list(self._ds[k].coords)
     23                       for k in self._ds.data_vars.keys()},
     24         'coords': tuple(self._ds.coords.keys()),
     25     }
     26     if getattr(self, 'on_server', False):

File ~\.conda\envs\snowdepth\lib\site-packages\intake_xarray\netcdf.py:92, in NetCDFSource._open_dataset(self)
     88 else:
     89     # https://github.com/intake/filesystem_spec/issues/476#issuecomment-732372918
     90     url = fsspec.open(self.urlpath, **self.storage_options).open()
---> 92 self._ds = _open_dataset(url, chunks=self.chunks, **kwargs)

File ~\.conda\envs\snowdepth\lib\site-packages\xarray\backends\api.py:531, in open_dataset(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, inline_array, backend_kwargs, **kwargs)
    519 decoders = _resolve_decoders_kwargs(
    520     decode_cf,
    521     open_backend_dataset_parameters=backend.open_dataset_parameters,
   (...)
    527     decode_coords=decode_coords,
    528 )
    530 overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
--> 531 backend_ds = backend.open_dataset(
    532     filename_or_obj,
    533     drop_variables=drop_variables,
    534     **decoders,
    535     **kwargs,
    536 )
    537 ds = _dataset_from_backend_dataset(
    538     backend_ds,
    539     filename_or_obj,
   (...)
    547     **kwargs,
    548 )
    549 return ds

File ~\.conda\envs\snowdepth\lib\site-packages\xarray\backends\h5netcdf_.py:401, in H5netcdfBackendEntrypoint.open_dataset(self, filename_or_obj, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta, format, group, lock, invalid_netcdf, phony_dims, decode_vlen_strings)
    389 store = H5NetCDFStore.open(
    390     filename_or_obj,
    391     format=format,
   (...)
    396     decode_vlen_strings=decode_vlen_strings,
    397 )
    399 store_entrypoint = StoreBackendEntrypoint()
--> 401 ds = store_entrypoint.open_dataset(
    402     store,
    403     mask_and_scale=mask_and_scale,
    404     decode_times=decode_times,
    405     concat_characters=concat_characters,
    406     decode_coords=decode_coords,
    407     drop_variables=drop_variables,
    408     use_cftime=use_cftime,
    409     decode_timedelta=decode_timedelta,
    410 )
    411 return ds

File ~\.conda\envs\snowdepth\lib\site-packages\xarray\backends\store.py:26, in StoreBackendEntrypoint.open_dataset(self, store, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta)
     14 def open_dataset(
     15     self,
     16     store,
   (...)
     24     decode_timedelta=None,
     25 ):
---> 26     vars, attrs = store.load()
     27     encoding = store.get_encoding()
     29     vars, attrs, coord_names = conventions.decode_cf_variables(
     30         vars,
     31         attrs,
   (...)
     38         decode_timedelta=decode_timedelta,
     39     )

File ~\.conda\envs\snowdepth\lib\site-packages\xarray\backends\common.py:125, in AbstractDataStore.load(self)
    103 def load(self):
    104     """
    105     This loads the variables and attributes simultaneously.
    106     A centralized loading function makes it easier to create
   (...)
    122     are requested, so care should be taken to make sure its fast.
    123     """
    124     variables = FrozenDict(
--> 125         (_decode_variable_name(k), v) for k, v in self.get_variables().items()
    126     )
    127     attributes = FrozenDict(self.get_attrs())
    128     return variables, attributes

File ~\.conda\envs\snowdepth\lib\site-packages\xarray\backends\h5netcdf_.py:232, in H5NetCDFStore.get_variables(self)
    231 def get_variables(self):
--> 232     return FrozenDict(
    233         (k, self.open_store_variable(k, v)) for k, v in self.ds.variables.items()
    234     )

File ~\.conda\envs\snowdepth\lib\site-packages\xarray\core\utils.py:474, in FrozenDict(*args, **kwargs)
    473 def FrozenDict(*args, **kwargs) -> Frozen:
--> 474     return Frozen(dict(*args, **kwargs))

File ~\.conda\envs\snowdepth\lib\site-packages\xarray\backends\h5netcdf_.py:233, in <genexpr>(.0)
    231 def get_variables(self):
    232     return FrozenDict(
--> 233         (k, self.open_store_variable(k, v)) for k, v in self.ds.variables.items()
    234     )

File ~\.conda\envs\snowdepth\lib\site-packages\xarray\backends\h5netcdf_.py:197, in H5NetCDFStore.open_store_variable(self, name, var)
    194 def open_store_variable(self, name, var):
    195     import h5py
--> 197     dimensions = var.dimensions
    198     data = indexing.LazilyIndexedArray(H5NetCDFArrayWrapper(name, self))
    199     attrs = _read_attributes(var)

File ~\.conda\envs\snowdepth\lib\site-packages\h5netcdf\core.py:252, in BaseVariable.dimensions(self)
    250 """Return variable dimension names."""
    251 if self._dimensions is None:
--> 252     self._dimensions = self._lookup_dimensions()
    253 return self._dimensions

File ~\.conda\envs\snowdepth\lib\site-packages\h5netcdf\core.py:148, in BaseVariable._lookup_dimensions(self)
    145 # normal variable carrying DIMENSION_LIST
    146 # extract hdf5 file references and get objects name
    147 if "DIMENSION_LIST" in attrs:
--> 148     return tuple(
    149         self._root._h5file[ref[0]].name.split("/")[-1]
    150         for ref in list(self._h5ds.attrs.get("DIMENSION_LIST", []))
    151     )
    153 # need to use the h5ds name here to distinguish from collision dimensions
    154 child_name = self._h5ds.name.split("/")[-1]

File ~\.conda\envs\snowdepth\lib\site-packages\h5netcdf\core.py:149, in <genexpr>(.0)
    145 # normal variable carrying DIMENSION_LIST
    146 # extract hdf5 file references and get objects name
    147 if "DIMENSION_LIST" in attrs:
    148     return tuple(
--> 149         self._root._h5file[ref[0]].name.split("/")[-1]
    150         for ref in list(self._h5ds.attrs.get("DIMENSION_LIST", []))
    151     )
    153 # need to use the h5ds name here to distinguish from collision dimensions
    154 child_name = self._h5ds.name.split("/")[-1]

IndexError: index 0 is out of bounds for axis 0 with size 0
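For what it's worth, the last frames show h5netcdf dereferencing the HDF5 object references stored in each variable's DIMENSION_LIST attribute; the IndexError suggests at least one of those per-axis reference lists is empty. Here is a minimal diagnostic sketch (my own, not icepyx code) that flags such variables; it assumes h5py is installed, and the filename is a hypothetical one matching the pattern above:

import h5py

# Hypothetical downloaded filename; substitute your own file.
filename = "processed_ATL08_20200102184312_01110602_005_01.h5"

def flag_empty_dimension_refs(name, obj):
    # DIMENSION_LIST holds one list of HDF5 object references per axis;
    # h5netcdf calls ref[0] on each list, which fails when a list is empty.
    if isinstance(obj, h5py.Dataset) and "DIMENSION_LIST" in obj.attrs:
        for axis, refs in enumerate(obj.attrs["DIMENSION_LIST"]):
            if len(refs) == 0:
                print(f"{name}: axis {axis} has an empty DIMENSION_LIST entry")

with h5py.File(filename, "r") as f:
    f.visititems(flag_empty_dimension_refs)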

If this is not the same bug, I can post a new issue.

:-)

Originally posted by @liuh886 in https://github.com/icesat2py/icepyx/issues/281#issuecomment-1258675815

JessicaS11 commented 2 years ago

Hello @liuh886 - thanks for posting your issue, and thanks for your patience awaiting my reply (I was at a workshop last week). I've been able to reproduce your issue (thanks for the example code - that was extremely helpful), but have not yet drilled down into why it's happening (i.e. what may be different about your use case versus the ones we tested). I'll continue working on this, and would be happy to hear about any new insights you have.

JessicaS11 commented 11 months ago

@liuh886 - I just wanted to let you know that this issue should be fixed in our most recent release v0.8.1. Thanks for your patience as we resolved it and please let us know if you have any further issues.
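(For anyone landing here later, a quick way to pick up the fix, assuming a pip-managed environment; icepyx also exposes its installed version for confirmation:)

# In your shell first: pip install --upgrade icepyx
import icepyx as ipx

# Confirm the upgrade took effect; should print 0.8.1 or later.
print(ipx.__version__)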