Closed zdlawrence closed 1 year ago
Thank you for reporting this. Please could you share the errors you get?
I am able to view these files OK with the ncdump command.
All three files give the same errors when I try to read them in python with xarray, using both the standard netcdf4 and h5netcdf engines. These are the only files I've encountered that give me these errors.
In [1]: import xarray as xr
In [2]: ds = xr.open_dataset("/badc/snap/data/post-cmip6/SNAPSI/Meteo-France/CNRM-CM61/nudged/s20191001/r7i1p1f1/6hrPt/ta/gr/v20230215/ta_6hrPt_CNRM-CM61_nudged_s20191001-r7i1p1f1_gr_20191001-20191114.nc").load()
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
Cell In[2], line 1
----> 1 ds = xr.open_dataset("/badc/snap/data/post-cmip6/SNAPSI/Meteo-France/CNRM-CM61/nudged/s20191001/r7i1p1f1/6hrPt/ta/gr/v20230215/ta_6hrPt_CNRM-CM61_nudged_s20191001-r7i1p1f1_gr_20191001-20191114.nc").load()
File ~/mambaforge/envs/py311/lib/python3.11/site-packages/xarray/core/dataset.py:800, in Dataset.load(self, **kwargs)
798 for k, v in self.variables.items():
799 if k not in lazy_data:
--> 800 v.load()
802 return self
File ~/mambaforge/envs/py311/lib/python3.11/site-packages/xarray/core/variable.py:546, in Variable.load(self, **kwargs)
544 self._data = as_compatible_data(loaded_data)
545 elif isinstance(self._data, indexing.ExplicitlyIndexed):
--> 546 self._data = self._data.get_duck_array()
547 elif not is_duck_array(self._data):
548 self._data = np.asarray(self._data)
File ~/mambaforge/envs/py311/lib/python3.11/site-packages/xarray/core/indexing.py:696, in MemoryCachedArray.get_duck_array(self)
695 def get_duck_array(self):
--> 696 self._ensure_cached()
697 return self.array.get_duck_array()
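File ~/mambaforge/envs/py311/lib/python3.11/site-packages/xarray/core/indexing.py:690, in MemoryCachedArray._ensure_cached(self)
689 def _ensure_cached(self):
--> 690 self.array = as_indexable(self.array.get_duck_array())
File ~/mambaforge/envs/py311/lib/python3.11/site-packages/xarray/core/indexing.py:664, in CopyOnWriteArray.get_duck_array(self)
663 def get_duck_array(self):
--> 664 return self.array.get_duck_array()
File ~/mambaforge/envs/py311/lib/python3.11/site-packages/xarray/core/indexing.py:557, in LazilyIndexedArray.get_duck_array(self)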
552 # self.array[self.key] is now a numpy array when
553 # self.array is a BackendArray subclass
554 # and self.key is BasicIndexer((slice(None, None, None),))
555 # so we need the explicit check for ExplicitlyIndexed
556 if isinstance(array, ExplicitlyIndexed):
--> 557 array = array.get_duck_array()
558 return _wrap_numpy_scalars(array)
File ~/mambaforge/envs/py311/lib/python3.11/site-packages/xarray/coding/variables.py:74, in _ElementwiseFunctionArray.get_duck_array(self)
73 def get_duck_array(self):
---> 74 return self.func(self.array.get_duck_array())
File ~/mambaforge/envs/py311/lib/python3.11/site-packages/xarray/core/indexing.py:551, in LazilyIndexedArray.get_duck_array(self)
550 def get_duck_array(self):
--> 551 array = self.array[self.key]
552 # self.array[self.key] is now a numpy array when
553 # self.array is a BackendArray subclass
554 # and self.key is BasicIndexer((slice(None, None, None),))
555 # so we need the explicit check for ExplicitlyIndexed
556 if isinstance(array, ExplicitlyIndexed):
File ~/mambaforge/envs/py311/lib/python3.11/site-packages/xarray/backends/netCDF4_.py:100, in NetCDF4ArrayWrapper.__getitem__(self, key)
99 def __getitem__(self, key):
--> 100 return indexing.explicit_indexing_adapter(
101 key, self.shape, indexing.IndexingSupport.OUTER, self._getitem
102 )
File ~/mambaforge/envs/py311/lib/python3.11/site-packages/xarray/core/indexing.py:858, in explicit_indexing_adapter(key, shape, indexing_support, raw_indexing_method)
836 """Support explicit indexing by delegating to a raw indexing method.
837
838 Outer and/or vectorized indexers are supported by indexing a second time
(...)
855 Indexing result, in the form of a duck numpy-array.
856 """
857 raw_key, numpy_indices = decompose_indexer(key, shape, indexing_support)
--> 858 result = raw_indexing_method(raw_key.tuple)
859 if numpy_indices.tuple:
860 # index the loaded np.ndarray
861 result = NumpyIndexingAdapter(result)[numpy_indices]
File ~/mambaforge/envs/py311/lib/python3.11/site-packages/xarray/backends/netCDF4_.py:113, in NetCDF4ArrayWrapper._getitem(self, key)
111 with self.datastore.lock:
112 original_array = self.get_array(needs_lock=False)
--> 113 array = getitem(original_array, key)
114 except IndexError:
115 # Catch IndexError in netCDF4 and return a more informative
116 # error message. This is most often called when an unsorted
117 # indexer is used before the data is loaded from disk.
118 msg = (
119 "The indexing operation you are attempting to perform "
120 "is not valid on netCDF4.Variable object. Try loading "
121 "your data into memory first by calling .load()."
122 )
File src/netCDF4/_netCDF4.pyx:4958, in netCDF4._netCDF4.Variable.__getitem__()
File src/netCDF4/_netCDF4.pyx:5916, in netCDF4._netCDF4.Variable._get()
File src/netCDF4/_netCDF4.pyx:2029, in netCDF4._netCDF4._ensure_nc_success()
RuntimeError: NetCDF: HDF error
In [3]: ds = xr.open_dataset("/badc/snap/data/post-cmip6/SNAPSI/Meteo-France/CNRM-CM61/nudged/s20191001/r7i1p1f1/6hrPt/ta/gr/v20230215/ta_6hrPt_CNRM-CM61_nudged_s20191001-r7i1p1f1_gr_20191001-20191114.nc", engine="h5netcdf").load()
---------------------------------------------------------------------------
OSError Traceback (most recent call last)
Cell In[3], line 1
----> 1 ds = xr.open_dataset("/badc/snap/data/post-cmip6/SNAPSI/Meteo-France/CNRM-CM61/nudged/s20191001/r7i1p1f1/6hrPt/ta/gr/v20230215/ta_6hrPt_CNRM-CM61_nudged_s20191001-r7i1p1f1_gr_20191001-20191114.nc", engine="h5netcdf").load()
File ~/mambaforge/envs/py311/lib/python3.11/site-packages/xarray/core/dataset.py:800, in Dataset.load(self, **kwargs)
798 for k, v in self.variables.items():
799 if k not in lazy_data:
--> 800 v.load()
802 return self
File ~/mambaforge/envs/py311/lib/python3.11/site-packages/xarray/core/variable.py:546, in Variable.load(self, **kwargs)
544 self._data = as_compatible_data(loaded_data)
545 elif isinstance(self._data, indexing.ExplicitlyIndexed):
--> 546 self._data = self._data.get_duck_array()
547 elif not is_duck_array(self._data):
548 self._data = np.asarray(self._data)
File ~/mambaforge/envs/py311/lib/python3.11/site-packages/xarray/core/indexing.py:696, in MemoryCachedArray.get_duck_array(self)
695 def get_duck_array(self):
--> 696 self._ensure_cached()
697 return self.array.get_duck_array()
File ~/mambaforge/envs/py311/lib/python3.11/site-packages/xarray/core/indexing.py:690, in MemoryCachedArray._ensure_cached(self)
689 def _ensure_cached(self):
--> 690 self.array = as_indexable(self.array.get_duck_array())
File ~/mambaforge/envs/py311/lib/python3.11/site-packages/xarray/core/indexing.py:664, in CopyOnWriteArray.get_duck_array(self)
663 def get_duck_array(self):
--> 664 return self.array.get_duck_array()
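File ~/mambaforge/envs/py311/lib/python3.11/site-packages/xarray/core/indexing.py:557, in LazilyIndexedArray.get_duck_array(self)
552 # self.array[self.key] is now a numpy array when
553 # self.array is a BackendArray subclass
554 # and self.key is BasicIndexer((slice(None, None, None),))
555 # so we need the explicit check for ExplicitlyIndexed
556 if isinstance(array, ExplicitlyIndexed):
--> 557 array = array.get_duck_array()
558 return _wrap_numpy_scalars(array)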
File ~/mambaforge/envs/py311/lib/python3.11/site-packages/xarray/coding/variables.py:74, in _ElementwiseFunctionArray.get_duck_array(self)
73 def get_duck_array(self):
---> 74 return self.func(self.array.get_duck_array())
File ~/mambaforge/envs/py311/lib/python3.11/site-packages/xarray/core/indexing.py:551, in LazilyIndexedArray.get_duck_array(self)
550 def get_duck_array(self):
--> 551 array = self.array[self.key]
552 # self.array[self.key] is now a numpy array when
553 # self.array is a BackendArray subclass
554 # and self.key is BasicIndexer((slice(None, None, None),))
555 # so we need the explicit check for ExplicitlyIndexed
556 if isinstance(array, ExplicitlyIndexed):
File ~/mambaforge/envs/py311/lib/python3.11/site-packages/xarray/backends/h5netcdf_.py:51, in H5NetCDFArrayWrapper.__getitem__(self, key)
50 def __getitem__(self, key):
---> 51 return indexing.explicit_indexing_adapter(
52 key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem
53 )
File ~/mambaforge/envs/py311/lib/python3.11/site-packages/xarray/core/indexing.py:858, in explicit_indexing_adapter(key, shape, indexing_support, raw_indexing_method)
836 """Support explicit indexing by delegating to a raw indexing method.
837
838 Outer and/or vectorized indexers are supported by indexing a second time
(...)
855 Indexing result, in the form of a duck numpy-array.
856 """
857 raw_key, numpy_indices = decompose_indexer(key, shape, indexing_support)
--> 858 result = raw_indexing_method(raw_key.tuple)
859 if numpy_indices.tuple:
860 # index the loaded np.ndarray
861 result = NumpyIndexingAdapter(result)[numpy_indices]
File ~/mambaforge/envs/py311/lib/python3.11/site-packages/xarray/backends/h5netcdf_.py:58, in H5NetCDFArrayWrapper._getitem(self, key)
56 with self.datastore.lock:
57 array = self.get_array(needs_lock=False)
---> 58 return array[key]
File ~/mambaforge/envs/py311/lib/python3.11/site-packages/h5netcdf/core.py:347, in BaseVariable.__getitem__(self, key)
339 fv = self.dtype.type(self._h5ds.fillvalue)
340 return np.pad(
341 self._h5ds,
342 pad_width=padding,
343 mode="constant",
344 constant_values=fv,
345 )[key]
--> 347 return self._h5ds[key]
File h5py/_objects.pyx:54, in h5py._objects.with_phil.wrapper()
File h5py/_objects.pyx:55, in h5py._objects.with_phil.wrapper()
File ~/mambaforge/envs/py311/lib/python3.11/site-packages/h5py/_hl/dataset.py:758, in Dataset.__getitem__(self, args, new_dtype)
756 if self._fast_read_ok and (new_dtype is None):
757 try:
--> 758 return self._fast_reader.read(args)
759 except TypeError:
760 pass # Fall back to Python read pathway below
File h5py/_selector.pyx:376, in h5py._selector.Reader.read()
OSError: Can't synchronously read data (wrong B-tree signature)
Thank you for raising these errors. The 3 corrupt files have been replaced in the archive and should open OK now.
Thank you @mollymacrae - I can confirm that I am able to read the files and that the jobs that were failing before now complete without issue.
I have been processing some of the basic fields (ua, va, ta, wap, zg) for the group-SNAPSI effort. In doing so, I've run into three corrupted netCDF files that give HDF errors when trying to read them:
Meteo-France/CNRM-CM61/nudged/s20191001/r7i1p1f1/6hrPt/ta/gr/v20230215/ta_6hrPt_CNRM-CM61_nudged_s20191001-r7i1p1f1_gr_20191001-20191114.nc
Meteo-France/CNRM-CM61/nudged/s20191001/r8i1p1f1/6hrPt/ta/gr/v20230215/ta_6hrPt_CNRM-CM61_nudged_s20191001-r8i1p1f1_gr_20191001-20191114.nc
UKMO/GloSea6/control-full/s20180125/r10i1p1f1/6hrPt/wap/gn/v20230403/wap_6hrPt_GloSea6_control-full_s20180125-r10i1p1f1_gn_201801250600-201803260000.nc
When/if I encounter more, I will post them here.