Loading data using src.data_loading.load_ens.load_ens_data_D(file) throws an OSError.
It's unclear from the trace, but the root cause may be related to running out of RAM, similar to ecmwf/cfgrib#340, although there should be enough memory for this not to be a problem given the size of the data (~3 GB files, over 16 GB of RAM free).
Full trace:
---------------------------------------------------------------------------
OSError Traceback (most recent call last)
Cell In[3], line 1
----> 1 dsD = load_ens_data_D(file)
File [c:\Users\zjh54983\Downloads\hackathon\delorean-datasets\notebooks\..\src\data_loading\load_ens.py:231](file:///C:/Users/zjh54983/Downloads/hackathon/delorean-datasets/src/data_loading/load_ens.py:231), in load_ens_data_D(fn_D, drop_wind_components, temperature_in_C, calculate_diffs, verbose)
228 # loading everything is slower:
229 # 51.2 s ± 6.44 s
230 logging.info(f"Loading {fn_D} (full)")
--> 231 dsD = xr.load_dataset(fn_D, engine='cfgrib', chunks=10)
233 # dsD = calculate_wind_speed(dsD, drop_uv= drop_wind_components,verbose=verbose)
234 # if temperature_in_C:
235 # dsD = calculate_temperature_in_C(dsD)
236 # if calculate_diffs:
237 # dsD = get_diff_values(dsD,verbose=verbose)
238 # logging.info(f"Loading complete")
239 return dsD
File [c:\Users\zjh54983\Anaconda3\envs\phd_ph6-02_ens-extended_analysis\Lib\site-packages\xarray\backends\api.py:275](file:///C:/Users/zjh54983/Anaconda3/envs/phd_ph6-02_ens-extended_analysis/Lib/site-packages/xarray/backends/api.py:275), in load_dataset(filename_or_obj, **kwargs)
272 raise TypeError("cache has no effect in this context")
274 with open_dataset(filename_or_obj, **kwargs) as ds:
--> 275 return ds.load()
File [c:\Users\zjh54983\Anaconda3\envs\phd_ph6-02_ens-extended_analysis\Lib\site-packages\xarray\core\dataset.py:792](file:///C:/Users/zjh54983/Anaconda3/envs/phd_ph6-02_ens-extended_analysis/Lib/site-packages/xarray/core/dataset.py:792), in Dataset.load(self, **kwargs)
789 chunkmanager = get_chunked_array_type(*lazy_data.values())
791 # evaluate all the chunked arrays simultaneously
--> 792 evaluated_data = chunkmanager.compute(*lazy_data.values(), **kwargs)
794 for k, data in zip(lazy_data, evaluated_data):
795 self.variables[k].data = data
File [c:\Users\zjh54983\Anaconda3\envs\phd_ph6-02_ens-extended_analysis\Lib\site-packages\xarray\core\daskmanager.py:70](file:///C:/Users/zjh54983/Anaconda3/envs/phd_ph6-02_ens-extended_analysis/Lib/site-packages/xarray/core/daskmanager.py:70), in DaskManager.compute(self, *data, **kwargs)
67 def compute(self, *data: DaskArray, **kwargs) -> tuple[np.ndarray, ...]:
68 from dask.array import compute
---> 70 return compute(*data, **kwargs)
File [c:\Users\zjh54983\Anaconda3\envs\phd_ph6-02_ens-extended_analysis\Lib\site-packages\dask\threaded.py:89](file:///C:/Users/zjh54983/Anaconda3/envs/phd_ph6-02_ens-extended_analysis/Lib/site-packages/dask/threaded.py:89), in get(dsk, keys, cache, num_workers, pool, **kwargs)
86 elif isinstance(pool, multiprocessing.pool.Pool):
87 pool = MultiprocessingPoolExecutor(pool)
---> 89 results = get_async(
90 pool.submit,
91 pool._max_workers,
92 dsk,
93 keys,
94 cache=cache,
95 get_id=_thread_get_id,
96 pack_exception=pack_exception,
97 **kwargs,
98 )
100 # Cleanup pools associated to dead threads
101 with pools_lock:
File [c:\Users\zjh54983\Anaconda3\envs\phd_ph6-02_ens-extended_analysis\Lib\site-packages\dask\local.py:511](file:///C:/Users/zjh54983/Anaconda3/envs/phd_ph6-02_ens-extended_analysis/Lib/site-packages/dask/local.py:511), in get_async(submit, num_workers, dsk, result, cache, get_id, rerun_exceptions_locally, pack_exception, raise_exception, callbacks, dumps, loads, chunksize, **kwargs)
509 _execute_task(task, data) # Re-execute locally
510 else:
--> 511 raise_exception(exc, tb)
512 res, worker_id = loads(res_info)
513 state["cache"][key] = res
File [c:\Users\zjh54983\Anaconda3\envs\phd_ph6-02_ens-extended_analysis\Lib\site-packages\dask\local.py:319](file:///C:/Users/zjh54983/Anaconda3/envs/phd_ph6-02_ens-extended_analysis/Lib/site-packages/dask/local.py:319), in reraise(exc, tb)
317 if exc.__traceback__ is not tb:
318 raise exc.with_traceback(tb)
--> 319 raise exc
File [c:\Users\zjh54983\Anaconda3\envs\phd_ph6-02_ens-extended_analysis\Lib\site-packages\dask\local.py:224](file:///C:/Users/zjh54983/Anaconda3/envs/phd_ph6-02_ens-extended_analysis/Lib/site-packages/dask/local.py:224), in execute_task(key, task_info, dumps, loads, get_id, pack_exception)
222 try:
223 task, data = loads(task_info)
--> 224 result = _execute_task(task, data)
225 id = get_id()
226 result = dumps((result, id))
File [c:\Users\zjh54983\Anaconda3\envs\phd_ph6-02_ens-extended_analysis\Lib\site-packages\xarray\core\indexing.py:484](file:///C:/Users/zjh54983/Anaconda3/envs/phd_ph6-02_ens-extended_analysis/Lib/site-packages/xarray/core/indexing.py:484), in ImplicitToExplicitIndexingAdapter.__array__(self, dtype)
483 def __array__(self, dtype: np.typing.DTypeLike = None) -> np.ndarray:
--> 484 return np.asarray(self.get_duck_array(), dtype=dtype)
File [c:\Users\zjh54983\Anaconda3\envs\phd_ph6-02_ens-extended_analysis\Lib\site-packages\xarray\core\indexing.py:487](file:///C:/Users/zjh54983/Anaconda3/envs/phd_ph6-02_ens-extended_analysis/Lib/site-packages/xarray/core/indexing.py:487), in ImplicitToExplicitIndexingAdapter.get_duck_array(self)
486 def get_duck_array(self):
--> 487 return self.array.get_duck_array()
File [c:\Users\zjh54983\Anaconda3\envs\phd_ph6-02_ens-extended_analysis\Lib\site-packages\xarray\core\indexing.py:664](file:///C:/Users/zjh54983/Anaconda3/envs/phd_ph6-02_ens-extended_analysis/Lib/site-packages/xarray/core/indexing.py:664), in CopyOnWriteArray.get_duck_array(self)
663 def get_duck_array(self):
--> 664 return self.array.get_duck_array()
File [c:\Users\zjh54983\Anaconda3\envs\phd_ph6-02_ens-extended_analysis\Lib\site-packages\xarray\core\indexing.py:551](file:///C:/Users/zjh54983/Anaconda3/envs/phd_ph6-02_ens-extended_analysis/Lib/site-packages/xarray/core/indexing.py:551), in LazilyIndexedArray.get_duck_array(self)
550 def get_duck_array(self):
--> 551 array = self.array[self.key]
552 # self.array[self.key] is now a numpy array when
553 # self.array is a BackendArray subclass
554 # and self.key is BasicIndexer((slice(None, None, None),))
555 # so we need the explicit check for ExplicitlyIndexed
556 if isinstance(array, ExplicitlyIndexed):
File [c:\Users\zjh54983\Anaconda3\envs\phd_ph6-02_ens-extended_analysis\Lib\site-packages\cfgrib\xarray_plugin.py:155](file:///C:/Users/zjh54983/Anaconda3/envs/phd_ph6-02_ens-extended_analysis/Lib/site-packages/cfgrib/xarray_plugin.py:155), in CfGribArrayWrapper.__getitem__(self, key)
151 def __getitem__(
152 self,
153 key: xr.core.indexing.ExplicitIndexer,
154 ) -> np.ndarray:
--> 155 return xr.core.indexing.explicit_indexing_adapter(
156 key, self.shape, xr.core.indexing.IndexingSupport.BASIC, self._getitem
157 )
File [c:\Users\zjh54983\Anaconda3\envs\phd_ph6-02_ens-extended_analysis\Lib\site-packages\xarray\core\indexing.py:858](file:///C:/Users/zjh54983/Anaconda3/envs/phd_ph6-02_ens-extended_analysis/Lib/site-packages/xarray/core/indexing.py:858), in explicit_indexing_adapter(key, shape, indexing_support, raw_indexing_method)
836 """Support explicit indexing by delegating to a raw indexing method.
837
838 Outer and/or vectorized indexers are supported by indexing a second time
(...)
855 Indexing result, in the form of a duck numpy-array.
856 """
857 raw_key, numpy_indices = decompose_indexer(key, shape, indexing_support)
--> 858 result = raw_indexing_method(raw_key.tuple)
859 if numpy_indices.tuple:
860 # index the loaded np.ndarray
861 result = NumpyIndexingAdapter(result)[numpy_indices]
File [c:\Users\zjh54983\Anaconda3\envs\phd_ph6-02_ens-extended_analysis\Lib\site-packages\cfgrib\xarray_plugin.py:164](file:///C:/Users/zjh54983/Anaconda3/envs/phd_ph6-02_ens-extended_analysis/Lib/site-packages/cfgrib/xarray_plugin.py:164), in CfGribArrayWrapper._getitem(self, key)
159 def _getitem(
160 self,
161 key: T.Tuple[T.Any, ...],
162 ) -> np.ndarray:
163 with self.datastore.lock:
--> 164 return self.array[key]
File [c:\Users\zjh54983\Anaconda3\envs\phd_ph6-02_ens-extended_analysis\Lib\site-packages\cfgrib\dataset.py:358](file:///C:/Users/zjh54983/Anaconda3/envs/phd_ph6-02_ens-extended_analysis/Lib/site-packages/cfgrib/dataset.py:358), in OnDiskArray.__getitem__(self, item)
356 continue
357 # NOTE: fill a single field as found in the message
--> 358 message = self.index.get_field(message_ids[0]) # type: ignore
359 values = get_values_in_order(message, array_field[tuple(array_field_indexes)].shape)
360 array_field.__getitem__(tuple(array_field_indexes)).flat[:] = values
File [c:\Users\zjh54983\Anaconda3\envs\phd_ph6-02_ens-extended_analysis\Lib\site-packages\cfgrib\messages.py:484](file:///C:/Users/zjh54983/Anaconda3/envs/phd_ph6-02_ens-extended_analysis/Lib/site-packages/cfgrib/messages.py:484), in FieldsetIndex.get_field(self, message_id)
483 def get_field(self, message_id: T.Any) -> abc.Field:
--> 484 return ComputedKeysAdapter(self.fieldset[message_id], self.computed_keys)
File [c:\Users\zjh54983\Anaconda3\envs\phd_ph6-02_ens-extended_analysis\Lib\site-packages\cfgrib\messages.py:344](file:///C:/Users/zjh54983/Anaconda3/envs/phd_ph6-02_ens-extended_analysis/Lib/site-packages/cfgrib/messages.py:344), in FileStream.__getitem__(self, item)
342 def __getitem__(self, item: T.Optional[OffsetType]) -> Message:
343 with open(self.path, "rb") as file:
--> 344 return self.message_from_file(file, offset=item)
File [c:\Users\zjh54983\Anaconda3\envs\phd_ph6-02_ens-extended_analysis\Lib\site-packages\cfgrib\messages.py:340](file:///C:/Users/zjh54983/Anaconda3/envs/phd_ph6-02_ens-extended_analysis/Lib/site-packages/cfgrib/messages.py:340), in FileStream.message_from_file(self, file, offset, **kwargs)
338 def message_from_file(self, file, offset=None, **kwargs):
339 # type: (T.IO[bytes], T.Optional[OffsetType], T.Any) -> Message
--> 340 return Message.from_file(file, offset, **kwargs)
File [c:\Users\zjh54983\Anaconda3\envs\phd_ph6-02_ens-extended_analysis\Lib\site-packages\cfgrib\messages.py:93](file:///C:/Users/zjh54983/Anaconda3/envs/phd_ph6-02_ens-extended_analysis/Lib/site-packages/cfgrib/messages.py:93), in Message.from_file(cls, file, offset, **kwargs)
91 offset, field_in_message = offset
92 if offset is not None:
---> 93 file.seek(offset)
94 codes_id = None
95 if field_in_message == 0:
OSError: [Errno 22] Invalid argument
Loading data using `src.data_loading.load_ens.load_ens_data_D(file)` throws an `OSError`.
It's unclear from the trace, but the root cause may be related to running out of RAM, similar to ecmwf/cfgrib#340, although there should be enough memory for this not to be a problem given the size of the data (~3 GB files, over 16 GB of RAM free).
Full trace: