elkir / iccs23-hacakthon-delorean-datasets

MIT License
2 stars 1 forks source link

Error loading data with load_ens_data_D #14

Open ElliottKasoar opened 1 year ago

ElliottKasoar commented 1 year ago

Loading data using src.data_loading.load_ens.load_ens_data_D(file) throws an OSError.

It's unclear from the trace, but the root cause may be related to running out of RAM, similar to ecmwf/cfgrib#340, although there should be enough memory for this not to be a problem given the size of the data (~3GB files, with over 16GB of RAM free).

Full trace:

---------------------------------------------------------------------------
OSError                                   Traceback (most recent call last)
Cell In[3], line 1
----> 1 dsD = load_ens_data_D(file)

File [c:\Users\zjh54983\Downloads\hackathon\delorean-datasets\notebooks\..\src\data_loading\load_ens.py:231](file:///C:/Users/zjh54983/Downloads/hackathon/delorean-datasets/src/data_loading/load_ens.py:231), in load_ens_data_D(fn_D, drop_wind_components, temperature_in_C, calculate_diffs, verbose)
    228 # loading everything is slower:
    229 # 51.2 s ± 6.44 s
    230 logging.info(f"Loading {fn_D} (full)")
--> 231 dsD = xr.load_dataset(fn_D, engine='cfgrib', chunks=10) 
    233 # dsD = calculate_wind_speed(dsD, drop_uv= drop_wind_components,verbose=verbose)
    234 # if temperature_in_C:
    235 #     dsD = calculate_temperature_in_C(dsD)
    236 # if calculate_diffs:
    237 #     dsD = get_diff_values(dsD,verbose=verbose)
    238 # logging.info(f"Loading complete")
    239 return dsD

File [c:\Users\zjh54983\Anaconda3\envs\phd_ph6-02_ens-extended_analysis\Lib\site-packages\xarray\backends\api.py:275](file:///C:/Users/zjh54983/Anaconda3/envs/phd_ph6-02_ens-extended_analysis/Lib/site-packages/xarray/backends/api.py:275), in load_dataset(filename_or_obj, **kwargs)
    272     raise TypeError("cache has no effect in this context")
    274 with open_dataset(filename_or_obj, **kwargs) as ds:
--> 275     return ds.load()

File [c:\Users\zjh54983\Anaconda3\envs\phd_ph6-02_ens-extended_analysis\Lib\site-packages\xarray\core\dataset.py:792](file:///C:/Users/zjh54983/Anaconda3/envs/phd_ph6-02_ens-extended_analysis/Lib/site-packages/xarray/core/dataset.py:792), in Dataset.load(self, **kwargs)
    789 chunkmanager = get_chunked_array_type(*lazy_data.values())
    791 # evaluate all the chunked arrays simultaneously
--> 792 evaluated_data = chunkmanager.compute(*lazy_data.values(), **kwargs)
    794 for k, data in zip(lazy_data, evaluated_data):
    795     self.variables[k].data = data

File [c:\Users\zjh54983\Anaconda3\envs\phd_ph6-02_ens-extended_analysis\Lib\site-packages\xarray\core\daskmanager.py:70](file:///C:/Users/zjh54983/Anaconda3/envs/phd_ph6-02_ens-extended_analysis/Lib/site-packages/xarray/core/daskmanager.py:70), in DaskManager.compute(self, *data, **kwargs)
     67 def compute(self, *data: DaskArray, **kwargs) -> tuple[np.ndarray, ...]:
     68     from dask.array import compute
---> 70     return compute(*data, **kwargs)

File [c:\Users\zjh54983\Anaconda3\envs\phd_ph6-02_ens-extended_analysis\Lib\site-packages\dask\threaded.py:89](file:///C:/Users/zjh54983/Anaconda3/envs/phd_ph6-02_ens-extended_analysis/Lib/site-packages/dask/threaded.py:89), in get(dsk, keys, cache, num_workers, pool, **kwargs)
     86     elif isinstance(pool, multiprocessing.pool.Pool):
     87         pool = MultiprocessingPoolExecutor(pool)
---> 89 results = get_async(
     90     pool.submit,
     91     pool._max_workers,
     92     dsk,
     93     keys,
     94     cache=cache,
     95     get_id=_thread_get_id,
     96     pack_exception=pack_exception,
     97     **kwargs,
     98 )
    100 # Cleanup pools associated to dead threads
    101 with pools_lock:

File [c:\Users\zjh54983\Anaconda3\envs\phd_ph6-02_ens-extended_analysis\Lib\site-packages\dask\local.py:511](file:///C:/Users/zjh54983/Anaconda3/envs/phd_ph6-02_ens-extended_analysis/Lib/site-packages/dask/local.py:511), in get_async(submit, num_workers, dsk, result, cache, get_id, rerun_exceptions_locally, pack_exception, raise_exception, callbacks, dumps, loads, chunksize, **kwargs)
    509         _execute_task(task, data)  # Re-execute locally
    510     else:
--> 511         raise_exception(exc, tb)
    512 res, worker_id = loads(res_info)
    513 state["cache"][key] = res

File [c:\Users\zjh54983\Anaconda3\envs\phd_ph6-02_ens-extended_analysis\Lib\site-packages\dask\local.py:319](file:///C:/Users/zjh54983/Anaconda3/envs/phd_ph6-02_ens-extended_analysis/Lib/site-packages/dask/local.py:319), in reraise(exc, tb)
    317 if exc.__traceback__ is not tb:
    318     raise exc.with_traceback(tb)
--> 319 raise exc

File [c:\Users\zjh54983\Anaconda3\envs\phd_ph6-02_ens-extended_analysis\Lib\site-packages\dask\local.py:224](file:///C:/Users/zjh54983/Anaconda3/envs/phd_ph6-02_ens-extended_analysis/Lib/site-packages/dask/local.py:224), in execute_task(key, task_info, dumps, loads, get_id, pack_exception)
    222 try:
    223     task, data = loads(task_info)
--> 224     result = _execute_task(task, data)
    225     id = get_id()
    226     result = dumps((result, id))

File [c:\Users\zjh54983\Anaconda3\envs\phd_ph6-02_ens-extended_analysis\Lib\site-packages\xarray\core\indexing.py:484](file:///C:/Users/zjh54983/Anaconda3/envs/phd_ph6-02_ens-extended_analysis/Lib/site-packages/xarray/core/indexing.py:484), in ImplicitToExplicitIndexingAdapter.__array__(self, dtype)
    483 def __array__(self, dtype: np.typing.DTypeLike = None) -> np.ndarray:
--> 484     return np.asarray(self.get_duck_array(), dtype=dtype)

File [c:\Users\zjh54983\Anaconda3\envs\phd_ph6-02_ens-extended_analysis\Lib\site-packages\xarray\core\indexing.py:487](file:///C:/Users/zjh54983/Anaconda3/envs/phd_ph6-02_ens-extended_analysis/Lib/site-packages/xarray/core/indexing.py:487), in ImplicitToExplicitIndexingAdapter.get_duck_array(self)
    486 def get_duck_array(self):
--> 487     return self.array.get_duck_array()

File [c:\Users\zjh54983\Anaconda3\envs\phd_ph6-02_ens-extended_analysis\Lib\site-packages\xarray\core\indexing.py:664](file:///C:/Users/zjh54983/Anaconda3/envs/phd_ph6-02_ens-extended_analysis/Lib/site-packages/xarray/core/indexing.py:664), in CopyOnWriteArray.get_duck_array(self)
    663 def get_duck_array(self):
--> 664     return self.array.get_duck_array()

File [c:\Users\zjh54983\Anaconda3\envs\phd_ph6-02_ens-extended_analysis\Lib\site-packages\xarray\core\indexing.py:551](file:///C:/Users/zjh54983/Anaconda3/envs/phd_ph6-02_ens-extended_analysis/Lib/site-packages/xarray/core/indexing.py:551), in LazilyIndexedArray.get_duck_array(self)
    550 def get_duck_array(self):
--> 551     array = self.array[self.key]
    552     # self.array[self.key] is now a numpy array when
    553     # self.array is a BackendArray subclass
    554     # and self.key is BasicIndexer((slice(None, None, None),))
    555     # so we need the explicit check for ExplicitlyIndexed
    556     if isinstance(array, ExplicitlyIndexed):

File [c:\Users\zjh54983\Anaconda3\envs\phd_ph6-02_ens-extended_analysis\Lib\site-packages\cfgrib\xarray_plugin.py:155](file:///C:/Users/zjh54983/Anaconda3/envs/phd_ph6-02_ens-extended_analysis/Lib/site-packages/cfgrib/xarray_plugin.py:155), in CfGribArrayWrapper.__getitem__(self, key)
    151 def __getitem__(
    152     self,
    153     key: xr.core.indexing.ExplicitIndexer,
    154 ) -> np.ndarray:
--> 155     return xr.core.indexing.explicit_indexing_adapter(
    156         key, self.shape, xr.core.indexing.IndexingSupport.BASIC, self._getitem
    157     )

File [c:\Users\zjh54983\Anaconda3\envs\phd_ph6-02_ens-extended_analysis\Lib\site-packages\xarray\core\indexing.py:858](file:///C:/Users/zjh54983/Anaconda3/envs/phd_ph6-02_ens-extended_analysis/Lib/site-packages/xarray/core/indexing.py:858), in explicit_indexing_adapter(key, shape, indexing_support, raw_indexing_method)
    836 """Support explicit indexing by delegating to a raw indexing method.
    837 
    838 Outer and/or vectorized indexers are supported by indexing a second time
   (...)
    855 Indexing result, in the form of a duck numpy-array.
    856 """
    857 raw_key, numpy_indices = decompose_indexer(key, shape, indexing_support)
--> 858 result = raw_indexing_method(raw_key.tuple)
    859 if numpy_indices.tuple:
    860     # index the loaded np.ndarray
    861     result = NumpyIndexingAdapter(result)[numpy_indices]

File [c:\Users\zjh54983\Anaconda3\envs\phd_ph6-02_ens-extended_analysis\Lib\site-packages\cfgrib\xarray_plugin.py:164](file:///C:/Users/zjh54983/Anaconda3/envs/phd_ph6-02_ens-extended_analysis/Lib/site-packages/cfgrib/xarray_plugin.py:164), in CfGribArrayWrapper._getitem(self, key)
    159 def _getitem(
    160     self,
    161     key: T.Tuple[T.Any, ...],
    162 ) -> np.ndarray:
    163     with self.datastore.lock:
--> 164         return self.array[key]

File [c:\Users\zjh54983\Anaconda3\envs\phd_ph6-02_ens-extended_analysis\Lib\site-packages\cfgrib\dataset.py:358](file:///C:/Users/zjh54983/Anaconda3/envs/phd_ph6-02_ens-extended_analysis/Lib/site-packages/cfgrib/dataset.py:358), in OnDiskArray.__getitem__(self, item)
    356     continue
    357 # NOTE: fill a single field as found in the message
--> 358 message = self.index.get_field(message_ids[0])  # type: ignore
    359 values = get_values_in_order(message, array_field[tuple(array_field_indexes)].shape)
    360 array_field.__getitem__(tuple(array_field_indexes)).flat[:] = values

File [c:\Users\zjh54983\Anaconda3\envs\phd_ph6-02_ens-extended_analysis\Lib\site-packages\cfgrib\messages.py:484](file:///C:/Users/zjh54983/Anaconda3/envs/phd_ph6-02_ens-extended_analysis/Lib/site-packages/cfgrib/messages.py:484), in FieldsetIndex.get_field(self, message_id)
    483 def get_field(self, message_id: T.Any) -> abc.Field:
--> 484     return ComputedKeysAdapter(self.fieldset[message_id], self.computed_keys)

File [c:\Users\zjh54983\Anaconda3\envs\phd_ph6-02_ens-extended_analysis\Lib\site-packages\cfgrib\messages.py:344](file:///C:/Users/zjh54983/Anaconda3/envs/phd_ph6-02_ens-extended_analysis/Lib/site-packages/cfgrib/messages.py:344), in FileStream.__getitem__(self, item)
    342 def __getitem__(self, item: T.Optional[OffsetType]) -> Message:
    343     with open(self.path, "rb") as file:
--> 344         return self.message_from_file(file, offset=item)

File [c:\Users\zjh54983\Anaconda3\envs\phd_ph6-02_ens-extended_analysis\Lib\site-packages\cfgrib\messages.py:340](file:///C:/Users/zjh54983/Anaconda3/envs/phd_ph6-02_ens-extended_analysis/Lib/site-packages/cfgrib/messages.py:340), in FileStream.message_from_file(self, file, offset, **kwargs)
    338 def message_from_file(self, file, offset=None, **kwargs):
    339     # type: (T.IO[bytes], T.Optional[OffsetType], T.Any) -> Message
--> 340     return Message.from_file(file, offset, **kwargs)

File [c:\Users\zjh54983\Anaconda3\envs\phd_ph6-02_ens-extended_analysis\Lib\site-packages\cfgrib\messages.py:93](file:///C:/Users/zjh54983/Anaconda3/envs/phd_ph6-02_ens-extended_analysis/Lib/site-packages/cfgrib/messages.py:93), in Message.from_file(cls, file, offset, **kwargs)
     91     offset, field_in_message = offset
     92 if offset is not None:
---> 93     file.seek(offset)
     94 codes_id = None
     95 if field_in_message == 0:

OSError: [Errno 22] Invalid argument