The experiment '01deg_jra55v13_ryf9091' can't currently be converted to an xarray via to_dask. From looking at the Dask dashboard, all of the open_dataset calls complete, but then something crashes out at the concatenation stage.
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
File /g/data/hh5/public/apps/miniconda3/envs/analysis3-24.07/lib/python3.10/site-packages/intake_esm/source.py:259, in ESMDataSource._open_dataset(self)
255 datasets = [
256 ds.set_coords(set(ds.variables) - set(ds.attrs[OPTIONS['vars_key']]))
257 for ds in datasets
258 ]
--> 259 self._ds = xr.combine_by_coords(datasets, **self.xarray_combine_by_coords_kwargs)
261 self._ds.attrs[OPTIONS['dataset_key']] = self.key
File /g/data/hh5/public/apps/miniconda3/envs/analysis3-24.07/lib/python3.10/site-packages/xarray/core/combine.py:958, in combine_by_coords(data_objects, compat, data_vars, coords, fill_value, join, combine_attrs)
956 # Perform the multidimensional combine on each group of data variables
957 # before merging back together
--> 958 concatenated_grouped_by_data_vars = tuple(
959 _combine_single_variable_hypercube(
960 tuple(datasets_with_same_vars),
961 fill_value=fill_value,
962 data_vars=data_vars,
963 coords=coords,
964 compat=compat,
965 join=join,
966 combine_attrs=combine_attrs,
967 )
968 for vars, datasets_with_same_vars in grouped_by_vars
969 )
971 return merge(
972 concatenated_grouped_by_data_vars,
973 compat=compat,
(...)
976 combine_attrs=combine_attrs,
977 )
File /g/data/hh5/public/apps/miniconda3/envs/analysis3-24.07/lib/python3.10/site-packages/xarray/core/combine.py:959, in <genexpr>(.0)
956 # Perform the multidimensional combine on each group of data variables
957 # before merging back together
958 concatenated_grouped_by_data_vars = tuple(
--> 959 _combine_single_variable_hypercube(
960 tuple(datasets_with_same_vars),
961 fill_value=fill_value,
962 data_vars=data_vars,
963 coords=coords,
964 compat=compat,
965 join=join,
966 combine_attrs=combine_attrs,
967 )
968 for vars, datasets_with_same_vars in grouped_by_vars
969 )
971 return merge(
972 concatenated_grouped_by_data_vars,
973 compat=compat,
(...)
976 combine_attrs=combine_attrs,
977 )
File /g/data/hh5/public/apps/miniconda3/envs/analysis3-24.07/lib/python3.10/site-packages/xarray/core/combine.py:619, in _combine_single_variable_hypercube(datasets, fill_value, data_vars, coords, compat, join, combine_attrs)
614 raise ValueError(
615 "At least one Dataset is required to resolve variable names "
616 "for combined hypercube."
617 )
--> 619 combined_ids, concat_dims = _infer_concat_order_from_coords(list(datasets))
621 if fill_value is None:
622 # check that datasets form complete hypercube
File /g/data/hh5/public/apps/miniconda3/envs/analysis3-24.07/lib/python3.10/site-packages/xarray/core/combine.py:144, in _infer_concat_order_from_coords(datasets)
143 if len(datasets) > 1 and not concat_dims:
--> 144 raise ValueError(
145 "Could not find any dimension coordinates to use to "
146 "order the datasets for concatenation"
147 )
149 combined_ids = dict(zip(tile_ids, datasets))
ValueError: Could not find any dimension coordinates to use to order the datasets for concatenation
The above exception was the direct cause of the following exception:
ESMDataSourceError Traceback (most recent call last)
Cell In[4], line 1
----> 1 darray = ht_search.to_dask()
File /g/data/hh5/public/apps/miniconda3/envs/analysis3-24.07/lib/python3.10/site-packages/intake_esm/core.py:814, in esm_datastore.to_dask(self, **kwargs)
810 if len(self) != 1: # quick check to fail more quickly if there are many results
811 raise ValueError(
812 f'Expected exactly one dataset. Received {len(self)} datasets. Please refine your search or use `.to_dataset_dict()`.'
813 )
--> 814 res = self.to_dataset_dict(**{**kwargs, 'progressbar': False})
815 if len(res) != 1: # extra check in case kwargs did modify something
816 raise ValueError(
817 f'Expected exactly one dataset. Received {len(self)} datasets. Please refine your search or use `.to_dataset_dict()`.'
818 )
File /g/data/hh5/public/apps/miniconda3/envs/analysis3-24.07/lib/python3.10/site-packages/pydantic/deprecated/decorator.py:55, in validate_arguments.<locals>.validate.<locals>.wrapper_function(*args, **kwargs)
53 @wraps(_func)
54 def wrapper_function(*args: Any, **kwargs: Any) -> Any:
---> 55 return vd.call(*args, **kwargs)
File /g/data/hh5/public/apps/miniconda3/envs/analysis3-24.07/lib/python3.10/site-packages/pydantic/deprecated/decorator.py:150, in ValidatedFunction.call(self, *args, **kwargs)
148 def call(self, *args: Any, **kwargs: Any) -> Any:
149 m = self.init_model_instance(*args, **kwargs)
--> 150 return self.execute(m)
File /g/data/hh5/public/apps/miniconda3/envs/analysis3-24.07/lib/python3.10/site-packages/pydantic/deprecated/decorator.py:222, in ValidatedFunction.execute(self, m)
220 return self.raw_function(*args_, **kwargs, **var_kwargs)
221 else:
--> 222 return self.raw_function(**d, **var_kwargs)
File /g/data/hh5/public/apps/miniconda3/envs/analysis3-24.07/lib/python3.10/site-packages/intake_esm/core.py:686, in esm_datastore.to_dataset_dict(self, xarray_open_kwargs, xarray_combine_by_coords_kwargs, preprocess, storage_options, progressbar, aggregate, skip_on_error, **kwargs)
684 except Exception as exc:
685 if not skip_on_error:
--> 686 raise exc
687 self.datasets = self._create_derived_variables(datasets, skip_on_error)
688 return self.datasets
File /g/data/hh5/public/apps/miniconda3/envs/analysis3-24.07/lib/python3.10/site-packages/intake_esm/core.py:682, in esm_datastore.to_dataset_dict(self, xarray_open_kwargs, xarray_combine_by_coords_kwargs, preprocess, storage_options, progressbar, aggregate, skip_on_error, **kwargs)
680 for task in gen:
681 try:
--> 682 key, ds = task.result()
683 datasets[key] = ds
684 except Exception as exc:
File /g/data/hh5/public/apps/miniconda3/envs/analysis3-24.07/lib/python3.10/concurrent/futures/_base.py:451, in Future.result(self, timeout)
449 raise CancelledError()
450 elif self._state == FINISHED:
--> 451 return self.__get_result()
453 self._condition.wait(timeout)
455 if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:
File /g/data/hh5/public/apps/miniconda3/envs/analysis3-24.07/lib/python3.10/concurrent/futures/_base.py:403, in Future.__get_result(self)
401 if self._exception:
402 try:
--> 403 raise self._exception
404 finally:
405 # Break a reference cycle with the exception in self._exception
406 self = None
File /g/data/hh5/public/apps/miniconda3/envs/analysis3-24.07/lib/python3.10/concurrent/futures/thread.py:58, in _WorkItem.run(self)
55 return
57 try:
---> 58 result = self.fn(*self.args, **self.kwargs)
59 except BaseException as exc:
60 self.future.set_exception(exc)
File /g/data/hh5/public/apps/miniconda3/envs/analysis3-24.07/lib/python3.10/site-packages/intake_esm/core.py:833, in _load_source(key, source)
832 def _load_source(key, source):
--> 833 return key, source.to_dask()
File /g/data/hh5/public/apps/miniconda3/envs/analysis3-24.07/lib/python3.10/site-packages/intake_esm/source.py:272, in ESMDataSource.to_dask(self)
270 def to_dask(self):
271 """Return xarray object (which will have chunks)"""
--> 272 self._load_metadata()
273 return self._ds
File /g/data/hh5/public/apps/miniconda3/envs/analysis3-24.07/lib/python3.10/site-packages/intake/source/base.py:283, in DataSourceBase._load_metadata(self)
281 """load metadata only if needed"""
282 if self._schema is None:
--> 283 self._schema = self._get_schema()
284 self.dtype = self._schema.dtype
285 self.shape = self._schema.shape
File /g/data/hh5/public/apps/miniconda3/envs/analysis3-24.07/lib/python3.10/site-packages/intake_esm/source.py:208, in ESMDataSource._get_schema(self)
206 def _get_schema(self) -> Schema:
207 if self._ds is None:
--> 208 self._open_dataset()
209 metadata = {'dims': {}, 'data_vars': {}, 'coords': ()}
210 self._schema = Schema(
211 datashape=None,
212 dtype=None,
(...)
215 extra_metadata=metadata,
216 )
File /g/data/hh5/public/apps/miniconda3/envs/analysis3-24.07/lib/python3.10/site-packages/intake_esm/source.py:264, in ESMDataSource._open_dataset(self)
261 self._ds.attrs[OPTIONS['dataset_key']] = self.key
263 except Exception as exc:
--> 264 raise ESMDataSourceError(
265 f"""Failed to load dataset with key='{self.key}'
266 You can use `cat['{self.key}'].df` to inspect the assets/files for this key.
267 """
268 ) from exc
ESMDataSourceError: Failed to load dataset with key='ocean_grid.fx'
You can use `cat['ocean_grid.fx'].df` to inspect the assets/files for this key.
Additional context
I discovered this by accident whilst trying to merge the updated version of the COSIMA Recipes example "Cross-contour_transport.ipynb" into the branch that is doing the Intake update of the same recipe: https://github.com/COSIMA/cosima-recipes/pull/356 . The issue doesn't actually affect the example so far, as I can work around it by querying on a single start_time for the first part of the recipe, and then the time range used in the second part of the recipe doesn't seem to be affected.
Describe the bug
The experiment '01deg_jra55v13_ryf9091' can't currently be converted to an xarray via `to_dask`. From looking at the Dask dashboard, all of the `open_dataset` calls complete, but then something crashes out at the concatenation stage.

To Reproduce
In a Jupyter notebook with a few CPUs, run:
The last cell will fail with the following:
Additional context
I discovered this by accident whilst trying to merge the updated version of the COSIMA Recipes example "Cross-contour_transport.ipynb" into the branch that is doing the Intake update of the same recipe: https://github.com/COSIMA/cosima-recipes/pull/356 . The issue doesn't actually affect the example so far, as I can work around it by querying on a single `start_time` for the first part of the recipe, and then the time range used in the second part of the recipe doesn't seem to be affected.