Closed chiaweh2 closed 1 year ago
I was trying out the CMIP6 cookbook, but after the Binder launch, the first cell's execution shows an error. The error goes away if import xarray as xr is removed. Does anyone have the same problem? Thank you!
I'm also getting the same error. It seems related to this numba issue, so I'll see if pinning numpy to a previous version works for now.
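For anyone else hitting this, here is a quick diagnostic sketch (assuming the root cause is the numba/numpy binary incompatibility shown in the traceback below) that surfaces the failure directly instead of hiding it inside another import:

import numpy
print("numpy", numpy.__version__)
try:
    import numba  # fails with SystemError when numba was built against an older numpy
    print("numba", numba.__version__)
except SystemError as err:
    print("numba failed to import:", err)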
Thank you @r-ford for pinpointing the error! The cookbook is working now.
Sorry to close and reopen. The import xarray no longer raises an error, but I stumbled on the fourth line/cell of the code:
dset_dict = cat.to_dataset_dict(zarr_kwargs={'consolidated': True})
list(dset_dict.keys())
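(For context, cat here is the intake-esm datastore opened earlier in the notebook. A minimal sketch of that setup, assuming the public Pangeo CMIP6 catalog URL; the cookbook's exact search terms may differ:)

import intake
url = "https://storage.googleapis.com/cmip6/pangeo-cmip6.json"  # public Pangeo CMIP6 collection
col = intake.open_esm_datastore(url)  # requires the intake-esm plugin
# Narrow the catalog down to the dataset whose key fails below
cat = col.search(source_id="EC-Earth3-CC", experiment_id="ssp585", table_id="Oyr", grid_label="gn")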
It seems this error might be related to the original one. I also tested the same code on my local computer, and it works fine at this particular cell. Here is the traceback:
---------------------------------------------------------------------------
SystemError Traceback (most recent call last)
File /srv/conda/envs/notebook/lib/python3.9/site-packages/intake_esm/source.py:240, in ESMDataSource._open_dataset(self)
220 datasets = [
221 _open_dataset(
222 record[self.path_column_name],
(...)
237 for _, record in self.df.iterrows()
238 ]
--> 240 datasets = dask.compute(*datasets)
241 if len(datasets) == 1:
File /srv/conda/envs/notebook/lib/python3.9/site-packages/dask/base.py:599, in compute(traverse, optimize_graph, scheduler, get, *args, **kwargs)
597 postcomputes.append(x.__dask_postcompute__())
--> 599 results = schedule(dsk, keys, **kwargs)
600 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
File /srv/conda/envs/notebook/lib/python3.9/site-packages/dask/threaded.py:89, in get(dsk, keys, cache, num_workers, pool, **kwargs)
87 pool = MultiprocessingPoolExecutor(pool)
---> 89 results = get_async(
90 pool.submit,
91 pool._max_workers,
92 dsk,
93 keys,
94 cache=cache,
95 get_id=_thread_get_id,
96 pack_exception=pack_exception,
97 **kwargs,
98 )
100 # Cleanup pools associated to dead threads
File /srv/conda/envs/notebook/lib/python3.9/site-packages/dask/local.py:511, in get_async(submit, num_workers, dsk, result, cache, get_id, rerun_exceptions_locally, pack_exception, raise_exception, callbacks, dumps, loads, chunksize, **kwargs)
510 else:
--> 511 raise_exception(exc, tb)
512 res, worker_id = loads(res_info)
File /srv/conda/envs/notebook/lib/python3.9/site-packages/dask/local.py:319, in reraise(exc, tb)
318 raise exc.with_traceback(tb)
--> 319 raise exc
File /srv/conda/envs/notebook/lib/python3.9/site-packages/dask/local.py:224, in execute_task(key, task_info, dumps, loads, get_id, pack_exception)
223 task, data = loads(task_info)
--> 224 result = _execute_task(task, data)
225 id = get_id()
File /srv/conda/envs/notebook/lib/python3.9/site-packages/dask/core.py:119, in _execute_task(arg, cache, dsk)
116 # Note: Don't assign the subtask results to a variable. numpy detects
117 # temporaries by their reference count and can execute certain
118 # operations in-place.
--> 119 return func(*(_execute_task(a, cache) for a in args))
120 elif not ishashable(arg):
File /srv/conda/envs/notebook/lib/python3.9/site-packages/dask/utils.py:71, in apply(func, args, kwargs)
70 if kwargs:
---> 71 return func(*args, **kwargs)
72 else:
File /srv/conda/envs/notebook/lib/python3.9/site-packages/intake_esm/source.py:73, in _open_dataset(urlpath, varname, xarray_open_kwargs, preprocess, requested_variables, additional_attrs, expand_dims, data_format)
72 else:
---> 73 ds = xr.open_dataset(url, **xarray_open_kwargs)
74 if preprocess is not None:
File /srv/conda/envs/notebook/lib/python3.9/site-packages/xarray/backends/api.py:547, in open_dataset(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, inline_array, backend_kwargs, **kwargs)
541 backend_ds = backend.open_dataset(
542 filename_or_obj,
543 drop_variables=drop_variables,
544 **decoders,
545 **kwargs,
546 )
--> 547 ds = _dataset_from_backend_dataset(
548 backend_ds,
549 filename_or_obj,
550 engine,
551 chunks,
552 cache,
553 overwrite_encoded_chunks,
554 inline_array,
555 drop_variables=drop_variables,
556 **decoders,
557 **kwargs,
558 )
559 return ds
File /srv/conda/envs/notebook/lib/python3.9/site-packages/xarray/backends/api.py:358, in _dataset_from_backend_dataset(backend_ds, filename_or_obj, engine, chunks, cache, overwrite_encoded_chunks, inline_array, **extra_tokens)
357 else:
--> 358 ds = _chunk_ds(
359 backend_ds,
360 filename_or_obj,
361 engine,
362 chunks,
363 overwrite_encoded_chunks,
364 inline_array,
365 **extra_tokens,
366 )
368 ds.set_close(backend_ds._close)
File /srv/conda/envs/notebook/lib/python3.9/site-packages/xarray/backends/api.py:326, in _chunk_ds(backend_ds, filename_or_obj, engine, chunks, overwrite_encoded_chunks, inline_array, **extra_tokens)
325 for name, var in backend_ds.variables.items():
--> 326 var_chunks = _get_chunk(var, chunks)
327 variables[name] = _maybe_chunk(
328 name,
329 var,
(...)
334 inline_array=inline_array,
335 )
File /srv/conda/envs/notebook/lib/python3.9/site-packages/xarray/core/dataset.py:211, in _get_chunk(var, chunks)
207 """
208 Return map from each dim to chunk sizes, accounting for backend's preferred chunks.
209 """
--> 211 import dask.array as da
213 if isinstance(var, IndexVariable):
File /srv/conda/envs/notebook/lib/python3.9/site-packages/dask/array/__init__.py:2
1 try:
----> 2 from dask.array import backends, fft, lib, linalg, ma, overlap, random
3 from dask.array.blockwise import atop, blockwise
File /srv/conda/envs/notebook/lib/python3.9/site-packages/dask/array/backends.py:18
17 from dask.array.numpy_compat import ma_divide
---> 18 from dask.array.percentile import _percentile
19 from dask.backends import CreationDispatch, DaskBackendEntrypoint
File /srv/conda/envs/notebook/lib/python3.9/site-packages/dask/array/percentile.py:9
7 from tlz import merge
----> 9 from dask.array.core import Array
10 from dask.array.numpy_compat import _numpy_122
File /srv/conda/envs/notebook/lib/python3.9/site-packages/dask/array/core.py:36
35 from dask.array.chunk import getitem
---> 36 from dask.array.chunk_types import is_valid_array_chunk, is_valid_chunk_type
38 # Keep einsum_lookup and tensordot_lookup here for backwards compatibility
File /srv/conda/envs/notebook/lib/python3.9/site-packages/dask/array/chunk_types.py:122
121 try:
--> 122 import sparse
124 register_chunk_type(sparse.SparseArray)
File /srv/conda/envs/notebook/lib/python3.9/site-packages/sparse/__init__.py:1
----> 1 from ._coo import COO, as_coo
2 from ._compressed import GCXS
File /srv/conda/envs/notebook/lib/python3.9/site-packages/sparse/_coo/__init__.py:1
----> 1 from .core import COO, as_coo
2 from .common import (
3 concatenate,
4 clip,
(...)
22 diagonalize,
23 )
File /srv/conda/envs/notebook/lib/python3.9/site-packages/sparse/_coo/core.py:9
8 import numpy as np
----> 9 import numba
10 import scipy.sparse
File /srv/conda/envs/notebook/lib/python3.9/site-packages/numba/__init__.py:43
42 # Re-export vectorize decorators and the thread layer querying function
---> 43 from numba.np.ufunc import (vectorize, guvectorize, threading_layer,
44 get_num_threads, set_num_threads)
46 # Re-export Numpy helpers
File /srv/conda/envs/notebook/lib/python3.9/site-packages/numba/np/ufunc/__init__.py:3
1 # -*- coding: utf-8 -*-
----> 3 from numba.np.ufunc.decorators import Vectorize, GUVectorize, vectorize, guvectorize
4 from numba.np.ufunc._internal import PyUFunc_None, PyUFunc_Zero, PyUFunc_One
File /srv/conda/envs/notebook/lib/python3.9/site-packages/numba/np/ufunc/decorators.py:3
1 import inspect
----> 3 from numba.np.ufunc import _internal
4 from numba.np.ufunc.parallel import ParallelUFuncBuilder, ParallelGUFuncBuilder
SystemError: initialization of _internal failed without raising an exception
The above exception was the direct cause of the following exception:
ESMDataSourceError Traceback (most recent call last)
Cell In[4], line 1
----> 1 dset_dict = cat.to_dataset_dict(zarr_kwargs={'consolidated': True})
2 list(dset_dict.keys())
File /srv/conda/envs/notebook/lib/python3.9/site-packages/pydantic/decorator.py:40, in pydantic.decorator.validate_arguments.validate.wrapper_function()
File /srv/conda/envs/notebook/lib/python3.9/site-packages/pydantic/decorator.py:134, in pydantic.decorator.ValidatedFunction.call()
File /srv/conda/envs/notebook/lib/python3.9/site-packages/pydantic/decorator.py:206, in pydantic.decorator.ValidatedFunction.execute()
File /srv/conda/envs/notebook/lib/python3.9/site-packages/intake_esm/core.py:651, in esm_datastore.to_dataset_dict(self, xarray_open_kwargs, xarray_combine_by_coords_kwargs, preprocess, storage_options, progressbar, aggregate, skip_on_error, **kwargs)
649 except Exception as exc:
650 if not skip_on_error:
--> 651 raise exc
652 self.datasets = self._create_derived_variables(datasets, skip_on_error)
653 return self.datasets
File /srv/conda/envs/notebook/lib/python3.9/site-packages/intake_esm/core.py:647, in esm_datastore.to_dataset_dict(self, xarray_open_kwargs, xarray_combine_by_coords_kwargs, preprocess, storage_options, progressbar, aggregate, skip_on_error, **kwargs)
645 for task in gen:
646 try:
--> 647 key, ds = task.result()
648 datasets[key] = ds
649 except Exception as exc:
File /srv/conda/envs/notebook/lib/python3.9/concurrent/futures/_base.py:439, in Future.result(self, timeout)
437 raise CancelledError()
438 elif self._state == FINISHED:
--> 439 return self.__get_result()
441 self._condition.wait(timeout)
443 if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:
File /srv/conda/envs/notebook/lib/python3.9/concurrent/futures/_base.py:391, in Future.__get_result(self)
389 if self._exception:
390 try:
--> 391 raise self._exception
392 finally:
393 # Break a reference cycle with the exception in self._exception
394 self = None
File /srv/conda/envs/notebook/lib/python3.9/concurrent/futures/thread.py:58, in _WorkItem.run(self)
55 return
57 try:
---> 58 result = self.fn(*self.args, **self.kwargs)
59 except BaseException as exc:
60 self.future.set_exception(exc)
File /srv/conda/envs/notebook/lib/python3.9/site-packages/intake_esm/core.py:789, in _load_source(key, source)
788 def _load_source(key, source):
--> 789 return key, source.to_dask()
File /srv/conda/envs/notebook/lib/python3.9/site-packages/intake_esm/source.py:273, in ESMDataSource.to_dask(self)
271 def to_dask(self):
272 """Return xarray object (which will have chunks)"""
--> 273 self._load_metadata()
274 return self._ds
File /srv/conda/envs/notebook/lib/python3.9/site-packages/intake/source/base.py:285, in DataSourceBase._load_metadata(self)
283 """load metadata only if needed"""
284 if self._schema is None:
--> 285 self._schema = self._get_schema()
286 self.dtype = self._schema.dtype
287 self.shape = self._schema.shape
File /srv/conda/envs/notebook/lib/python3.9/site-packages/intake_esm/source.py:205, in ESMDataSource._get_schema(self)
202 def _get_schema(self) -> Schema:
204 if self._ds is None:
--> 205 self._open_dataset()
206 metadata = {'dims': {}, 'data_vars': {}, 'coords': ()}
207 self._schema = Schema(
208 datashape=None,
209 dtype=None,
(...)
212 extra_metadata=metadata,
213 )
File /srv/conda/envs/notebook/lib/python3.9/site-packages/intake_esm/source.py:265, in ESMDataSource._open_dataset(self)
262 self._ds.attrs[OPTIONS['dataset_key']] = self.key
264 except Exception as exc:
--> 265 raise ESMDataSourceError(
266 f"""Failed to load dataset with key='{self.key}'
267 You can use `cat['{self.key}'].df` to inspect the assets/files for this key.
268 """
269 ) from exc
ESMDataSourceError: Failed to load dataset with key='ScenarioMIP.EC-Earth-Consortium.EC-Earth3-CC.ssp585.Oyr.gn'
You can use `cat['ScenarioMIP.EC-Earth-Consortium.EC-Earth3-CC.ssp585.Oyr.gn'].df` to inspect the assets/files for this key.
No problem! Using numpy<1.24 didn't help (#40), but this seems like something that might resolve itself once things are fixed upstream. Maybe someone on @ProjectPythia/infrastructure is familiar with this.
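Until then, one way to reproduce the root cause directly, rather than the wrapped ESMDataSourceError, is to trigger the same import chain the traceback shows (a sketch):

try:
    import dask.array  # triggers the sparse -> numba import chain from the traceback above
except SystemError as err:
    print("numba/numpy binary incompatibility:", err)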
Got it, thank you!
@r-ford did this get resolved upstream?
@brian-rose I was able to run the notebook in the binder, so I believe this is resolved.
Yeah, it's resolved - we cut a new release of intake-esm a couple weeks ago.
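(For anyone checking their own environment, a quick version printout; I'm not certain which exact releases carry the fix:)

import intake_esm, numba, numpy
print(intake_esm.__version__, numba.__version__, numpy.__version__)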