ProjectPythia / cmip6-cookbook

Examples of analysis of Google Cloud CMIP6 data using Pangeo tools
https://projectpythia.org/cmip6-cookbook/
Apache License 2.0

Error when executing code in binder #39

Closed · chiaweh2 closed this 1 year ago

chiaweh2 commented 1 year ago

I was trying out the CMIP6 cookbook, but after the Binder launch, executing the first cell shows an error like this:

---------------------------------------------------------------------------
SystemError                               Traceback (most recent call last)
Cell In[1], line 1
----> 1 import xarray as xr
      2 xr.set_options(display_style='html')
      3 import intake

File /srv/conda/envs/notebook/lib/python3.9/site-packages/xarray/__init__.py:1
----> 1 from . import testing, tutorial
      2 from .backends.api import (
      3     load_dataarray,
      4     load_dataset,
   (...)
      8     save_mfdataset,
      9 )
     10 from .backends.rasterio_ import open_rasterio

File /srv/conda/envs/notebook/lib/python3.9/site-packages/xarray/testing.py:9
      6 import numpy as np
      7 import pandas as pd
----> 9 from xarray.core import duck_array_ops, formatting, utils
     10 from xarray.core.dataarray import DataArray
     11 from xarray.core.dataset import Dataset

File /srv/conda/envs/notebook/lib/python3.9/site-packages/xarray/core/duck_array_ops.py:28
     26 from . import dask_array_ops, dtypes, npcompat, nputils
     27 from .nputils import nanfirst, nanlast
---> 28 from .pycompat import cupy_array_type, is_duck_dask_array
     29 from .utils import is_duck_array
     31 try:

File /srv/conda/envs/notebook/lib/python3.9/site-packages/xarray/core/pycompat.py:48
     44         self.type = duck_array_type
     45         self.available = duck_array_module is not None
---> 48 dsk = DuckArrayModule("dask")
     49 dask_version = dsk.version
     50 dask_array_type = dsk.type

File /srv/conda/envs/notebook/lib/python3.9/site-packages/xarray/core/pycompat.py:27, in DuckArrayModule.__init__(self, mod)
     24 duck_array_version = Version(duck_array_module.__version__)
     26 if mod == "dask":
---> 27     duck_array_type = (import_module("dask.array").Array,)
     28 elif mod == "pint":
     29     duck_array_type = (duck_array_module.Quantity,)

File /srv/conda/envs/notebook/lib/python3.9/importlib/__init__.py:127, in import_module(name, package)
    125             break
    126         level += 1
--> 127 return _bootstrap._gcd_import(name[level:], package, level)

File /srv/conda/envs/notebook/lib/python3.9/site-packages/dask/array/__init__.py:2
      1 try:
----> 2     from dask.array import backends, fft, lib, linalg, ma, overlap, random
      3     from dask.array.blockwise import atop, blockwise
      4     from dask.array.chunk_types import register_chunk_type

File /srv/conda/envs/notebook/lib/python3.9/site-packages/dask/array/backends.py:18
     16 from dask.array.numpy_compat import divide as np_divide
     17 from dask.array.numpy_compat import ma_divide
---> 18 from dask.array.percentile import _percentile
     19 from dask.backends import CreationDispatch, DaskBackendEntrypoint
     21 concatenate_lookup.register((object, np.ndarray), np.concatenate)

File /srv/conda/envs/notebook/lib/python3.9/site-packages/dask/array/percentile.py:9
      6 import numpy as np
      7 from tlz import merge
----> 9 from dask.array.core import Array
     10 from dask.array.numpy_compat import _numpy_122
     11 from dask.array.numpy_compat import percentile as np_percentile

File /srv/conda/envs/notebook/lib/python3.9/site-packages/dask/array/core.py:36
     34 from dask.array import chunk
     35 from dask.array.chunk import getitem
---> 36 from dask.array.chunk_types import is_valid_array_chunk, is_valid_chunk_type
     38 # Keep einsum_lookup and tensordot_lookup here for backwards compatibility
     39 from dask.array.dispatch import (  # noqa: F401
     40     concatenate_lookup,
     41     einsum_lookup,
     42     tensordot_lookup,
     43 )

File /srv/conda/envs/notebook/lib/python3.9/site-packages/dask/array/chunk_types.py:122
    119     pass
    121 try:
--> 122     import sparse
    124     register_chunk_type(sparse.SparseArray)
    125 except ImportError:

File /srv/conda/envs/notebook/lib/python3.9/site-packages/sparse/__init__.py:1
----> 1 from ._coo import COO, as_coo
      2 from ._compressed import GCXS
      3 from ._dok import DOK

File /srv/conda/envs/notebook/lib/python3.9/site-packages/sparse/_coo/__init__.py:1
----> 1 from .core import COO, as_coo
      2 from .common import (
      3     concatenate,
      4     clip,
   (...)
     22     diagonalize,
     23 )
     25 __all__ = [
     26     "COO",
     27     "as_coo",
   (...)
     47     "diagonalize",
     48 ]

File /srv/conda/envs/notebook/lib/python3.9/site-packages/sparse/_coo/core.py:9
      6 import warnings
      8 import numpy as np
----> 9 import numba
     10 import scipy.sparse
     11 from numpy.lib.mixins import NDArrayOperatorsMixin

File /srv/conda/envs/notebook/lib/python3.9/site-packages/numba/__init__.py:43
     39 from numba.core.decorators import (cfunc, generated_jit, jit, njit, stencil,
     40                                    jit_module)
     42 # Re-export vectorize decorators and the thread layer querying function
---> 43 from numba.np.ufunc import (vectorize, guvectorize, threading_layer,
     44                             get_num_threads, set_num_threads)
     46 # Re-export Numpy helpers
     47 from numba.np.numpy_support import carray, farray, from_dtype

File /srv/conda/envs/notebook/lib/python3.9/site-packages/numba/np/ufunc/__init__.py:3
      1 # -*- coding: utf-8 -*-
----> 3 from numba.np.ufunc.decorators import Vectorize, GUVectorize, vectorize, guvectorize
      4 from numba.np.ufunc._internal import PyUFunc_None, PyUFunc_Zero, PyUFunc_One
      5 from numba.np.ufunc import _internal, array_exprs

File /srv/conda/envs/notebook/lib/python3.9/site-packages/numba/np/ufunc/decorators.py:3
      1 import inspect
----> 3 from numba.np.ufunc import _internal
      4 from numba.np.ufunc.parallel import ParallelUFuncBuilder, ParallelGUFuncBuilder
      6 from numba.core.registry import TargetRegistry

SystemError: initialization of _internal failed without raising an exception

The error goes away if import xarray as xr is removed. Does anyone have the same problem? Thank you!

r-ford commented 1 year ago

I'm also getting the same error. It seems related to this numba issue, so I'll see if pinning numpy to a previous version works for now.
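
For reference, here is a minimal sketch for checking whether the Binder environment has the numpy/numba pairing behind this SystemError; the 1.24/0.57 cutoffs are assumptions based on the numba compatibility discussion, not confirmed pins for this cookbook:

from importlib.metadata import version
from packaging.version import Version

# Read installed versions from package metadata rather than importing numba,
# since importing numba is what raises the SystemError above.
np_ver = Version(version("numpy"))
nb_ver = Version(version("numba"))
print(f"numpy {np_ver}, numba {nb_ver}")

# Assumption: numba releases before 0.57 were built against numpy < 1.24,
# so that combination is the likely culprit here.
if np_ver >= Version("1.24") and nb_ver < Version("0.57"):
    print("Incompatible pair: pin numpy<1.24 or wait for an updated numba.")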

chiaweh2 commented 1 year ago

Thank you @r-ford for pinpointing the error! The cookbook is working now.

chiaweh2 commented 1 year ago

Sorry to close and reopen. The import xarray no longer errors, but now I stumble on the fourth line/cell of the code:

dset_dict = cat.to_dataset_dict(zarr_kwargs={'consolidated': True})
list(dset_dict.keys())

This error seems related to the original one. I also tested the same code on my local computer, and it works fine at this particular cell.

---------------------------------------------------------------------------
SystemError                               Traceback (most recent call last)
File /srv/conda/envs/notebook/lib/python3.9/site-packages/intake_esm/source.py:240, in ESMDataSource._open_dataset(self)
    220 datasets = [
    221     _open_dataset(
    222         record[self.path_column_name],
   (...)
    237     for _, record in self.df.iterrows()
    238 ]
--> 240 datasets = dask.compute(*datasets)
    241 if len(datasets) == 1:

File /srv/conda/envs/notebook/lib/python3.9/site-packages/dask/base.py:599, in compute(traverse, optimize_graph, scheduler, get, *args, **kwargs)
    597     postcomputes.append(x.__dask_postcompute__())
--> 599 results = schedule(dsk, keys, **kwargs)
    600 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])

File /srv/conda/envs/notebook/lib/python3.9/site-packages/dask/threaded.py:89, in get(dsk, keys, cache, num_workers, pool, **kwargs)
     87         pool = MultiprocessingPoolExecutor(pool)
---> 89 results = get_async(
     90     pool.submit,
     91     pool._max_workers,
     92     dsk,
     93     keys,
     94     cache=cache,
     95     get_id=_thread_get_id,
     96     pack_exception=pack_exception,
     97     **kwargs,
     98 )
    100 # Cleanup pools associated to dead threads

File /srv/conda/envs/notebook/lib/python3.9/site-packages/dask/local.py:511, in get_async(submit, num_workers, dsk, result, cache, get_id, rerun_exceptions_locally, pack_exception, raise_exception, callbacks, dumps, loads, chunksize, **kwargs)
    510     else:
--> 511         raise_exception(exc, tb)
    512 res, worker_id = loads(res_info)

File /srv/conda/envs/notebook/lib/python3.9/site-packages/dask/local.py:319, in reraise(exc, tb)
    318     raise exc.with_traceback(tb)
--> 319 raise exc

File /srv/conda/envs/notebook/lib/python3.9/site-packages/dask/local.py:224, in execute_task(key, task_info, dumps, loads, get_id, pack_exception)
    223 task, data = loads(task_info)
--> 224 result = _execute_task(task, data)
    225 id = get_id()

File /srv/conda/envs/notebook/lib/python3.9/site-packages/dask/core.py:119, in _execute_task(arg, cache, dsk)
    116     # Note: Don't assign the subtask results to a variable. numpy detects
    117     # temporaries by their reference count and can execute certain
    118     # operations in-place.
--> 119     return func(*(_execute_task(a, cache) for a in args))
    120 elif not ishashable(arg):

File /srv/conda/envs/notebook/lib/python3.9/site-packages/dask/utils.py:71, in apply(func, args, kwargs)
     70 if kwargs:
---> 71     return func(*args, **kwargs)
     72 else:

File /srv/conda/envs/notebook/lib/python3.9/site-packages/intake_esm/source.py:73, in _open_dataset(urlpath, varname, xarray_open_kwargs, preprocess, requested_variables, additional_attrs, expand_dims, data_format)
     72 else:
---> 73     ds = xr.open_dataset(url, **xarray_open_kwargs)
     74     if preprocess is not None:

File /srv/conda/envs/notebook/lib/python3.9/site-packages/xarray/backends/api.py:547, in open_dataset(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, inline_array, backend_kwargs, **kwargs)
    541 backend_ds = backend.open_dataset(
    542     filename_or_obj,
    543     drop_variables=drop_variables,
    544     **decoders,
    545     **kwargs,
    546 )
--> 547 ds = _dataset_from_backend_dataset(
    548     backend_ds,
    549     filename_or_obj,
    550     engine,
    551     chunks,
    552     cache,
    553     overwrite_encoded_chunks,
    554     inline_array,
    555     drop_variables=drop_variables,
    556     **decoders,
    557     **kwargs,
    558 )
    559 return ds

File /srv/conda/envs/notebook/lib/python3.9/site-packages/xarray/backends/api.py:358, in _dataset_from_backend_dataset(backend_ds, filename_or_obj, engine, chunks, cache, overwrite_encoded_chunks, inline_array, **extra_tokens)
    357 else:
--> 358     ds = _chunk_ds(
    359         backend_ds,
    360         filename_or_obj,
    361         engine,
    362         chunks,
    363         overwrite_encoded_chunks,
    364         inline_array,
    365         **extra_tokens,
    366     )
    368 ds.set_close(backend_ds._close)

File /srv/conda/envs/notebook/lib/python3.9/site-packages/xarray/backends/api.py:326, in _chunk_ds(backend_ds, filename_or_obj, engine, chunks, overwrite_encoded_chunks, inline_array, **extra_tokens)
    325 for name, var in backend_ds.variables.items():
--> 326     var_chunks = _get_chunk(var, chunks)
    327     variables[name] = _maybe_chunk(
    328         name,
    329         var,
   (...)
    334         inline_array=inline_array,
    335     )

File /srv/conda/envs/notebook/lib/python3.9/site-packages/xarray/core/dataset.py:211, in _get_chunk(var, chunks)
    207 """
    208 Return map from each dim to chunk sizes, accounting for backend's preferred chunks.
    209 """
--> 211 import dask.array as da
    213 if isinstance(var, IndexVariable):

File /srv/conda/envs/notebook/lib/python3.9/site-packages/dask/array/__init__.py:2
      1 try:
----> 2     from dask.array import backends, fft, lib, linalg, ma, overlap, random
      3     from dask.array.blockwise import atop, blockwise

File /srv/conda/envs/notebook/lib/python3.9/site-packages/dask/array/backends.py:18
     17 from dask.array.numpy_compat import ma_divide
---> 18 from dask.array.percentile import _percentile
     19 from dask.backends import CreationDispatch, DaskBackendEntrypoint

File /srv/conda/envs/notebook/lib/python3.9/site-packages/dask/array/percentile.py:9
      7 from tlz import merge
----> 9 from dask.array.core import Array
     10 from dask.array.numpy_compat import _numpy_122

File /srv/conda/envs/notebook/lib/python3.9/site-packages/dask/array/core.py:36
     35 from dask.array.chunk import getitem
---> 36 from dask.array.chunk_types import is_valid_array_chunk, is_valid_chunk_type
     38 # Keep einsum_lookup and tensordot_lookup here for backwards compatibility

File /srv/conda/envs/notebook/lib/python3.9/site-packages/dask/array/chunk_types.py:122
    121 try:
--> 122     import sparse
    124     register_chunk_type(sparse.SparseArray)

File /srv/conda/envs/notebook/lib/python3.9/site-packages/sparse/__init__.py:1
----> 1 from ._coo import COO, as_coo
      2 from ._compressed import GCXS

File /srv/conda/envs/notebook/lib/python3.9/site-packages/sparse/_coo/__init__.py:1
----> 1 from .core import COO, as_coo
      2 from .common import (
      3     concatenate,
      4     clip,
   (...)
     22     diagonalize,
     23 )

File /srv/conda/envs/notebook/lib/python3.9/site-packages/sparse/_coo/core.py:9
      8 import numpy as np
----> 9 import numba
     10 import scipy.sparse

File /srv/conda/envs/notebook/lib/python3.9/site-packages/numba/__init__.py:43
     42 # Re-export vectorize decorators and the thread layer querying function
---> 43 from numba.np.ufunc import (vectorize, guvectorize, threading_layer,
     44                             get_num_threads, set_num_threads)
     46 # Re-export Numpy helpers

File /srv/conda/envs/notebook/lib/python3.9/site-packages/numba/np/ufunc/__init__.py:3
      1 # -*- coding: utf-8 -*-
----> 3 from numba.np.ufunc.decorators import Vectorize, GUVectorize, vectorize, guvectorize
      4 from numba.np.ufunc._internal import PyUFunc_None, PyUFunc_Zero, PyUFunc_One

File /srv/conda/envs/notebook/lib/python3.9/site-packages/numba/np/ufunc/decorators.py:3
      1 import inspect
----> 3 from numba.np.ufunc import _internal
      4 from numba.np.ufunc.parallel import ParallelUFuncBuilder, ParallelGUFuncBuilder

SystemError: initialization of _internal failed without raising an exception

The above exception was the direct cause of the following exception:

ESMDataSourceError                        Traceback (most recent call last)
Cell In[4], line 1
----> 1 dset_dict = cat.to_dataset_dict(zarr_kwargs={'consolidated': True})
      2 list(dset_dict.keys())

File /srv/conda/envs/notebook/lib/python3.9/site-packages/pydantic/decorator.py:40, in pydantic.decorator.validate_arguments.validate.wrapper_function()

File /srv/conda/envs/notebook/lib/python3.9/site-packages/pydantic/decorator.py:134, in pydantic.decorator.ValidatedFunction.call()

File /srv/conda/envs/notebook/lib/python3.9/site-packages/pydantic/decorator.py:206, in pydantic.decorator.ValidatedFunction.execute()

File /srv/conda/envs/notebook/lib/python3.9/site-packages/intake_esm/core.py:651, in esm_datastore.to_dataset_dict(self, xarray_open_kwargs, xarray_combine_by_coords_kwargs, preprocess, storage_options, progressbar, aggregate, skip_on_error, **kwargs)
    649         except Exception as exc:
    650             if not skip_on_error:
--> 651                 raise exc
    652 self.datasets = self._create_derived_variables(datasets, skip_on_error)
    653 return self.datasets

File /srv/conda/envs/notebook/lib/python3.9/site-packages/intake_esm/core.py:647, in esm_datastore.to_dataset_dict(self, xarray_open_kwargs, xarray_combine_by_coords_kwargs, preprocess, storage_options, progressbar, aggregate, skip_on_error, **kwargs)
    645 for task in gen:
    646     try:
--> 647         key, ds = task.result()
    648         datasets[key] = ds
    649     except Exception as exc:

File /srv/conda/envs/notebook/lib/python3.9/concurrent/futures/_base.py:439, in Future.result(self, timeout)
    437     raise CancelledError()
    438 elif self._state == FINISHED:
--> 439     return self.__get_result()
    441 self._condition.wait(timeout)
    443 if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:

File /srv/conda/envs/notebook/lib/python3.9/concurrent/futures/_base.py:391, in Future.__get_result(self)
    389 if self._exception:
    390     try:
--> 391         raise self._exception
    392     finally:
    393         # Break a reference cycle with the exception in self._exception
    394         self = None

File /srv/conda/envs/notebook/lib/python3.9/concurrent/futures/thread.py:58, in _WorkItem.run(self)
     55     return
     57 try:
---> 58     result = self.fn(*self.args, **self.kwargs)
     59 except BaseException as exc:
     60     self.future.set_exception(exc)

File /srv/conda/envs/notebook/lib/python3.9/site-packages/intake_esm/core.py:789, in _load_source(key, source)
    788 def _load_source(key, source):
--> 789     return key, source.to_dask()

File /srv/conda/envs/notebook/lib/python3.9/site-packages/intake_esm/source.py:273, in ESMDataSource.to_dask(self)
    271 def to_dask(self):
    272     """Return xarray object (which will have chunks)"""
--> 273     self._load_metadata()
    274     return self._ds

File /srv/conda/envs/notebook/lib/python3.9/site-packages/intake/source/base.py:285, in DataSourceBase._load_metadata(self)
    283 """load metadata only if needed"""
    284 if self._schema is None:
--> 285     self._schema = self._get_schema()
    286     self.dtype = self._schema.dtype
    287     self.shape = self._schema.shape

File /srv/conda/envs/notebook/lib/python3.9/site-packages/intake_esm/source.py:205, in ESMDataSource._get_schema(self)
    202 def _get_schema(self) -> Schema:
    204     if self._ds is None:
--> 205         self._open_dataset()
    206         metadata = {'dims': {}, 'data_vars': {}, 'coords': ()}
    207         self._schema = Schema(
    208             datashape=None,
    209             dtype=None,
   (...)
    212             extra_metadata=metadata,
    213         )

File /srv/conda/envs/notebook/lib/python3.9/site-packages/intake_esm/source.py:265, in ESMDataSource._open_dataset(self)
    262     self._ds.attrs[OPTIONS['dataset_key']] = self.key
    264 except Exception as exc:
--> 265     raise ESMDataSourceError(
    266         f"""Failed to load dataset with key='{self.key}'
    267          You can use `cat['{self.key}'].df` to inspect the assets/files for this key.
    268          """
    269     ) from exc

ESMDataSourceError: Failed to load dataset with key='ScenarioMIP.EC-Earth-Consortium.EC-Earth3-CC.ssp585.Oyr.gn'
                 You can use `cat['ScenarioMIP.EC-Earth-Consortium.EC-Earth3-CC.ssp585.Oyr.gn'].df` to inspect the assets/files for this key.
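
As the error message suggests, the assets behind the failing key can be inspected directly. A minimal sketch (assuming cat is the intake-esm catalog opened earlier in the notebook):

# Hypothetical inspection step: list the assets/files behind the key that failed to load.
failing_key = 'ScenarioMIP.EC-Earth-Consortium.EC-Earth3-CC.ssp585.Oyr.gn'
cat[failing_key].df
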
r-ford commented 1 year ago

No problem! Using numpy<1.24 didn't help (#40), but this seems like something that might resolve itself once things are fixed upstream. Maybe someone from @ProjectPythia/infrastructure is familiar with this.

chiaweh2 commented 1 year ago

Got it, thank you!

brian-rose commented 1 year ago

@r-ford did this get resolved upstream?

r-ford commented 1 year ago

@brian-rose I was able to run the notebook in the binder, so I believe this is resolved.

mgrover1 commented 1 year ago

Yeah, it's resolved; we cut a new release of intake-esm a couple of weeks ago.
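
For future readers, a quick way to confirm an environment picked up the newer release (a sketch; no specific version cutoff is stated in this thread):

import intake_esm
print(intake_esm.__version__)  # should reflect the release that resolved this issue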