noaa-ocs-modeling / EnsemblePerturbation

perturbation of coupled model input over a space of input variables
https://ensembleperturbation.readthedocs.io
Creative Commons Zero v1.0 Universal
7 stars 3 forks source link

Failed `extrapolate_water_elevation_to_dry_areas()` when loading training dataset in chunks. #145

Open FariborzDaneshvar-NOAA opened 1 month ago

FariborzDaneshvar-NOAA commented 1 month ago

If I use chunks='auto' for reading the subset file:

xarray.open_dataset(subset_filename, chunks='auto')

Then extrapolate_water_elevation_to_dry_areas() function that uses training set from the subset (training_set = subset.sel(run=training_perturbations['run'])) as an input (da=training_set) will fail with the following KeyError:

KeyError: 'Indexing with a boolean dask array is not allowed. This will result in a dask array of unknown shape. Such arrays are unsupported by Xarray. Please compute the indexer first using .compute()'

If I add .compute() to line 887 (as explained in this PR), then I will get this new NotImplementedError:

`NotImplementedError: xarray can't set arrays with multiple array indices to dask yet.`

Full error messages in the following comments.

FariborzDaneshvar-NOAA commented 1 month ago

First Error (KeyError):

KeyError                                  Traceback (most recent call last)
Cell In[44], line 13
     10     training_set_adjusted = training_set.copy(deep=True)
     11 else:
     12     # make an adjusted training set for dry areas..
---> 13     training_set_adjusted = extrapolate_water_elevation_to_dry_areas(
     14         da=training_set,
     15         k_neighbors=k_neighbors,
     16         idw_order=idw_order,
     17         compute_headloss=True,
     18         mann_coef=mann_coef,
     19         u_ref=0.4,
     20         d_ref=1,
     21     )

File /nhc/Fariborz.Daneshvar/miniconda3/envs/nhc_colab/lib/python3.10/site-packages/ensembleperturbation/parsing/adcirc.py:890, in extrapolate_water_elevation_to_dry_areas(da, k_neighbors, idw_order, compute_headloss, mann_coef, u_ref, d_ref, min_depth)
    888 tree = KDTree(projected_coordinates[~null])
    889 dd, nn = tree.query(projected_coordinates[null], k=k_neighbors)
--> 890 max_allowable_values = da['depth'][null].values + min_depth - numpy.finfo(float).eps
    891 if k_neighbors == 1:
    892     headloss = dd * friction_factor  # hydraulic friction loss

File /nhc/Fariborz.Daneshvar/miniconda3/envs/nhc_colab/lib/python3.10/site-packages/xarray/core/dataarray.py:823, in DataArray.__getitem__(self, key)
    820     return self._getitem_coord(key)
    821 else:
    822     # xarray-style array indexing
--> 823     return self.isel(indexers=self._item_key_to_dict(key))

File /nhc/Fariborz.Daneshvar/miniconda3/envs/nhc_colab/lib/python3.10/site-packages/xarray/core/dataarray.py:1413, in DataArray.isel(self, indexers, drop, missing_dims, **indexers_kwargs)
   1410 indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "isel")
   1412 if any(is_fancy_indexer(idx) for idx in indexers.values()):
-> 1413     ds = self._to_temp_dataset()._isel_fancy(
   1414         indexers, drop=drop, missing_dims=missing_dims
   1415     )
   1416     return self._from_temp_dataset(ds)
   1418 # Much faster algorithm for when all indexers are ints, slices, one-dimensional
   1419 # lists, or zero or one-dimensional np.ndarray's

File /nhc/Fariborz.Daneshvar/miniconda3/envs/nhc_colab/lib/python3.10/site-packages/xarray/core/dataset.py:2704, in Dataset._isel_fancy(self, indexers, drop, missing_dims)
   2700 var_indexers = {
   2701     k: v for k, v in valid_indexers.items() if k in var.dims
   2702 }
   2703 if var_indexers:
-> 2704     new_var = var.isel(indexers=var_indexers)
   2705     # drop scalar coordinates
   2706     # https://github.com/pydata/xarray/issues/6554
   2707     if name in self.coords and drop and new_var.ndim == 0:

File /nhc/Fariborz.Daneshvar/miniconda3/envs/nhc_colab/lib/python3.10/site-packages/xarray/core/variable.py:1378, in Variable.isel(self, indexers, missing_dims, **indexers_kwargs)
   1375 indexers = drop_dims_from_indexers(indexers, self.dims, missing_dims)
   1377 key = tuple(indexers.get(dim, slice(None)) for dim in self.dims)
-> 1378 return self[key]

File /nhc/Fariborz.Daneshvar/miniconda3/envs/nhc_colab/lib/python3.10/site-packages/xarray/core/variable.py:899, in Variable.__getitem__(self, key)
    886 def __getitem__(self: T_Variable, key) -> T_Variable:
    887     """Return a new Variable object whose contents are consistent with
    888     getting the provided key from the underlying data.
    889 
   (...)
    897     array `x.values` directly.
    898     """
--> 899     dims, indexer, new_order = self._broadcast_indexes(key)
    900     data = as_indexable(self._data)[indexer]
    901     if new_order:

File /nhc/Fariborz.Daneshvar/miniconda3/envs/nhc_colab/lib/python3.10/site-packages/xarray/core/variable.py:732, in Variable._broadcast_indexes(self, key)
    729 if all(isinstance(k, BASIC_INDEXING_TYPES) for k in key):
    730     return self._broadcast_indexes_basic(key)
--> 732 self._validate_indexers(key)
    733 # Detect it can be mapped as an outer indexer
    734 # If all key is unlabeled, or
    735 # key can be mapped as an OuterIndexer.
    736 if all(not isinstance(k, Variable) for k in key):

File /nhc/Fariborz.Daneshvar/miniconda3/envs/nhc_colab/lib/python3.10/site-packages/xarray/core/variable.py:784, in Variable._validate_indexers(self, key)
    779     raise IndexError(
    780         "{}-dimensional boolean indexing is "
    781         "not supported. ".format(k.ndim)
    782     )
    783 if is_duck_dask_array(k.data):
--> 784     raise KeyError(
    785         "Indexing with a boolean dask array is not allowed. "
    786         "This will result in a dask array of unknown shape. "
    787         "Such arrays are unsupported by Xarray."
    788         "Please compute the indexer first using .compute()"
    789     )
    790 if getattr(k, "dims", (dim,)) != (dim,):
    791     raise IndexError(
    792         "Boolean indexer should be unlabeled or on the "
    793         "same dimension to the indexed array. Indexer is "
   (...)
    796         )
    797     )

KeyError: 'Indexing with a boolean dask array is not allowed. This will result in a dask array of unknown shape. Such arrays are unsupported by Xarray.Please compute the indexer first using .compute()'
FariborzDaneshvar-NOAA commented 1 month ago

Second Error (NotImplementedError):

NotImplementedError                       Traceback (most recent call last)
Cell In[21], line 13
     10     training_set_adjusted = training_set.copy(deep=True)
     11 else:
     12     # make an adjusted training set for dry areas..
---> 13     training_set_adjusted = extrapolate_water_elevation_to_dry_areas(
     14         da=training_set,
     15         k_neighbors=k_neighbors,
     16         idw_order=idw_order,
     17         compute_headloss=True,
     18         mann_coef=mann_coef,
     19         u_ref=0.4,
     20         d_ref=1,
     21     )

File /nhc/Fariborz.Daneshvar/miniconda3/envs/nhc_colab/lib/python3.10/site-packages/ensembleperturbation/parsing/adcirc.py:893, in extrapolate_water_elevation_to_dry_areas(da, k_neighbors, idw_order, compute_headloss, mann_coef, u_ref, d_ref, min_depth)
    891 if k_neighbors == 1:
    892     headloss = dd * friction_factor  # hydraulic friction loss
--> 893     da_adjusted[run, null] = numpy.fmin(
    894         da[run, nodes[~null][nn]].values - headloss, max_allowable_values
    895     )
    896 else:
    897     for kk in range(k_neighbors):

File /nhc/Fariborz.Daneshvar/miniconda3/envs/nhc_colab/lib/python3.10/site-packages/xarray/core/dataarray.py:840, in DataArray.__setitem__(self, key, value)
    835 # DataArray key -> Variable key
    836 key = {
    837     k: v.variable if isinstance(v, DataArray) else v
    838     for k, v in self._item_key_to_dict(key).items()
    839 }
--> 840 self.variable[key] = value

File /nhc/Fariborz.Daneshvar/miniconda3/envs/nhc_colab/lib/python3.10/site-packages/xarray/core/variable.py:977, in Variable.__setitem__(self, key, value)
    974     value = np.moveaxis(value, new_order, range(len(new_order)))
    976 indexable = as_indexable(self._data)
--> 977 indexable[index_tuple] = value

File /nhc/Fariborz.Daneshvar/miniconda3/envs/nhc_colab/lib/python3.10/site-packages/xarray/core/indexing.py:1460, in DaskIndexingAdapter.__setitem__(self, key, value)
   1458 num_non_slices = sum(0 if isinstance(k, slice) else 1 for k in key.tuple)
   1459 if num_non_slices > 1:
-> 1460     raise NotImplementedError(
   1461         "xarray can't set arrays with multiple "
   1462         "array indices to dask yet."
   1463     )
   1464 self.array[key.tuple] = value

NotImplementedError: xarray can't set arrays with multiple array indices to dask yet.