Ouranosinc / xscen

A climate change scenario-building analysis framework.
https://xscen.readthedocs.io/
Apache License 2.0
17 stars 2 forks source link

Adjust only works with dask array #362

Closed juliettelavoie closed 8 months ago

juliettelavoie commented 8 months ago

Setup Information

Description

xs.adjust only works with dask arrays.

Steps To Reproduce

Failing case:

import numpy as np
import xscen as xs
from xclim.testing.helpers import test_timeseries as timeseries

dref = timeseries(
    np.random.rand( 365*3),
    variable="tas",start="2001-01-01", freq="D", as_dataset=True
)

dsim = timeseries(
    np.random.rand( 365*6),
    variable="tas",
    start="2001-01-01",
    freq="D",
    as_dataset=True,
)

dtrain = xs.train(
            dref,
            dsim,
            var=["tas"],
            period=["2001", "2003"],
        )
out = xs.adjust(dtrain,
                dsim,
                periods=["2001", "2006"],
                )
out

gives:

RuntimeError                              Traceback (most recent call last)
File /exec/jlavoie/.conda/xscen-dev/lib/python3.11/site-packages/xclim/sdba/base.py:684, in map_blocks.<locals>._decorator.<locals>._map_blocks.<locals>._call_and_transpose_on_exit(dsblock, **f_kwargs)
    683     _decode_cf_coords(dsblock)
--> 684     func_out = func(dsblock, **f_kwargs).transpose(*all_dims)
    685 except Exception as err:

File /exec/jlavoie/.conda/xscen-dev/lib/python3.11/site-packages/xclim/sdba/_adjustment.py:138, in dqm_adjust(ds, group, interp, kind, extrapolation, detrend)
    136     detrend = PolyDetrend(degree=detrend, kind=kind, group=group)
--> 138 detrend = detrend.fit(scaled_sim)
    139 ds["sim"] = detrend.detrend(scaled_sim)

File /exec/jlavoie/.conda/xscen-dev/lib/python3.11/site-packages/xclim/sdba/detrending.py:62, in BaseDetrend.fit(self, da)
     61 new = self.__class__(**self.parameters)
---> 62 new.set_dataset(new._get_trend(da).rename("trend").to_dataset())
     63 new.ds.trend.attrs["units"] = da.attrs.get("units", "")

File /exec/jlavoie/.conda/xscen-dev/lib/python3.11/site-packages/xclim/sdba/detrending.py:170, in PolyDetrend._get_trend(self, da)
    168 def _get_trend(self, da):
    169     # Estimate trend over da
--> 170     trend = _polydetrend_get_trend(da, **self)
    171     return trend.trend

File <boltons.funcutils.FunctionBuilder-268>:2, in _map_blocks(ds, **kwargs)

File /exec/jlavoie/.conda/xscen-dev/lib/python3.11/site-packages/xclim/sdba/base.py:481, in parse_group.<locals>._parse_group(*f_args, **f_kwargs)
    480 f_kwargs = _update_kwargs(f_kwargs, allowed=allow_only)
--> 481 return func(*f_args, **f_kwargs)

File /exec/jlavoie/.conda/xscen-dev/lib/python3.11/site-packages/xclim/sdba/base.py:676, in map_blocks.<locals>._decorator.<locals>._map_blocks(ds, **kwargs)
    675 # Optimization to circumvent the slow pickle.dumps(cftime_array)
--> 676 for name, crd in ds.coords.items():
    677     if xr.core.common._contains_cftime_datetimes(crd.variable):  # noqa

File <frozen _collections_abc>:860, in __iter__(self)

File /exec/jlavoie/.conda/xscen-dev/lib/python3.11/site-packages/xarray/core/coordinates.py:96, in AbstractCoordinates.__iter__(self)
     94 def __iter__(self) -> Iterator[Hashable]:
     95     # needs to be in the same order as the dataset variables
---> 96     for k in self.variables:
     97         if k in self._names:

RuntimeError: dictionary changed size during iteration

The above exception was the direct cause of the following exception:

ValueError                                Traceback (most recent call last)
Cell In[78], line 29
     20 #dref=ds_ref
     21 #dsim=ds
     23 dtrain = xs.train(
     24             dref,
     25             dsim,
     26             var=["tas"],
     27             period=["2001", "2003"],
     28         )
---> 29 out = xs.adjust(dtrain,
     30                 dsim,
     31                 periods=["2001", "2006"],
     32                 )
     33 out

File ~/Projets/xscen/xscen/config.py:220, in parse_config.<locals>._wrapper(*args, **kwargs)
    217 if CONFIG.get("print_it_all"):
    218     logger.debug(f"Modified kwargs : {kwargs}")
--> 220 return func(*args, **kwargs)

File ~/Projets/xscen/xscen/biasadjust.py:290, in adjust(dtrain, dsim, periods, xclim_adjust_args, to_level, bias_adjust_institution, bias_adjust_project, moving_yearly_window, align_on)
    287         xclim_adjust_args["detrend"] = getattr(sdba.detrending, name)(**kwargs)
    289     with xc.set_options(sdba_encode_cf=True, sdba_extra_output=False):
--> 290         out = ADJ.adjust(sim_sel, **xclim_adjust_args)
    291         slices.extend([out])
    292 # put all the adjusted period back together

File /exec/jlavoie/.conda/xscen-dev/lib/python3.11/site-packages/xclim/sdba/adjustment.py:217, in TrainAdjust.adjust(self, sim, *args, **kwargs)
    214         self._check_inputs(sim, *args, group=self.group)
    216     sim = convert_units_to(sim, self.train_units)
--> 217 out = self._adjust(sim, *args, **kwargs)
    219 if isinstance(out, xr.DataArray):
    220     out = out.rename("scen").to_dataset()

File /exec/jlavoie/.conda/xscen-dev/lib/python3.11/site-packages/xclim/sdba/adjustment.py:500, in DetrendedQuantileMapping._adjust(self, sim, interp, extrapolation, detrend)
    493 def _adjust(
    494     self,
    495     sim,
   (...)
    498     detrend=1,
    499 ):
--> 500     scen = dqm_adjust(
    501         self.ds.assign(sim=sim),
    502         interp=interp,
    503         extrapolation=extrapolation,
    504         detrend=detrend,
    505         group=self.group,
    506         kind=self.kind,
    507     ).scen
    508     # Detrending needs units.
    509     scen.attrs["units"] = sim.units

File <boltons.funcutils.FunctionBuilder-283>:2, in _map_blocks(ds, **kwargs)

File /exec/jlavoie/.conda/xscen-dev/lib/python3.11/site-packages/xclim/sdba/base.py:481, in parse_group.<locals>._parse_group(*f_args, **f_kwargs)
    478 @wraps(func)
    479 def _parse_group(*f_args, **f_kwargs):
    480     f_kwargs = _update_kwargs(f_kwargs, allowed=allow_only)
--> 481     return func(*f_args, **f_kwargs)

File /exec/jlavoie/.conda/xscen-dev/lib/python3.11/site-packages/xclim/sdba/base.py:703, in map_blocks.<locals>._decorator.<locals>._map_blocks(ds, **kwargs)
    700 tmpl = tmpl.drop_vars(extra_coords.keys(), errors="ignore")
    702 # Call
--> 703 out = ds.map_blocks(
    704     _call_and_transpose_on_exit, template=tmpl, kwargs=kwargs
    705 )
    706 # Add back the extra coords, but only those which have compatible dimensions (like xarray would have done)
    707 out = out.assign_coords(
    708     {
    709         name: crd
   (...)
    712     }
    713 )

File /exec/jlavoie/.conda/xscen-dev/lib/python3.11/site-packages/xarray/core/dataset.py:8766, in Dataset.map_blocks(self, func, args, kwargs, template)
   8664 """
   8665 Apply a function to each block of this Dataset.
   8666 
   (...)
   8762     a        (time) float64 192B dask.array<chunksize=(24,), meta=np.ndarray>
   8763 """
   8764 from xarray.core.parallel import map_blocks
-> 8766 return map_blocks(func, self, args, kwargs, template)

File /exec/jlavoie/.conda/xscen-dev/lib/python3.11/site-packages/xarray/core/parallel.py:409, in map_blocks(func, obj, args, kwargs, template)
    403         raise TypeError(
    404             "Cannot pass dask collections in kwargs yet. Please compute or "
    405             "load values before passing to map_blocks."
    406         )
    408 if not is_dask_collection(obj):
--> 409     return func(obj, *args, **kwargs)
    411 try:
    412     import dask

File /exec/jlavoie/.conda/xscen-dev/lib/python3.11/site-packages/xclim/sdba/base.py:686, in map_blocks.<locals>._decorator.<locals>._map_blocks.<locals>._call_and_transpose_on_exit(dsblock, **f_kwargs)
    684     func_out = func(dsblock, **f_kwargs).transpose(*all_dims)
    685 except Exception as err:
--> 686     raise ValueError(
    687         f"{func.__name__} failed on block with coords : {dsblock.coords}."
    688     ) from err
    689 return func_out

ValueError: dqm_adjust failed on block with coords : Coordinates:
  * quantiles  (quantiles) float64 120B 0.03333 0.1 0.1667 ... 0.8333 0.9 0.9667
  * dayofyear  (dayofyear) int64 3kB 1 2 3 4 5 6 7 ... 360 361 362 363 364 365
  * time       (time) object 18kB 2001-01-01 00:00:00 ... 2006-12-30 00:00:00.

working case:

import random 
import dask
from xclim.testing.helpers import test_timeseries as timeseries

dref = timeseries(
    dask.array.from_array(np.random.rand( 365*3), chunks=(365*3)),
    variable="tas",start="2001-01-01", freq="D", as_dataset=True
)

dsim = timeseries(
    np.random.rand( 365*6),
    variable="tas",
    start="2001-01-01",
    freq="D",
    as_dataset=True,
)

dtrain = xs.train(
            dref,
            dsim,
            var=["tas"],
            period=["2001", "2003"],
        )
out = xs.adjust(dtrain,
                dsim,
                periods=["2001", "2006"],
                )
out

Additional context

I can also get it to work with a numpy array if I change to with xc.set_options(sdba_encode_cf=False) on line 289.

Do we want to only accept dask arrays and write it clearly or add a test to turn to option to False if not a dask array?

Contribution

aulemahal commented 8 months ago

Darn. This is an xclim issue.

aulemahal commented 8 months ago

Will be avoidable with #363, and fixed by Ouranosinc/xclim#1674.