Feature request: skipna keyword

aaronspring commented 2 years ago

in xarray many functions drop indexes where NaN is the value.

in bias_correction I sometimes get ValueError: array must not contain infs or NaNs from https://docs.scipy.org/doc/scipy/reference/generated/scipy.linalg.lstsq.html

This happens when my input data has NaNs.

Traceback:

~/anaconda3/envs/climpred-dev/lib/python3.9/site-packages/bias_correction.py in correct(self, method, lower_limit, cdf_threshold, vectorize, dask, **apply_ufunc_kwargs)
    259             )
    260         elif method == "normal_mapping":
--> 261             corrected = xr.apply_ufunc(
    262                 normal_correction,
    263                 self.obs_data,

~/anaconda3/envs/climpred-dev/lib/python3.9/site-packages/xarray/core/computation.py in apply_ufunc(func, input_core_dims, output_core_dims, exclude_dims, vectorize, join, dataset_join, dataset_fill_value, keep_attrs, kwargs, dask, output_dtypes, output_sizes, meta, dask_gufunc_kwargs, *args)
   1160     # feed datasets apply_variable_ufunc through apply_dataset_vfunc
   1161     elif any(is_dict_like(a) for a in args):
-> 1162         return apply_dataset_vfunc(
   1163             variables_vfunc,
   1164             *args,

~/anaconda3/envs/climpred-dev/lib/python3.9/site-packages/xarray/core/computation.py in apply_dataset_vfunc(func, signature, join, dataset_join, fill_value, exclude_dims, keep_attrs, *args)
    448     args = [getattr(arg, "data_vars", arg) for arg in args]
    449 
--> 450     result_vars = apply_dict_of_variables_vfunc(
    451         func, *args, signature=signature, join=dataset_join, fill_value=fill_value
    452     )

~/anaconda3/envs/climpred-dev/lib/python3.9/site-packages/xarray/core/computation.py in apply_dict_of_variables_vfunc(func, signature, join, fill_value, *args)
    392     result_vars = {}
    393     for name, variable_args in zip(names, grouped_by_name):
--> 394         result_vars[name] = func(*variable_args)
    395 
    396     if signature.num_outputs > 1:

~/anaconda3/envs/climpred-dev/lib/python3.9/site-packages/xarray/core/computation.py in apply_variable_ufunc(func, signature, exclude_dims, dask, output_dtypes, vectorize, keep_attrs, dask_gufunc_kwargs, *args)
    740             )
    741 
--> 742     result_data = func(*input_data)
    743 
    744     if signature.num_outputs == 1:

~/anaconda3/envs/climpred-dev/lib/python3.9/site-packages/numpy/lib/function_base.py in __call__(self, *args, **kwargs)
   2161             vargs.extend([kwargs[_n] for _n in names])
   2162 
-> 2163         return self._vectorize_call(func=func, args=vargs)
   2164 
   2165     def _get_ufunc_and_otypes(self, func, args):

~/anaconda3/envs/climpred-dev/lib/python3.9/site-packages/numpy/lib/function_base.py in _vectorize_call(self, func, args)
   2235         """Vectorized call to `func` over positional `args`."""
   2236         if self.signature is not None:
-> 2237             res = self._vectorize_call_with_signature(func, args)
   2238         elif not args:
   2239             res = func()

~/anaconda3/envs/climpred-dev/lib/python3.9/site-packages/numpy/lib/function_base.py in _vectorize_call_with_signature(self, func, args)
   2275 
   2276         for index in np.ndindex(*broadcast_shape):
-> 2277             results = func(*(arg[index] for arg in args))
   2278 
   2279             n_results = len(results) if isinstance(results, tuple) else 1

~/anaconda3/envs/climpred-dev/lib/python3.9/site-packages/bias_correction.py in normal_correction(obs_data, mod_data, sce_data, cdf_threshold)
    136     obs_len, mod_len, sce_len = [len(x) for x in [obs_data, mod_data, sce_data]]
    137     obs_mean, mod_mean, sce_mean = [x.mean() for x in [obs_data, mod_data, sce_data]]
--> 138     obs_detrended, mod_detrended, sce_detrended = [
    139         detrend(x) for x in [obs_data, mod_data, sce_data]
    140     ]

~/anaconda3/envs/climpred-dev/lib/python3.9/site-packages/bias_correction.py in <listcomp>(.0)
    137     obs_mean, mod_mean, sce_mean = [x.mean() for x in [obs_data, mod_data, sce_data]]
    138     obs_detrended, mod_detrended, sce_detrended = [
--> 139         detrend(x) for x in [obs_data, mod_data, sce_data]
    140     ]
    141     obs_norm, mod_norm, sce_norm = [

~/anaconda3/envs/climpred-dev/lib/python3.9/site-packages/scipy/signal/signaltools.py in detrend(data, axis, type, bp, overwrite_data)
   3482             A[:, 0] = np.cast[dtype](np.arange(1, Npts + 1) * 1.0 / Npts)
   3483             sl = slice(bp[m], bp[m + 1])
-> 3484             coef, resids, rank, s = linalg.lstsq(A, newdata[sl])
   3485             newdata[sl] = newdata[sl] - np.dot(A, coef)
   3486         # Put data back in original shape.

~/anaconda3/envs/climpred-dev/lib/python3.9/site-packages/scipy/linalg/basic.py in lstsq(a, b, cond, overwrite_a, overwrite_b, check_finite, lapack_driver)
   1145     """
   1146     a1 = _asarray_validated(a, check_finite=check_finite)
-> 1147     b1 = _asarray_validated(b, check_finite=check_finite)
   1148     if len(a1.shape) != 2:
   1149         raise ValueError('Input array a should be 2D')

~/anaconda3/envs/climpred-dev/lib/python3.9/site-packages/scipy/_lib/_util.py in _asarray_validated(a, check_finite, sparse_ok, objects_ok, mask_ok, as_inexact)
    291             raise ValueError('masked arrays are not supported')
    292     toarray = np.asarray_chkfinite if check_finite else np.asarray
--> 293     a = toarray(a)
    294     if not objects_ok:
    295         if a.dtype is np.dtype('O'):

~/anaconda3/envs/climpred-dev/lib/python3.9/site-packages/numpy/lib/function_base.py in asarray_chkfinite(a, dtype, order)
    486     a = asarray(a, dtype=dtype, order=order)
    487     if a.dtype.char in typecodes['AllFloat'] and not np.isfinite(a).all():
--> 488         raise ValueError(
    489             "array must not contain infs or NaNs")
    490     return a

ValueError: array must not contain infs or NaNs

How to fix: have a check for NaNs and drop them in reference and model or data_to_be_corrected

Yassminaa commented 2 years ago

I wonder if this issue is solved?

aaronspring commented 2 years ago

No. Isn’t

Anjali-Thomas95 commented 1 year ago

Hi, I am facing the same trouble. If there is NaN values in my data, the bias correction won't work properly.

aaronspring commented 1 year ago

You could try to drop NaNs in all arrays where one array has NaNs before using bias_correction. Once that works you could share that code here and we can merged it with the skipna keyword. PR welcome.

Yassminaa commented 1 year ago

the point is NANs come because the timeseries of the model and historical and refernce is not equal. e.g. if you have historical and reference data for 1980-2015, the model 2016 -2022. in this case you deal with non equal timeseries. to make it consistent in time you have to fill the data in nan in long timeseries 1980-2022.

aaronspring commented 1 year ago

Right. So in xarray logic usually the non overlapping part would be dropped. Would be nice to implement that here as well.

pankajkarman / bias_correction

Feature request: skipna keyword #11