ECMWFCode4Earth / ml_drought

Machine learning to better predict and understand drought. Moving to github.com/ml-clim
https://ml-clim.github.io/drought-prediction/
90 stars 18 forks source link

`*** ValueError: index must be monotonic for resampling` #114

Open tommylees112 opened 5 years ago

tommylees112 commented 5 years ago
from src.analysis import VegetationDeficitIndex

model_ds = xr.open_mfdataset((data_dir / 'models' / 'one_month_forecast' / 'previous_month' / '*.nc').as_posix())
model_ds = model_ds.sortby('time')
model_ds.to_netcdf(out_dir / 'previous_month_prediction.nc')
model_da = model_ds.preds

index = VegetationDeficitIndex(out_dir / 'previous_month_prediction.nc')

Error Traceback

In [107]: index = VegetationDeficitIndex(out_dir / 'previous_month_prediction.nc')
     ...:
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-107-74c71383fa2a> in <module>
----> 1 index = VegetationDeficitIndex(out_dir / 'previous_month_prediction.nc')

~/github/ml_drought/src/analysis/indices/vegetation_deficit_index.py in __init__(self, file_path, rolling_window, resample_str)
     51         super().__init__(
     52             file_path=file_path,
---> 53             resample_str=resample_str
     54         )
     55

~/github/ml_drought/src/analysis/indices/base.py in __init__(self, file_path, resample_str)
     33             self.resample = True
     34             self.resample_str = resample_str
---> 35             self.ds = self.resample_ds_mean()
     36
     37     def resample_ds_mean(self) -> xr.Dataset:

~/github/ml_drought/src/analysis/indices/base.py in resample_ds_mean(self)
     51         assert self.resample_str in [k for k in lookup.keys()], \
     52             f'resample_str must be one of: {[k for k in lookup.keys()]}'
---> 53         return self.ds.resample(time=f'{lookup[self.resample_str]}').mean()
     54
     55     def save(self, data_dir: Path = Path('data')):

~/miniconda3/envs/crop/lib/python3.7/site-packages/xarray/core/common.py in resample(self, indexer, skipna, closed, label, base, keep_attrs, loffset, restore_coord_dims, **indexer_kwargs)
    845                                        grouper=grouper,
    846                                        resample_dim=RESAMPLE_DIM,
--> 847                                        restore_coord_dims=restore_coord_dims)
    848
    849         return resampler

~/miniconda3/envs/crop/lib/python3.7/site-packages/xarray/core/resample.py in __init__(self, *args, **kwargs)
    237                              "cannot have the same name as actual dimension "
    238                              "('{}')! ".format(self._resample_dim, self._dim))
--> 239         super(DatasetResample, self).__init__(*args, **kwargs)
    240
    241     def apply(self, func, args=(), **kwargs):

~/miniconda3/envs/crop/lib/python3.7/site-packages/xarray/core/groupby.py in __init__(self, obj, group, squeeze, grouper, bins, restore_coord_dims, cut_kwargs)
    261             if not index.is_monotonic:
    262                 # TODO: sort instead of raising an error
--> 263                 raise ValueError('index must be monotonic for resampling')
    264             full_index, first_items = self._get_index_and_items(
    265                 index, grouper)

ValueError: index must be monotonic for resampling

SO HELP

tommylees112 commented 5 years ago

One solution is to simply not resample at all (probably the easiest solution)

index = VegetationDeficitIndex(out_dir / 'previous_month_prediction.nc', resample_str=None)

It probably has something to do with the fact that we are reading the data from an xr.Dataset that was opened with `xr.open_mfdataset` and then saved to disk. But this is surprising, because the dataset is sorted by time, purposefully written to disk, and then re-read by the index object ...