cerfacs-globc / icclim

icclim: Python library for climate indices and climate indicators calculation.
https://icclim.readthedocs.io/en/latest/
Apache License 2.0

DOC: Example of thresholds functionality? #277

Closed: DamienIrving closed this issue 7 months ago

DamienIrving commented 1 year ago

Description

I'm trying to use the thresholds functionality described here without success, so I was wondering if it would be possible to add an example to the documentation?

Minimal reproducible example

In this example (see full notebook), I want to calculate the TX90p index for a future period using the same 90th percentile thresholds that I used for an historical period.

import icclim
import xarray as xr

hist_files = [
    'tasmax_historical_day_19800101-19801231.nc',
    'tasmax_historical_day_19810101-19811231.nc',
    'tasmax_historical_day_19820101-19821231.nc',
]
ds_hist = xr.open_mfdataset(hist_files)

icclim.index(
    index_name="TX90p",
    in_files=ds_hist,
    var_name="tasmax",
    slice_mode="year",
    out_file='TX90p.nc',
    save_thresholds=True,
)

ds_thresholds = xr.open_dataset('TX90p.nc')
ds_thresholds = ds_thresholds[['tasmax_thresholds']].squeeze()
ds_thresholds = ds_thresholds.rename({'tasmax_thresholds': 'tasmax'})

ssp_files = [
    'tasmax_ssp370_day_20950101-20951231.nc',
    'tasmax_ssp370_day_20960101-20961231.nc',
    'tasmax_ssp370_day_20970101-20971231.nc',
]
ds_ssp = xr.open_mfdataset(ssp_files)

icclim.index(
    index_name="TX90p",
    in_files={'tasmax': ds_ssp, 'thresholds': ds_thresholds},
    slice_mode="year",
    out_file='result.nc',
)

Output received

2023-08-25 16:42:09,330 --- icclim 6.4.0
2023-08-25 16:42:09,333 --- BEGIN EXECUTION
2023-08-25 16:42:09,334 Processing: 0%

---------------------------------------------------------------------------
MergeError                                Traceback (most recent call last)
Cell In [8], line 1
----> 1 icclim.index(
      2     index_name="TX90p",
      3     in_files={'tasmax': ds_ssp, 'thresholds': ds_thresholds},
      4     slice_mode="year",
      5     out_file='result.nc',
      6 )

File /g/data/xv83/dbi599/miniconda3/envs/icclim/lib/python3.10/site-packages/icclim/main.py:413, in index(***failed resolving arguments***)
    411 elif isinstance(threshold, Sequence):
    412     threshold = [build_configured_threshold(t) for t in threshold]
--> 413 climate_vars_dict = read_in_files(
    414     in_files=in_files,
    415     var_names=var_name,
    416     threshold=threshold,
    417     standard_index=standard_index,
    418 )
    419 # We use groupby instead of resample when there is a single variable that must be
    420 # compared to its reference period values.
    421 is_compared_to_reference = _must_add_reference_var(
    422     climate_vars_dict, base_period_time_range
    423 )

File /g/data/xv83/dbi599/miniconda3/envs/icclim/lib/python3.10/site-packages/icclim/models/climate_variable.py:216, in read_in_files(in_files, var_names, threshold, standard_index)
    213         return in_files
    214     else:
    215         # case of in_files={tasmax: "tasmax.nc"}
--> 216         return _build_in_file_dict(
    217             in_files=list(in_files.values()),
    218             standard_index=standard_index,
    219             threshold=threshold,
    220             var_names=list(in_files.keys()),
    221         )
    222 else:
    223     # case of in_files="tasmax.nc" and var_names="tasmax"
    224     return _build_in_file_dict(in_files, var_names, threshold, standard_index)

File /g/data/xv83/dbi599/miniconda3/envs/icclim/lib/python3.10/site-packages/icclim/models/climate_variable.py:236, in _build_in_file_dict(in_files, var_names, threshold, standard_index)
    227 def _build_in_file_dict(
    228     in_files: InFileBaseType,
    229     var_names: Sequence[str],
    230     threshold: Threshold | Sequence[Threshold] | None,
    231     standard_index: StandardIndex | None,
    232 ):
    233     standard_var = (
    234         standard_index.input_variables[0] if standard_index is not None else None
    235     )
--> 236     input_dataset = read_dataset(
    237         in_files=in_files, standard_var=standard_var, var_name=var_names
    238     )
    239     var_names = guess_var_names(
    240         ds=input_dataset, standard_index=standard_index, var_names=var_names
    241     )
    242     if threshold is not None:

File /g/data/xv83/dbi599/miniconda3/envs/icclim/lib/python3.10/site-packages/icclim/pre_processing/input_parsing.py:127, in read_dataset(in_files, standard_var, var_name)
    125     ds = xr.open_zarr(in_files)
    126 elif isinstance(in_files, (list, tuple)):
--> 127     return xr.merge(
    128         [
    129             read_dataset(in_file, standard_var, var_name[i])
    130             for i, in_file in enumerate(in_files)
    131         ]
    132     )
    133 else:
    134     raise NotImplementedError(
    135         f"`in_files` format {type(in_files)} was not" f" recognized."
    136     )

File /g/data/xv83/dbi599/miniconda3/envs/icclim/lib/python3.10/site-packages/xarray/core/merge.py:1025, in merge(objects, compat, join, fill_value, combine_attrs)
   1022     obj = obj.to_dataset(promote_attrs=True) if isinstance(obj, DataArray) else obj
   1023     dict_like_objects.append(obj)
-> 1025 merge_result = merge_core(
   1026     dict_like_objects,
   1027     compat,
   1028     join,
   1029     combine_attrs=combine_attrs,
   1030     fill_value=fill_value,
   1031 )
   1032 return Dataset._construct_direct(**merge_result._asdict())

File /g/data/xv83/dbi599/miniconda3/envs/icclim/lib/python3.10/site-packages/xarray/core/merge.py:757, in merge_core(objects, compat, join, combine_attrs, priority_arg, explicit_coords, indexes, fill_value)
    755 collected = collect_variables_and_indexes(aligned, indexes=indexes)
    756 prioritized = _get_priority_vars_and_indexes(aligned, priority_arg, compat=compat)
--> 757 variables, out_indexes = merge_collected(
    758     collected, prioritized, compat=compat, combine_attrs=combine_attrs
    759 )
    761 dims = calculate_dimensions(variables)
    763 coord_names, noncoord_names = determine_coords(coerced)

File /g/data/xv83/dbi599/miniconda3/envs/icclim/lib/python3.10/site-packages/xarray/core/merge.py:302, in merge_collected(grouped, prioritized, compat, combine_attrs, equals)
    300 variables = [variable for variable, _ in elements_list]
    301 try:
--> 302     merged_vars[name] = unique_variable(
    303         name, variables, compat, equals.get(name, None)
    304     )
    305 except MergeError:
    306     if compat != "minimal":
    307         # we need more than "minimal" compatibility (for which
    308         # we drop conflicting coordinates)

File /g/data/xv83/dbi599/miniconda3/envs/icclim/lib/python3.10/site-packages/xarray/core/merge.py:156, in unique_variable(name, variables, compat, equals)
    153                 break
    155 if not equals:
--> 156     raise MergeError(
    157         f"conflicting values for variable {name!r} on objects to be combined. "
    158         "You can skip this check by specifying compat='override'."
    159     )
    161 if combine_method:
    162     for var in variables[1:]:

MergeError: conflicting values for variable 'tasmax' on objects to be combined. You can skip this check by specifying compat='override'.
bzah commented 8 months ago

Hi @DamienIrving and sorry for the late reply. To me it seems the error comes from merging the 3 ssp370 files. I guess there is an issue with the coordinates or attributes of the tasmax variable in one of the files (a quick way to check this is sketched at the end of this comment). Or icclim uses the wrong merge strategy, in which case we need to fix that.

I can only find aggregated data for tasmax_day_ACCESS-ESM1-5_ssp370_r6i1p1f1_gn_20650101-21001231.nc. If you have a link to the 3 ssp files that you used, that would be helpful to reproduce the issue. Otherwise I will try to do it with another dataset.
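
For reference, a minimal sketch of such a check, using only xarray and the three file names from the report above (assuming the usual time/lat/lon layout and a tasmax variable in each file):

import xarray as xr

ssp_files = [
    'tasmax_ssp370_day_20950101-20951231.nc',
    'tasmax_ssp370_day_20960101-20961231.nc',
    'tasmax_ssp370_day_20970101-20971231.nc',
]
datasets = [xr.open_dataset(f) for f in ssp_files]
for first, second in zip(datasets, datasets[1:]):
    # Overlapping time steps between consecutive files are the usual suspect.
    overlap = first.time.to_index().intersection(second.time.to_index())
    print('overlapping time steps:', len(overlap))
    # Mismatched spatial coordinates would also make the merge fail.
    print('identical lat/lon:', first.lat.equals(second.lat) and first.lon.equals(second.lon))
    # Differing tasmax attributes are worth a look as well.
    print('tasmax attrs match:', first.tasmax.attrs == second.tasmax.attrs)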

bzah commented 8 months ago

After some experimentation, this does not appear to be an issue with icclim itself. I was able to reproduce the error with the following MRE, using xarray only. The MergeError is raised when the datasets being merged (the 3 ssp370 datasets in your case) have overlapping dimensions with incompatible values in the overlap.

In the case below, this is caused by the time dimension overlapping for year 2044 in both datasets and by the two arrays having different values over that overlap (1 in da1 and 2 in da2).

import numpy as np
import xarray as xr
import pandas as pd

# da1 covers 2043-2044, with every value set to 1.
da1 = xr.DataArray(
    data=np.full(365 * 2, 1).reshape((365 * 2, 1, 1)),
    dims=["time", "lat", "lon"],
    coords={
        "lat": [42],
        "lon": [42],
        "time": pd.date_range("2043-01-01", periods=365 * 2, freq="D"),
    },
    attrs={"units": "K"},
    name="tas",
)

# da2 covers 2044-2045, with every value set to 2, so it overlaps da1
# during 2044 with conflicting values.
da2 = xr.DataArray(
    data=np.full(365 * 2, 2).reshape((365 * 2, 1, 1)),
    dims=["time", "lat", "lon"],
    coords={
        "lat": [42],
        "lon": [42],
        "time": pd.date_range("2044-01-01", periods=365 * 2, freq="D"),
    },
    attrs={"units": "K"},
    name="tas",
)

xr.merge([da1, da2])
# ^ MergeError

Stacktrace:

Traceback (most recent call last):
  File "/home/bzah/workspace/cerfacs/icclim/mre_277.py", line 29, in <module>
    xr.merge([da1, da2])
  File "/home/bzah/micromamba/envs/icclim-dev/lib/python3.11/site-packages/xarray/core/merge.py", line 996, in merge
    merge_result = merge_core(
                   ^^^^^^^^^^^
  File "/home/bzah/micromamba/envs/icclim-dev/lib/python3.11/site-packages/xarray/core/merge.py", line 720, in merge_core
    variables, out_indexes = merge_collected(
                             ^^^^^^^^^^^^^^^^
  File "/home/bzah/micromamba/envs/icclim-dev/lib/python3.11/site-packages/xarray/core/merge.py", line 290, in merge_collected
    merged_vars[name] = unique_variable(
                        ^^^^^^^^^^^^^^^^
  File "/home/bzah/micromamba/envs/icclim-dev/lib/python3.11/site-packages/xarray/core/merge.py", line 144, in unique_variable
    raise MergeError(
xarray.core.merge.MergeError: conflicting values for variable 'tas' on objects to be combined. You can skip this check by specifying compat='override'.
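
For completeness, a minimal sketch of two possible xarray-side workarounds for the toy example above, before handing the data to icclim: compat="override" is the option the MergeError message itself suggests, and trimming the duplicated time steps is the other obvious one.

# Option 1: skip the equality check and keep values from the first object,
# as suggested by the MergeError message.
merged = xr.merge([da1, da2], compat="override")

# Option 2: drop the overlapping year from da2 before merging, so there is
# no conflict left to detect.
da2_trimmed = da2.sel(time=slice("2045-01-01", None))
merged = xr.merge([da1, da2_trimmed])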