xCDAT / xcdat

An extension of xarray for climate data analysis on structured grids.
https://xcdat.readthedocs.io/en/latest/
Apache License 2.0
101 stars 11 forks source link

[Bug]: Improve error message when `temporal.departures()` `reference_period` arg is invalid #665

Open tomvothecoder opened 3 weeks ago

tomvothecoder commented 3 weeks ago

What happened?

The error message when the reference_period arg is not valid is not clear and can confuse the end-user (AttributeError: 'IndexVariable' object has no attribute 'month').

What did you expect to happen? Are there any possible answers you came across?

There should be logic to detect whether there are time coordinates that cover the entire reference period. If there are missing time coordinates, raise an error early on about this (e.g., "Invalid reference_period selected. The time coordinates for this dataset span <START, END>.").

Minimal Complete Verifiable Example (MCVE)

# %%
# Reproduction script: builds a small dataset with yearly time coordinates
# (2000-2002) and then requests departures against a reference period
# (2009-2010) that lies entirely outside those coordinates.
import cftime
import numpy as np
import xarray as xr

import xcdat as xc

# Three yearly time coordinates (July 1 at 12:00 of 2000, 2001, and 2002),
# stored as cftime objects in an object-dtype array.
time_yearly = xr.DataArray(
    data=np.array(
        [
            cftime.DatetimeGregorian(2000, 7, 1, 12, 0, 0, 0, has_year_zero=False),
            cftime.DatetimeGregorian(2001, 7, 1, 12, 0, 0, 0, has_year_zero=False),
            cftime.DatetimeGregorian(2002, 7, 1, 12, 0, 0, 0, has_year_zero=False),
        ],
        dtype=object,
    ),
    dims=["time"],
    attrs={
        "axis": "T",
        "long_name": "time",
        "standard_name": "time",
        "bounds": "time_bnds",
    },
)

# Bounds for each time coordinate: [January 1 of the year, January 1 of the
# following year]. The name matches the "bounds" attribute set on the time
# coordinates above.
time_bnds_yearly = xr.DataArray(
    name="time_bnds",
    data=np.array(
        [
            [
                cftime.DatetimeGregorian(2000, 1, 1, 0, 0, 0, 0, has_year_zero=False),
                cftime.DatetimeGregorian(2001, 1, 1, 0, 0, 0, 0, has_year_zero=False),
            ],
            [
                cftime.DatetimeGregorian(2001, 1, 1, 0, 0, 0, 0, has_year_zero=False),
                cftime.DatetimeGregorian(2002, 1, 1, 0, 0, 0, 0, has_year_zero=False),
            ],
            [
                cftime.DatetimeGregorian(2002, 1, 1, 0, 0, 0, 0, has_year_zero=False),
                cftime.DatetimeGregorian(2003, 1, 1, 0, 0, 0, 0, has_year_zero=False),
            ],
        ],
        dtype=object,
    ),
    dims=["time", "bnds"],
    attrs={
        "xcdat_bounds": "True",
    },
)
# Four latitude points (degrees north).
lat = xr.DataArray(
    data=np.array([-90, -88.75, 88.75, 90]),
    dims=["lat"],
    attrs={"units": "degrees_north", "axis": "Y", "standard_name": "latitude"},
)
# Four longitude points (degrees east).
lon = xr.DataArray(
    data=np.array([0, 1.875, 356.25, 358.125]),
    dims=["lon"],
    attrs={"units": "degrees_east", "axis": "X", "standard_name": "longitude"},
)
# Data variable "ts": an array of ones with shape (time=3, lat=4, lon=4).
ts_decoded = xr.DataArray(
    name="ts",
    data=np.ones((3, 4, 4)),
    coords={"time": time_yearly, "lat": lat, "lon": lon},
    dims=["time", "lat", "lon"],
)
# Dataset combining the data variable with its time bounds.
ds = xr.Dataset(
    data_vars={"ts": ts_decoded, "time_bnds": time_bnds_yearly},
    coords={"lat": lat.copy(), "lon": lon.copy(), "time": time_yearly},
)

# %%
# The reference period (2009-01-01 to 2010-01-01) does not overlap the
# dataset's time coordinates (2000-2002). Per this report, the call fails with
# "AttributeError: 'IndexVariable' object has no attribute 'month'" rather
# than a clear message about the invalid reference period.
ds = ds.temporal.departures(
    "ts", freq="month", reference_period=("2009-01-01", "2010-01-01")
)

Relevant log output

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
File /opt/miniconda3/envs/xcdat_scipy_2024/lib/python3.11/site-packages/xarray/core/dataarray.py:863, in DataArray._getitem_coord(self, key)
    862 try:
--> 863     var = self._coords[key]
    864 except KeyError:

KeyError: 'time.month'

During handling of the above exception, another exception occurred:

AttributeError                            Traceback (most recent call last)
<ipython-input-9-03cd046f6c77> in ?()
      1 # %%
----> 2 ds = ds.temporal.departures(
      3     "ts", freq="month", reference_period=("2009-01-01", "2010-01-01")
      4 )

~/repositories/xcdat/xcdat/temporal.py in ?(self, data_var, freq, weighted, keep_weights, reference_period, season_config)
    766                 keep_weights,
    767                 season_config,
    768             )
    769 
--> 770         ds_climo = ds.temporal.climatology(
    771             data_var,
    772             freq,
    773             weighted,

~/repositories/xcdat/xcdat/temporal.py in ?(self, data_var, freq, weighted, keep_weights, reference_period, season_config)
    567         }
    568         """
    569         self._set_data_var_attrs(data_var)
    570 
--> 571         return self._averager(
    572             data_var,
    573             "climatology",
    574             freq,

~/repositories/xcdat/xcdat/temporal.py in ?(self, data_var, mode, freq, weighted, keep_weights, reference_period, season_config)
    834 
    835         if self._mode == "average":
    836             dv_avg = self._average(ds, data_var)
    837         elif self._mode in ["group_average", "climatology", "departures"]:
--> 838             dv_avg = self._group_average(ds, data_var)
    839 
    840         # The original time dimension is dropped from the dataset because
    841         # it becomes obsolete after the data variable is averaged. When the

~/repositories/xcdat/xcdat/temporal.py in ?(self, ds, data_var)
   1194         dv = _get_data_var(ds, data_var)
   1195 
   1196         # Label the time coordinates for grouping weights and the data variable
   1197         # values.
-> 1198         self._labeled_time = self._label_time_coords(dv[self.dim])
   1199 
   1200         if self._weighted:
   1201             time_bounds = ds.bounds.get_bounds("T", var_key=data_var)

~/repositories/xcdat/xcdat/temporal.py in ?(self, time_coords)
   1369         >>>       dtype='datetime64[ns]')
   1370         >>> Coordinates:
   1371         >>> * time     (time) datetime64[ns] 2000-01-01T00:00:00 ... 2000-04-01T00:00:00
   1372         """
-> 1373         df_dt_components: pd.DataFrame = self._get_df_dt_components(time_coords)
   1374         dt_objects = self._convert_df_to_dt(df_dt_components)
   1375 
   1376         time_grouped = xr.DataArray(

~/repositories/xcdat/xcdat/temporal.py in ?(self, time_coords)
   1423 
   1424         # Use the TIME_GROUPS dictionary to determine which components
   1425         # are needed to form the labeled time coordinates.
   1426         for component in TIME_GROUPS[self._mode][self._freq]:
-> 1427             df[component] = time_coords[f"{self.dim}.{component}"].values
   1428 
   1429         # The season frequency requires additional datetime components for
   1430         # processing, which are later removed before time coordinates are

/opt/miniconda3/envs/xcdat_scipy_2024/lib/python3.11/site-packages/xarray/core/dataarray.py in ?(self, key)
    870     def __getitem__(self, key: Any) -> Self:
    871         if isinstance(key, str):
--> 872             return self._getitem_coord(key)
    873         else:
    874             # xarray-style array indexing
    875             return self.isel(indexers=self._item_key_to_dict(key))

/opt/miniconda3/envs/xcdat_scipy_2024/lib/python3.11/site-packages/xarray/core/dataarray.py in ?(self, key)
    862         try:
    863             var = self._coords[key]
    864         except KeyError:
    865             dim_sizes = dict(zip(self.dims, self.shape))
--> 866             _, key, var = _get_virtual_variable(self._coords, key, dim_sizes)
    867 
    868         return self._replace_maybe_drop_dims(var, name=key)
...
    219     virtual_var = Variable(ref_var.dims, data)
    220 
    221     return ref_name, var_name, virtual_var

AttributeError: 'IndexVariable' object has no attribute 'month'

Anything else we need to know?

No response

Environment

Latest version of main and stable version (0.7.0)