pydata / xarray

N-D labeled arrays and datasets in Python
https://xarray.dev
Apache License 2.0
3.63k stars 1.09k forks source link

Setting an `xindex` causes `.where` to fail #9697

Closed max-sixty closed 3 weeks ago

max-sixty commented 3 weeks ago

What happened?

I'm trying to use more xindex rather than MultiIndex, in pursuit of the new world :)

But it seems that doing so causes `.where` to fail?

MCVE & traceback below

What did you expect to happen?

I don't see why this shouldn't work — possibly I'm misunderstanding something?

Minimal Complete Verifiable Example

import numpy as np
import xarray as xr
ds = xr.tutorial.open_dataset("air_temperature")

ds = ds.assign_coords(lat2=ds.lat + 2).set_xindex('lat2').where(lambda x: x)

MCVE confirmation

Relevant log output

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[4], line 1
----> 1 ds = ds.assign_coords(lat2=ds.lat + 2).set_xindex('lat2').where(lambda x: x)

File ~/workspace/xarray/xarray/core/common.py:1233, in DataWithCoords.where(self, cond, other, drop)
   1230     self = self.isel(**indexers)
   1231     cond = cond.isel(**indexers)
-> 1233 return ops.where_method(self, cond, other)

File ~/workspace/xarray/xarray/core/ops.py:179, in where_method(self, cond, other)
    177 # alignment for three arguments is complicated, so don't support it yet
    178 join = "inner" if other is dtypes.NA else "exact"
--> 179 return apply_ufunc(
    180     duck_array_ops.where_method,
    181     self,
    182     cond,
    183     other,
    184     join=join,
    185     dataset_join=join,
    186     dask="allowed",
    187     keep_attrs=True,
    188 )

File ~/workspace/xarray/xarray/core/computation.py:1252, in apply_ufunc(func, input_core_dims, output_core_dims, exclude_dims, vectorize, join, dataset_join, dataset_fill_value, keep_attrs, kwargs, dask, output_dtypes, output_sizes, meta, dask_gufunc_kwargs, on_missing_core_dim, *args)
   1250 # feed datasets apply_variable_ufunc through apply_dataset_vfunc
   1251 elif any(is_dict_like(a) for a in args):
-> 1252     return apply_dataset_vfunc(
   1253         variables_vfunc,
   1254         *args,
   1255         signature=signature,
   1256         join=join,
   1257         exclude_dims=exclude_dims,
   1258         dataset_join=dataset_join,
   1259         fill_value=dataset_fill_value,
   1260         keep_attrs=keep_attrs,
   1261         on_missing_core_dim=on_missing_core_dim,
   1262     )
   1263 # feed DataArray apply_variable_ufunc through apply_dataarray_vfunc
   1264 elif any(isinstance(a, DataArray) for a in args):

File ~/workspace/xarray/xarray/core/computation.py:509, in apply_dataset_vfunc(func, signature, join, dataset_join, fill_value, exclude_dims, keep_attrs, on_missing_core_dim, *args)
    505 objs = _all_of_type(args, Dataset)
    507 if len(args) > 1:
    508     args = tuple(
--> 509         deep_align(
    510             args,
    511             join=join,
    512             copy=False,
    513             exclude=exclude_dims,
    514             raise_on_invalid=False,
    515         )
    516     )
    518 list_of_coords, list_of_indexes = build_output_coords_and_indexes(
    519     args, signature, exclude_dims, combine_attrs=keep_attrs
    520 )
    521 args = tuple(getattr(arg, "data_vars", arg) for arg in args)

File ~/workspace/xarray/xarray/core/alignment.py:947, in deep_align(objects, join, copy, indexes, exclude, raise_on_invalid, fill_value)
    944     else:
    945         out.append(variables)
--> 947 aligned = align(
    948     *targets,
    949     join=join,
    950     copy=copy,
    951     indexes=indexes,
    952     exclude=exclude,
    953     fill_value=fill_value,
    954 )
    956 for position, key, aligned_obj in zip(positions, keys, aligned, strict=True):
    957     if key is no_key:

File ~/workspace/xarray/xarray/core/alignment.py:883, in align(join, copy, indexes, exclude, fill_value, *objects)
    687 """
    688 Given any number of Dataset and/or DataArray objects, returns new
    689 objects with aligned indexes and dimension sizes.
   (...)
    873
    874 """
    875 aligner = Aligner(
    876     objects,
    877     join=join,
   (...)
    881     fill_value=fill_value,
    882 )
--> 883 aligner.align()
    884 return aligner.results

File ~/workspace/xarray/xarray/core/alignment.py:574, in Aligner.align(self)
    572 self.find_matching_indexes()
    573 self.find_matching_unindexed_dims()
--> 574 self.assert_no_index_conflict()
    575 self.align_indexes()
    576 self.assert_unindexed_dim_sizes_equal()

File ~/workspace/xarray/xarray/core/alignment.py:318, in Aligner.assert_no_index_conflict(self)
    314 if dup:
    315     items_msg = ", ".join(
    316         f"{k!r} ({v} conflicting indexes)" for k, v in dup.items()
    317     )
--> 318     raise ValueError(
    319         "cannot re-index or align objects with conflicting indexes found for "
    320         f"the following {msg}: {items_msg}\n"
    321         "Conflicting indexes may occur when\n"
    322         "- they relate to different sets of coordinate and/or dimension names\n"
    323         "- they don't have the same type\n"
    324         "- they may be used to reindex data along common dimensions"
    325     )

ValueError: cannot re-index or align objects with conflicting indexes found for the following dimensions: 'lat' (2 conflicting indexes)
Conflicting indexes may occur when
- they relate to different sets of coordinate and/or dimension names
- they don't have the same type
- they may be used to reindex data along common dimensions

Anything else we need to know?

No response

Environment

INSTALLED VERSIONS
------------------
commit: 0c6cded4aa37337570ca788d1510e1063e713e10
python: 3.11.10 (main, Sep 7 2024, 01:03:31) [Clang 15.0.0 (clang-1500.3.9.4)]
python-bits: 64
OS: Darwin
OS-release: 23.6.0
machine: arm64
processor: arm
byteorder: little
LC_ALL: en_US.UTF-8
LANG: None
LOCALE: ('en_US', 'UTF-8')
libhdf5: 1.14.3
libnetcdf: 4.9.2
xarray: 2024.9.1.dev32+gece582dd
pandas: 2.2.2
numpy: 2.0.2
scipy: 1.14.1
netCDF4: 1.7.1.post2
pydap: None
h5netcdf: 1.3.0
h5py: 3.11.0
zarr: 2.18.3
cftime: 1.6.4
nc_time_axis: 1.4.1
iris: None
bottleneck: 1.4.0
dask: 2024.8.2
distributed: 2024.8.2
matplotlib: 3.9.2
cartopy: None
seaborn: 0.13.2
numbagg: 0.8.1
fsspec: 2024.9.0
cupy: None
pint: None
sparse: None
flox: 0.9.12
numpy_groupies: 0.11.2
setuptools: 69.2.0
pip: 24.0
conda: None
pytest: 8.3.3
mypy: 1.11.2
IPython: 8.24.0
sphinx: None
mathause commented 3 weeks ago

#7695 is related

max-sixty commented 3 weeks ago

Ah great, I think this is a dupe of that, so I'll close.

Though this is a nice example where, IIUC, the behavior doesn't make sense — they're exactly the same indexes, they came from the same object...