scverse / mudata

Multimodal Data (.h5mu) implementation for Python
https://mudata.rtfd.io
BSD 3-Clause "New" or "Revised" License
72 stars 16 forks source link

Cannot create view of view with pandas 2.0 #39

Closed grst closed 1 year ago

grst commented 1 year ago

Describe the bug: When subsetting a view again (i.e. creating a view of a view), an error is raised. This only happens in some cases. I wasn't able to pinpoint exactly when, but I have a reprex (minimal reproducible example).

To Reproduce Steps to reproduce the behaviour.

from mudata import MuData
from anndata import AnnData
import pandas as pd

m = MuData(
    {
        "test": AnnData(obs=pd.DataFrame(index=list("ABCDE"))),
        "test2": AnnData(obs=pd.DataFrame(index=list("ABE"))),
    }
)
m[[1, 2, 3], :][[0, 1], :]
IndexError: arrays used as indices must be of integer (or boolean) type
Stacktrace:

```pytb
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
Cell In[65], line 1
----> 1 mdata[[1,2,3], :][[1,2], :]

File ~/anaconda3/envs/scirpy_dev/lib/python3.9/site-packages/mudata/_core/mudata.py:388, in MuData.__getitem__(self, index)
    386     return self.mod[index]
    387 else:
--> 388     return MuData(self, as_view=True, index=index)

File ~/anaconda3/envs/scirpy_dev/lib/python3.9/site-packages/mudata/_core/mudata.py:80, in MuData.__init__(self, data, feature_types_names, as_view, index, **kwargs)
     78 self._init_common()
     79 if as_view:
---> 80     self._init_as_view(data, index)
     81     return
     83 # Add all modalities to a MuData object

File ~/anaconda3/envs/scirpy_dev/lib/python3.9/site-packages/mudata/_core/mudata.py:197, in MuData._init_as_view(self, mudata_ref, index)
    195         if len(cvaridx) == a.n_vars and np.all(np.diff(cvaridx) == 1):
    196             cvaridx = slice(None)
--> 197     self.mod[m] = a[cobsidx, cvaridx]
    199 self._obs = DataFrameView(mudata_ref.obs.iloc[obsidx, :], view_args=(self, "obs"))
    200 self._obsm = mudata_ref.obsm._view(self, (obsidx,))

File ~/anaconda3/envs/scirpy_dev/lib/python3.9/site-packages/anndata/_core/anndata.py:1101, in AnnData.__getitem__(self, index)
   1099 """Returns a sliced view of the object."""
   1100 oidx, vidx = self._normalize_indices(index)
-> 1101 return AnnData(self, oidx=oidx, vidx=vidx, asview=True)

File ~/anaconda3/envs/scirpy_dev/lib/python3.9/site-packages/anndata/_core/anndata.py:283, in AnnData.__init__(self, X, obs, var, uns, obsm, varm, layers, raw, dtype, shape, filename, filemode, asview, obsp, varp, oidx, vidx)
    281     if not isinstance(X, AnnData):
    282         raise ValueError("`X` has to be an AnnData object.")
--> 283     self._init_as_view(X, oidx, vidx)
    284 else:
    285     self._init_as_actual(
    286         X=X,
    287         obs=obs,
   (...)
    299         filemode=filemode,
    300     )

File ~/anaconda3/envs/scirpy_dev/lib/python3.9/site-packages/anndata/_core/anndata.py:322, in AnnData._init_as_view(self, adata_ref, oidx, vidx)
    320     prev_oidx, prev_vidx = adata_ref._oidx, adata_ref._vidx
    321     adata_ref = adata_ref._adata_ref
--> 322     oidx, vidx = _resolve_idxs((prev_oidx, prev_vidx), (oidx, vidx), adata_ref)
    323 # self._adata_ref is never a view
    324 self._adata_ref = adata_ref

File ~/anaconda3/envs/scirpy_dev/lib/python3.9/site-packages/anndata/_core/views.py:266, in _resolve_idxs(old, new, adata)
    265 def _resolve_idxs(old, new, adata):
--> 266     t = tuple(_resolve_idx(old[i], new[i], adata.shape[i]) for i in (0, 1))
    267     return t

File ~/anaconda3/envs/scirpy_dev/lib/python3.9/site-packages/anndata/_core/views.py:266, in <genexpr>(.0)
    265 def _resolve_idxs(old, new, adata):
--> 266     t = tuple(_resolve_idx(old[i], new[i], adata.shape[i]) for i in (0, 1))
    267     return t

File ~/anaconda3/envs/scirpy_dev/lib/python3.9/functools.py:877, in singledispatch.<locals>.wrapper(*args, **kw)
    873 if not args:
    874     raise TypeError(f'{funcname} requires at least '
    875                     '1 positional argument')
--> 877 return dispatch(args[0].__class__)(*args, **kw)

File ~/anaconda3/envs/scirpy_dev/lib/python3.9/site-packages/anndata/_core/views.py:279, in _resolve_idx_ndarray(old, new, l)
    277 if is_bool_dtype(old):
    278     old = np.where(old)[0]
--> 279 return old[new]

IndexError: arrays used as indices must be of integer (or boolean) type
```

Expected behaviour: Subsetting a view should work the same way as subsetting a copy.

System

Additional context This seems to be related to pandas 2.0. With 1.5.3 this works as expected.

gtca commented 1 year ago

Hey @grst,

It was a bit of a rabbit hole but it seems to have come down to the altered df.loc[:, colname] behaviour. The latest PR https://github.com/scverse/mudata/pull/43 should fix it.

grst commented 1 year ago

Actually, the above example is still a reprex for my issue - it now just fails with a different error message:

---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
Cell In[18], line 11
      3 import pandas as pd
      5 m = MuData(
      6     {
      7         "test": AnnData(obs=pd.DataFrame(index=list("ABCDE"))),
      8         "test2": AnnData(obs=pd.DataFrame(index=list("ABE"))),
      9     }
     10 )
---> 11 m[[1, 2, 3], :][[0, 1], :]

File /dev/shm/sturmgre/conda/scirpy/lib/python3.10/site-packages/mudata/_core/mudata.py:385, in MuData.__getitem__(self, index)
    383     return self.mod[index]
    384 else:
--> 385     return MuData(self, as_view=True, index=index)

File /dev/shm/sturmgre/conda/scirpy/lib/python3.10/site-packages/mudata/_core/mudata.py:80, in MuData.__init__(self, data, feature_types_names, as_view, index, **kwargs)
     78 self._init_common()
     79 if as_view:
---> 80     self._init_as_view(data, index)
     81     return
     83 # Add all modalities to a MuData object

File /dev/shm/sturmgre/conda/scirpy/lib/python3.10/site-packages/mudata/_core/mudata.py:197, in MuData._init_as_view(self, mudata_ref, index)
    195         if len(cvaridx) == a.n_vars and np.all(np.diff(cvaridx) == 1):
    196             cvaridx = slice(None)
--> 197     self.mod[m] = a[cobsidx, cvaridx]
    199 self._obs = DataFrameView(mudata_ref.obs.iloc[obsidx, :], view_args=(self, "obs"))
    200 self._obsm = mudata_ref.obsm._view(self, (obsidx,))

File /dev/shm/sturmgre/conda/scirpy/lib/python3.10/site-packages/anndata/_core/anndata.py:1101, in AnnData.__getitem__(self, index)
   1099 """Returns a sliced view of the object."""
   1100 oidx, vidx = self._normalize_indices(index)
-> 1101 return AnnData(self, oidx=oidx, vidx=vidx, asview=True)

File /dev/shm/sturmgre/conda/scirpy/lib/python3.10/site-packages/anndata/_core/anndata.py:283, in AnnData.__init__(self, X, obs, var, uns, obsm, varm, layers, raw, dtype, shape, filename, filemode, asview, obsp, varp, oidx, vidx)
    281     if not isinstance(X, AnnData):
    282         raise ValueError("`X` has to be an AnnData object.")
--> 283     self._init_as_view(X, oidx, vidx)
    284 else:
    285     self._init_as_actual(
    286         X=X,
    287         obs=obs,
   (...)
    299         filemode=filemode,
    300     )

File /dev/shm/sturmgre/conda/scirpy/lib/python3.10/site-packages/anndata/_core/anndata.py:322, in AnnData._init_as_view(self, adata_ref, oidx, vidx)
    320     prev_oidx, prev_vidx = adata_ref._oidx, adata_ref._vidx
    321     adata_ref = adata_ref._adata_ref
--> 322     oidx, vidx = _resolve_idxs((prev_oidx, prev_vidx), (oidx, vidx), adata_ref)
    323 # self._adata_ref is never a view
    324 self._adata_ref = adata_ref

File /dev/shm/sturmgre/conda/scirpy/lib/python3.10/site-packages/anndata/_core/views.py:266, in _resolve_idxs(old, new, adata)
    265 def _resolve_idxs(old, new, adata):
--> 266     t = tuple(_resolve_idx(old[i], new[i], adata.shape[i]) for i in (0, 1))
    267     return t

File /dev/shm/sturmgre/conda/scirpy/lib/python3.10/site-packages/anndata/_core/views.py:266, in <genexpr>(.0)
    265 def _resolve_idxs(old, new, adata):
--> 266     t = tuple(_resolve_idx(old[i], new[i], adata.shape[i]) for i in (0, 1))
    267     return t

File /dev/shm/sturmgre/conda/scirpy/lib/python3.10/functools.py:889, in singledispatch.<locals>.wrapper(*args, **kw)
    885 if not args:
    886     raise TypeError(f'{funcname} requires at least '
    887                     '1 positional argument')
--> 889 return dispatch(args[0].__class__)(*args, **kw)

File /dev/shm/sturmgre/conda/scirpy/lib/python3.10/site-packages/anndata/_core/views.py:279, in _resolve_idx_ndarray(old, new, l)
    277 if is_bool_dtype(old):
    278     old = np.where(old)[0]
--> 279 return old[new]

IndexError: index 1 is out of bounds for axis 0 with size 1
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
Cell In[15], line 1
----> 1 m2[[0, 1], :]

File /dev/shm/sturmgre/conda/scirpy/lib/python3.10/site-packages/mudata/_core/mudata.py:385, in MuData.__getitem__(self, index)
    383     return self.mod[index]
    384 else:
--> 385     return MuData(self, as_view=True, index=index)

File /dev/shm/sturmgre/conda/scirpy/lib/python3.10/site-packages/mudata/_core/mudata.py:80, in MuData.__init__(self, data, feature_types_names, as_view, index, **kwargs)
     78 self._init_common()
     79 if as_view:
---> 80     self._init_as_view(data, index)
     81     return
     83 # Add all modalities to a MuData object

File /dev/shm/sturmgre/conda/scirpy/lib/python3.10/site-packages/mudata/_core/mudata.py:197, in MuData._init_as_view(self, mudata_ref, index)
    195         if len(cvaridx) == a.n_vars and np.all(np.diff(cvaridx) == 1):
    196             cvaridx = slice(None)
--> 197     self.mod[m] = a[cobsidx, cvaridx]
    199 self._obs = DataFrameView(mudata_ref.obs.iloc[obsidx, :], view_args=(self, "obs"))
    200 self._obsm = mudata_ref.obsm._view(self, (obsidx,))

File /dev/shm/sturmgre/conda/scirpy/lib/python3.10/site-packages/anndata/_core/anndata.py:1101, in AnnData.__getitem__(self, index)
...
    277 if is_bool_dtype(old):
    278     old = np.where(old)[0]
--> 279 return old[new]

IndexError: index 1 is out of bounds for axis 0 with size 1

---------------------------------------------------------------------------
grst commented 1 year ago

OK, fair enough. This actually also fails with pandas 1.5.x. I'm back to investigating.

gtca commented 1 year ago

Hey @grst,

Thank you for debugging this with me. Another fix (https://github.com/scverse/mudata/pull/43) should resolve this!