scverse / mudata

Multimodal Data (.h5mu) implementation for Python
https://mudata.rtfd.io
BSD 3-Clause "New" or "Revised" License
72 stars 16 forks source link

Slicing MuData across modalities #50

Closed DriessenA closed 3 weeks ago

DriessenA commented 10 months ago

When slicing a MuData object, either selecting only rows (leaving the columns unrestricted) or requesting more columns than are present in the first modality raises 'IndexError: positional indexers are out-of-bounds'.

To Reproduce IN mdata OUT MuData object with n_obs × n_vars = 13592 × 22076 obs: 'barcodes_assigned', 'CAR', 'reads', 'UMI', 'time', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt' var: 'gene_ids', 'feature_types' 2 modalities rna: 13592 x 22055 obs: 'barcodes_assigned', 'CAR', 'reads', 'UMI', 'time', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'log10umi' var: 'gene_ids', 'feature_types', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'n_cells' prot: 13592 x 21 obs: 'barcodes_assigned', 'CAR', 'reads', 'UMI', 'time', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt' var: 'gene_ids', 'feature_types' layers: 'dsb', 'log10', 'norm_denoised', 'norm'

IN mdata.shape OUT (13592, 22076)

This works: IN mdata[:100, :22055] OUT View of MuData object with n_obs × n_vars = 100 × 22055 obs: 'barcodes_assigned', 'CAR', 'reads', 'UMI', 'time', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt' var: 'gene_ids', 'feature_types' 2 modalities rna: 100 x 22055 obs: 'barcodes_assigned', 'CAR', 'reads', 'UMI', 'time', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'log10umi' var: 'gene_ids', 'feature_types', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'n_cells' prot: 100 x 0 obs: 'barcodes_assigned', 'CAR', 'reads', 'UMI', 'time', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt' var: 'gene_ids', 'feature_types' layers: 'dsb', 'log10', 'norm_denoised', 'norm'

This fails: IN mdata[:100, :22056] or IN mdata[:100, :] OUT `--------------------------------------------------------------------------- IndexError Traceback (most recent call last) File ~/Extra_data/miniconda3/envs/CAR_env/lib/python3.9/site-packages/pandas/core/indexing.py:1587, in _iLocIndexer._get_list_axis(self, key, axis) 1586 try: -> 1587 return self.obj._take_with_is_copy(key, axis=axis) 1588 except IndexError as err: 1589 # re-raise with different error message

File ~/Extra_data/miniconda3/envs/CAR_env/lib/python3.9/site-packages/pandas/core/generic.py:3902, in NDFrame._take_with_is_copy(self, indices, axis) 3895 """ 3896 Internal version of the take method that sets the _is_copy 3897 attribute to keep track of the parent dataframe (using in indexing (...) 3900 See the docstring of take for full explanation of the parameters. 3901 """ -> 3902 result = self._take(indices=indices, axis=axis) 3903 # Maybe set copy if we didn't actually change the index.

File ~/Extra_data/miniconda3/envs/CAR_env/lib/python3.9/site-packages/pandas/core/generic.py:3886, in NDFrame._take(self, indices, axis, convert_indices) 3884 self._consolidate_inplace() -> 3886 new_data = self._mgr.take( 3887 indices, 3888 axis=self._get_block_manager_axis(axis), 3889 verify=True, 3890 convert_indices=convert_indices, 3891 ) 3892 return self._constructor(new_data).finalize(self, method="take")

File ~/Extra_data/miniconda3/envs/CAR_env/lib/python3.9/site-packages/pandas/core/internals/managers.py:975, in BaseBlockManager.take(self, indexer, axis, verify, convert_indices) 974 if convert_indices: --> 975 indexer = maybe_convert_indices(indexer, n, verify=verify) 977 new_labels = self.axes[axis].take(indexer)

File ~/Extra_data/miniconda3/envs/CAR_env/lib/python3.9/site-packages/pandas/core/indexers/utils.py:286, in maybe_convert_indices(indices, n, verify) 285 if mask.any(): --> 286 raise IndexError("indices are out-of-bounds") 287 return indices

IndexError: indices are out-of-bounds

The above exception was the direct cause of the following exception:

IndexError Traceback (most recent call last) Cell In[112], line 1 ----> 1 mdata[:100, :22056]

File ~/Extra_data/miniconda3/envs/CAR_env/lib/python3.9/site-packages/mudata/_core/mudata.py:388, in MuData.__getitem__(self, index) 386 return self.mod[index] 387 else: --> 388 return MuData(self, as_view=True, index=index)

File ~/Extra_data/miniconda3/envs/CAR_env/lib/python3.9/site-packages/mudata/_core/mudata.py:80, in MuData.__init__(self, data, feature_types_names, as_view, index, **kwargs) 78 self._init_common() 79 if as_view: ---> 80 self._init_as_view(data, index) 81 return 83 # Add all modalities to a MuData object

File ~/Extra_data/miniconda3/envs/CAR_env/lib/python3.9/site-packages/mudata/_core/mudata.py:197, in MuData._init_as_view(self, mudata_ref, index) 195 if len(cvaridx) == a.n_vars and np.all(np.diff(cvaridx) == 1): 196 cvaridx = slice(None) --> 197 self.mod[m] = a[cobsidx, cvaridx] 199 self._obs = DataFrameView(mudata_ref.obs.iloc[obsidx, :], view_args=(self, "obs")) 200 self._obsm = mudata_ref.obsm._view(self, (obsidx,))

File ~/Extra_data/miniconda3/envs/CAR_env/lib/python3.9/site-packages/anndata/_core/anndata.py:1109, in AnnData.__getitem__(self, index) 1107 """Returns a sliced view of the object.""" 1108 oidx, vidx = self._normalize_indices(index) -> 1109 return AnnData(self, oidx=oidx, vidx=vidx, asview=True)

File ~/Extra_data/miniconda3/envs/CAR_env/lib/python3.9/site-packages/anndata/_core/anndata.py:289, in AnnData.__init__(self, X, obs, var, uns, obsm, varm, layers, raw, dtype, shape, filename, filemode, asview, obsp, varp, oidx, vidx) 287 if not isinstance(X, AnnData): 288 raise ValueError("X has to be an AnnData object.") --> 289 self._init_as_view(X, oidx, vidx) 290 else: 291 self._init_as_actual( 292 X=X, 293 obs=obs, (...) 305 filemode=filemode, 306 )

File ~/Extra_data/miniconda3/envs/CAR_env/lib/python3.9/site-packages/anndata/_core/anndata.py:337, in AnnData._init_as_view(self, adata_ref, oidx, vidx) 335 # views on attributes of adata_ref 336 obs_sub = adata_ref.obs.iloc[oidx] --> 337 var_sub = adata_ref.var.iloc[vidx] 338 self._obsm = adata_ref.obsm._view(self, (oidx,)) 339 self._varm = adata_ref.varm._view(self, (vidx,))

File ~/Extra_data/miniconda3/envs/CAR_env/lib/python3.9/site-packages/pandas/core/indexing.py:1073, in _LocationIndexer.__getitem__(self, key) 1070 axis = self.axis or 0 1072 maybe_callable = com.apply_if_callable(key, self.obj) -> 1073 return self._getitem_axis(maybe_callable, axis=axis)

File ~/Extra_data/miniconda3/envs/CAR_env/lib/python3.9/site-packages/pandas/core/indexing.py:1616, in _iLocIndexer._getitem_axis(self, key, axis) 1614 # a list of integers 1615 elif is_list_like_indexer(key): -> 1616 return self._get_list_axis(key, axis=axis) 1618 # a single integer 1619 else: 1620 key = item_from_zerodim(key)

File ~/Extra_data/miniconda3/envs/CAR_env/lib/python3.9/site-packages/pandas/core/indexing.py:1590, in _iLocIndexer._get_list_axis(self, key, axis) 1587 return self.obj._take_with_is_copy(key, axis=axis) 1588 except IndexError as err: 1589 # re-raise with different error message -> 1590 raise IndexError("positional indexers are out-of-bounds") from err

IndexError: positional indexers are out-of-bounds`

Expected behaviour Slicing should work across modalities. If no number of features is given, all features should be used. If the requested number of features exceeds the size of the first modality, the slice should continue into the second modality.

System

Additional info Sometimes slicing works once, but then gives the error on a second try/slice.

gtca commented 10 months ago

This might have already been fixed in mudata v0.2.3, could you give it a try?

gtca commented 3 weeks ago

The described use case seems to work for MuData objects. If there's a particular scenario that still results in an error, please follow up with some steps to reproduce it!