STOmics / Stereopy

A toolkit of spatial transcriptomic analysis.
MIT License
201 stars 65 forks source link

Multi-Sample cell_bins data integrate error #348

Closed aadimator closed 1 week ago

aadimator commented 2 weeks ago

Hi, I've been trying to follow the official Multi-Sample tutorial to read in my adjusted.cellbin.h5ad files, and I'm getting the following error when I run the ms_data.integrate() step:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[5], [line 1](vscode-notebook-cell:?execution_count=5&line=1)
----> [1](vscode-notebook-cell:?execution_count=5&line=1) ms_data.integrate()
      [2](vscode-notebook-cell:?execution_count=5&line=2) ms_data

File c:\Users\mraadam\AppData\Local\miniforge3\envs\stereo_pdac\lib\site-packages\stereo\core\ms_data.py:732, in MSData.integrate(self, scope, remove_existed, **kwargs)
    [730](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/stereo/core/ms_data.py:730)     else:
    [731](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/stereo/core/ms_data.py:731)         batch_tags = [self._names.index(name) for name in self[scope].names]
--> [732](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/stereo/core/ms_data.py:732)     merged_data = merge(*data_list, var_type=self._var_type, batch_tags=batch_tags)
    [733](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/stereo/core/ms_data.py:733) else:
    [734](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/stereo/core/ms_data.py:734)     merged_data = deepcopy(data_list[0])

File c:\Users\mraadam\AppData\Local\miniforge3\envs\stereo_pdac\lib\functools.py:875, in singledispatch.<locals>.wrapper(*args, **kw)
    [871](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/functools.py:871) if not args:
    [872](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/functools.py:872)     raise TypeError(f'{funcname} requires at least '
    [873](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/functools.py:873)                     '1 positional argument')
--> [875](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/functools.py:875) return dispatch(args[0].__class__)(*args, **kw)

File c:\Users\mraadam\AppData\Local\miniforge3\envs\stereo_pdac\lib\site-packages\stereo\utils\data_helper.py:457, in __merge_for_ann_based_stereo_exp_data(reorganize_coordinate, horizontal_offset_additional, vertical_offset_additional, space_between, var_type, batch_tags, *data_list)
    [454](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/stereo/utils/data_helper.py:454)                 elif key == 'resolution':
    [455](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/stereo/utils/data_helper.py:455)                     attr['resolution'] = value
--> [457](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/stereo/utils/data_helper.py:457) adata_merged = ad.concat(
    [458](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/stereo/utils/data_helper.py:458)     adata_list,
    [459](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/stereo/utils/data_helper.py:459)     join='inner' if var_type != 'union' else 'outer',
    [460](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/stereo/utils/data_helper.py:460)     axis=0,
    [461](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/stereo/utils/data_helper.py:461)     label='batch',
    [462](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/stereo/utils/data_helper.py:462)     keys=batches,
    [463](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/stereo/utils/data_helper.py:463)     index_unique='-',
    [464](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/stereo/utils/data_helper.py:464)     merge='first',
    [465](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/stereo/utils/data_helper.py:465)     uns_merge='first'
    [466](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/stereo/utils/data_helper.py:466) )
    [467](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/stereo/utils/data_helper.py:467) bin_type = data_list[0].bin_type
    [468](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/stereo/utils/data_helper.py:468) bin_size = data_list[0].bin_size

File c:\Users\mraadam\AppData\Local\miniforge3\envs\stereo_pdac\lib\site-packages\anndata\_core\merge.py:1036, in concat(adatas, axis, join, merge, uns_merge, label, keys, index_unique, fill_value, pairwise)
   [1033](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/anndata/_core/merge.py:1033)     concat_annot[label] = label_col
   [1035](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/anndata/_core/merge.py:1035) # Annotation for other axis
-> [1036](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/anndata/_core/merge.py:1036) alt_annot = merge_dataframes(
   [1037](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/anndata/_core/merge.py:1037)     [getattr(a, alt_dim) for a in adatas], alt_indices, merge
   [1038](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/anndata/_core/merge.py:1038) )
   [1040](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/anndata/_core/merge.py:1040) X = concat_Xs(adatas, reindexers, axis=axis, fill_value=fill_value)
   [1042](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/anndata/_core/merge.py:1042) if join == "inner":

File c:\Users\mraadam\AppData\Local\miniforge3\envs\stereo_pdac\lib\site-packages\anndata\_core\merge.py:729, in merge_dataframes(dfs, new_index, merge_strategy)
    [726](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/anndata/_core/merge.py:726) def merge_dataframes(
    [727](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/anndata/_core/merge.py:727)     dfs: Iterable[pd.DataFrame], new_index, merge_strategy=merge_unique
    [728](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/anndata/_core/merge.py:728) ) -> pd.DataFrame:
--> [729](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/anndata/_core/merge.py:729)     dfs = [df.reindex(index=new_index) for df in dfs]
    [730](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/anndata/_core/merge.py:730)     # New dataframe with all shared data
    [731](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/anndata/_core/merge.py:731)     new_df = pd.DataFrame(merge_strategy(dfs), index=new_index)

File c:\Users\mraadam\AppData\Local\miniforge3\envs\stereo_pdac\lib\site-packages\anndata\_core\merge.py:729, in <listcomp>(.0)
    [726](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/anndata/_core/merge.py:726) def merge_dataframes(
    [727](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/anndata/_core/merge.py:727)     dfs: Iterable[pd.DataFrame], new_index, merge_strategy=merge_unique
    [728](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/anndata/_core/merge.py:728) ) -> pd.DataFrame:
--> [729](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/anndata/_core/merge.py:729)     dfs = [df.reindex(index=new_index) for df in dfs]
    [730](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/anndata/_core/merge.py:730)     # New dataframe with all shared data
    [731](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/anndata/_core/merge.py:731)     new_df = pd.DataFrame(merge_strategy(dfs), index=new_index)

File c:\Users\mraadam\AppData\Local\miniforge3\envs\stereo_pdac\lib\site-packages\pandas\util\_decorators.py:347, in rewrite_axis_style_signature.<locals>.decorate.<locals>.wrapper(*args, **kwargs)
    [345](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/util/_decorators.py:345) @wraps(func)
    [346](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/util/_decorators.py:346) def wrapper(*args, **kwargs) -> Callable[..., Any]:
--> [347](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/util/_decorators.py:347)     return func(*args, **kwargs)

File c:\Users\mraadam\AppData\Local\miniforge3\envs\stereo_pdac\lib\site-packages\pandas\core\frame.py:5205, in DataFrame.reindex(self, *args, **kwargs)
   [5203](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/frame.py:5203) kwargs.pop("axis", None)
   [5204](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/frame.py:5204) kwargs.pop("labels", None)
-> [5205](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/frame.py:5205) return super().reindex(**kwargs)

File c:\Users\mraadam\AppData\Local\miniforge3\envs\stereo_pdac\lib\site-packages\pandas\core\generic.py:5289, in NDFrame.reindex(self, *args, **kwargs)
   [5286](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/generic.py:5286)     return self._reindex_multi(axes, copy, fill_value)
   [5288](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/generic.py:5288) # perform the reindex on the axes
-> [5289](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/generic.py:5289) return self._reindex_axes(
   [5290](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/generic.py:5290)     axes, level, limit, tolerance, method, fill_value, copy
   [5291](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/generic.py:5291) ).__finalize__(self, method="reindex")

File c:\Users\mraadam\AppData\Local\miniforge3\envs\stereo_pdac\lib\site-packages\pandas\core\frame.py:5004, in DataFrame._reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy)
   [5002](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/frame.py:5002) index = axes["index"]
   [5003](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/frame.py:5003) if index is not None:
-> [5004](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/frame.py:5004)     frame = frame._reindex_index(
   [5005](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/frame.py:5005)         index, method, copy, level, fill_value, limit, tolerance
   [5006](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/frame.py:5006)     )
   [5008](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/frame.py:5008) return frame

File c:\Users\mraadam\AppData\Local\miniforge3\envs\stereo_pdac\lib\site-packages\pandas\core\frame.py:5023, in DataFrame._reindex_index(self, new_index, method, copy, level, fill_value, limit, tolerance)
   [5010](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/frame.py:5010) def _reindex_index(
   [5011](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/frame.py:5011)     self,
   [5012](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/frame.py:5012)     new_index,
   (...)
   [5018](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/frame.py:5018)     tolerance=None,
   [5019](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/frame.py:5019) ):
   [5020](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/frame.py:5020)     new_index, indexer = self.index.reindex(
   [5021](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/frame.py:5021)         new_index, method=method, level=level, limit=limit, tolerance=tolerance
   [5022](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/frame.py:5022)     )
-> [5023](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/frame.py:5023)     return self._reindex_with_indexers(
   [5024](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/frame.py:5024)         {0: [new_index, indexer]},
   [5025](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/frame.py:5025)         copy=copy,
   [5026](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/frame.py:5026)         fill_value=fill_value,
   [5027](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/frame.py:5027)         allow_dups=False,
   [5028](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/frame.py:5028)     )

File c:\Users\mraadam\AppData\Local\miniforge3\envs\stereo_pdac\lib\site-packages\pandas\core\generic.py:5355, in NDFrame._reindex_with_indexers(self, reindexers, fill_value, copy, allow_dups)
   [5352](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/generic.py:5352)     indexer = ensure_platform_int(indexer)
   [5354](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/generic.py:5354) # TODO: speed up on homogeneous DataFrame objects (see _reindex_multi)
-> [5355](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/generic.py:5355) new_data = new_data.reindex_indexer(
   [5356](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/generic.py:5356)     index,
   [5357](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/generic.py:5357)     indexer,
   [5358](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/generic.py:5358)     axis=baxis,
   [5359](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/generic.py:5359)     fill_value=fill_value,
   [5360](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/generic.py:5360)     allow_dups=allow_dups,
   [5361](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/generic.py:5361)     copy=copy,
   [5362](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/generic.py:5362) )
   [5363](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/generic.py:5363) # If we've made a copy once, no need to make another one
   [5364](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/generic.py:5364) copy = False

File c:\Users\mraadam\AppData\Local\miniforge3\envs\stereo_pdac\lib\site-packages\pandas\core\internals\managers.py:737, in BaseBlockManager.reindex_indexer(self, new_axis, indexer, axis, fill_value, allow_dups, copy, only_slice, use_na_proxy)
    [735](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/internals/managers.py:735) # some axes don't allow reindexing with dups
    [736](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/internals/managers.py:736) if not allow_dups:
--> [737](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/internals/managers.py:737)     self.axes[axis]._validate_can_reindex(indexer)
    [739](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/internals/managers.py:739) if axis >= self.ndim:
    [740](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/internals/managers.py:740)     raise IndexError("Requested axis not found in manager")

File c:\Users\mraadam\AppData\Local\miniforge3\envs\stereo_pdac\lib\site-packages\pandas\core\indexes\base.py:4316, in Index._validate_can_reindex(self, indexer)
   [4314](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/indexes/base.py:4314) # trying to reindex on an axis with duplicates
   [4315](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/indexes/base.py:4315) if not self._index_as_unique and len(indexer):
-> [4316](file:///C:/Users/mraadam/AppData/Local/miniforge3/envs/stereo_pdac/lib/site-packages/pandas/core/indexes/base.py:4316)     raise ValueError("cannot reindex on an axis with duplicate labels")

ValueError: cannot reindex on an axis with duplicate labels

Also, when reading in the data, we can see that the genes aren't being intersected as they were in the demo example (see image). I thought that only the common genes were supposed to be kept at this step, as in the official example.

Here's my full code:

import warnings
import shutil

import stereo as st
import pandas as pd
from pyprojroot import here
from loguru import logger
from stereo.core.ms_data import MSData
from stereo.core.ms_pipeline import slice_generator

from stereo_pdac.config import get_config

warnings.filterwarnings('ignore')

config = get_config()

print(f"Stereopy version: {st.__version__}")

# Get sample metadata
metadata = pd.read_csv(here() / config.data_raw / "sample_metadata.csv")
library_ids = metadata["Chip ID"].unique().tolist()

data_dir = here() / config.data_interim

# Get all files using the pattern: data_dir / $library_id / cell_correct / *.h5ad
files = []
for library_id in library_ids:
    files.extend((data_dir / library_id / "cell_correct").glob("*.h5ad"))

ms_data = MSData(_relationship='other', _var_type='intersect')

for file in files:
    ms_data += st.io.read_h5ad(file_path=file, bin_type='cell_bins')

ms_data.names = library_ids
ms_data.integrate()
ms_data

I'm using Windows 11, and I've installed Stereopy v1.4.0.

aadimator commented 1 week ago

I've been able to solve this error by adding the following lines before integration:

data_list = []

for library_id, file_path in zip(library_ids, files):
    data = st.io.read_h5ad(file_path=file_path, bin_type='cell_bins')
    # Had to do this; otherwise it was throwing an error during integration
    data.adata.obs['orig.ident'] = library_id
    data.adata.var_names_make_unique()
    data.adata.obs_names_make_unique()
    data.tl.raw_checkpoint()
    data_list.append(data)

# Create MSData object
ms_data = MSData(_data_list=data_list, _names=library_ids, _relationship='other', _var_type='intersect')

ms_data.integrate()