scverse / scanpy

Single-cell analysis in Python. Scales to >1M cells.
https://scanpy.readthedocs.io
BSD 3-Clause "New" or "Revised" License
1.93k stars 604 forks source link

Error in running scrublet #3070

Open dicklim opened 6 months ago

dicklim commented 6 months ago

Please make sure these conditions are met

What happened?

Hi, I am running the Scrublet function to remove doublets. the annadata was generated by concat several samples from two articles:

[1] Wu F, Fan J, He Y, et al. Single-cell profiling of tumor heterogeneity and the microenvironment in advanced non-small cell lung cancer[J]. Nature Communications, 2021, 12(1): 2540. [2] Wang Y, Chen D, Liu Y, et al. Multidirectional characterization of cellular composition and spatial architecture in human multiple primary lung cancers[J]. Cell Death & Disease, 2023, 14(7): 462.

However, there was an error I cann't handle.

Minimal code sample

# 240520鳞癌,不用
# lung_ti_p1 = sc.read_10x_mtx(os.path.join(root, 'GSE200972_RAW', 'GSM6047623_P1_T_R_I'), var_names='gene_symbols', cache=True, cache_compression='gzip')
# lung_tm_p1 = sc.read_10x_mtx(os.path.join(root, 'GSE200972_RAW', 'GSM6047624_P1_N_R_I'), var_names='gene_symbols', cache=True, cache_compression='gzip')
# lung_ni_p1 = sc.read_10x_mtx(os.path.join(root, 'GSE200972_RAW', 'GSM6047624_P1_N_R_I'), var_names='gene_symbols', cache=True, cache_compression='gzip')
# lung_nm_p1 = sc.read_10x_mtx(os.path.join(root, 'GSE200972_RAW', 'GSM6047626_P1_N_R_M'), var_names='gene_symbols', cache=True, cache_compression='gzip')
# 240520 去掉癌旁,只用癌
lung_ti_p2 = sc.read_10x_mtx(os.path.join(root, 'GSE200972_RAW', 'GSM6047627_P2_T_R_I'), var_names='gene_symbols', cache=True, cache_compression='gzip')
lung_tm_p2 = sc.read_10x_mtx(os.path.join(root, 'GSE200972_RAW', 'GSM6047629_P2_T_R_M'), var_names='gene_symbols', cache=True, cache_compression='gzip')
# lung_ni_p2 = sc.read_10x_mtx(os.path.join(root, 'GSE200972_RAW', 'GSM6047628_P2_N_R_I'), var_names='gene_symbols', cache=True, cache_compression='gzip')
# lung_nm_p2 = sc.read_10x_mtx(os.path.join(root, 'GSE200972_RAW', 'GSM6047630_P2_N_R_M'), var_names='gene_symbols', cache=True, cache_compression='gzip')
lung_ti1_P3 = sc.read_10x_mtx(os.path.join(root, 'GSE200972_RAW', 'GSM6047631_P8_T1_R_I'), var_names='gene_symbols', cache=True, cache_compression='gzip')
lung_tm1_P3 = sc.read_10x_mtx(os.path.join(root, 'GSE200972_RAW', 'GSM6047634_P8_T1_R_M'), var_names='gene_symbols', cache=True, cache_compression='gzip')
# lung_ni_P3 = sc.read_10x_mtx(os.path.join(root, 'GSE200972_RAW', 'GSM6047633_P8_N_R_I'), var_names='gene_symbols', cache=True, cache_compression='gzip')
# lung_nm_P3 = sc.read_10x_mtx(os.path.join(root, 'GSE200972_RAW', 'GSM6047636_P8_N_R_M'), var_names='gene_symbols', cache=True, cache_compression='gzip')
lung_ti2_P3 = sc.read_10x_mtx(os.path.join(root, 'GSE200972_RAW', 'GSM6047632_P8_T2_R_I'), var_names='gene_symbols', cache=True, cache_compression='gzip')
lung_tm2_P3 = sc.read_10x_mtx(os.path.join(root, 'GSE200972_RAW', 'GSM6047635_P8_T2_R_M'), var_names='gene_symbols', cache=True, cache_compression='gzip')
# https://cloud.tencent.com/developer/article/2385592这儿得转置一下,不然不对
lung_ti_p4 = sc.read_text(os.path.join(root, 'GSE200972_RAW', 'GSM6047637_P4-2T1_matrix.tsv.gz')).T
# lung_ni_p4 = sc.read_text(os.path.join(root, 'GSE200972_RAW', 'GSM6047638_P4-2T2_matrix.tsv.gz')).T
lung_ts1_p4 = sc.read_text(os.path.join(root, 'GSE200972_RAW', 'GSM6047639_P4-2N_matrix.tsv.gz')).T
lung_ts2_p4 = sc.read_text(os.path.join(root, 'GSE200972_RAW', 'GSM6047640_P4-1T_matrix.tsv.gz')).T
# lung_ns_p4 = sc.read_text(os.path.join(root, 'GSE200972_RAW', 'GSM6047641_P4-1N_matrix.tsv.gz')).T

# 240520
lung_P5 = sc.read_text('D:\课题\博士课题\单细胞分析\GSE148071_RAW\GSM4453580_P5_exp.txt.gz').T
lung_P8 = sc.read_text('D:\课题\博士课题\单细胞分析\GSE148071_RAW\GSM4453583_P8_exp.txt.gz').T
lung_P16 = sc.read_text('D:\课题\博士课题\单细胞分析\GSE148071_RAW\GSM4453591_P16_exp.txt.gz').T
lung_P24 = sc.read_text('D:\课题\博士课题\单细胞分析\GSE148071_RAW\GSM4453599_P24_exp.txt.gz').T
lung_P28 = sc.read_text('D:\课题\博士课题\单细胞分析\GSE148071_RAW\GSM4453603_P28_exp.txt.gz').T
lung_P38 = sc.read_text('D:\课题\博士课题\单细胞分析\GSE148071_RAW\GSM4453613_P38_exp.txt.gz').T
lung_P20 = sc.read_text('D:\课题\博士课题\单细胞分析\GSE148071_RAW\GSM4453595_P20_exp.txt.gz').T
lung_P9 = sc.read_text('D:\课题\博士课题\单细胞分析\GSE148071_RAW\GSM4453584_P9_exp.txt.gz').T
lung_P33 = sc.read_text('D:\课题\博士课题\单细胞分析\GSE148071_RAW\GSM4453608_P33_exp.txt.gz').T
lung_P13 = sc.read_text('D:\课题\博士课题\单细胞分析\GSE148071_RAW\GSM4453588_P13_exp.txt.gz').T
lung_P21 = sc.read_text('D:\课题\博士课题\单细胞分析\GSE148071_RAW\GSM4453596_P21_exp.txt.gz').T
lung_P32 = sc.read_text('D:\课题\博士课题\单细胞分析\GSE148071_RAW\GSM4453607_P32_exp.txt.gz').T
lung_P35 = sc.read_text('D:\课题\博士课题\单细胞分析\GSE148071_RAW\GSM4453610_P35_exp.txt.gz').T
lung_P2 = sc.read_text('D:\课题\博士课题\单细胞分析\GSE148071_RAW\GSM4453577_P2_exp.txt.gz').T
lung_P39 = sc.read_text('D:\课题\博士课题\单细胞分析\GSE148071_RAW\GSM4453614_P39_exp.txt.gz').T
lung_P29 = sc.read_text('D:\课题\博士课题\单细胞分析\GSE148071_RAW\GSM4453604_P29_exp.txt.gz').T
lung_P34 = sc.read_text('D:\课题\博士课题\单细胞分析\GSE148071_RAW\GSM4453609_P34_exp.txt.gz').T

alldata = lung_ti_p2.concatenate(lung_tm_p2,
                                 lung_ti1_P3,lung_tm1_P3,lung_ti2_P3,lung_tm2_P3,
                                 lung_ti_p4,lung_ts1_p4,lung_ts2_p4,
                                 lung_P2, lung_P5, lung_P8, lung_P9,
                                 lung_P13, lung_P16,
                                 lung_P20, lung_P21, lung_P24, lung_P28, lung_P29,
                                 lung_P32, lung_P33, lung_P34, lung_P35, lung_P38, lung_P39,
                                 batch_categories = ["TI-P2", "TM-P2",
                                                     "TI1-P3", 'TM1-P3', 'TI2-P3', 'TM2-P3',
                                                     "TI-P4",'TS1-P4','TS2-P4',
                                                     'lung_P2', 'lung_P5', 'lung_P8', 'lung_P9',
                                                     'lung_P13', 'lung_P16',
                                                     'lung_P20', 'lung_P21', 'lung_P24', 'lung_P28', 'lung_P29',
                                                     'lung_P32', 'lung_P33', 'lung_P34', 'lung_P35', 'lung_P38', 'lung_P39'],
                                 join='outer')

print('Begin of post doublets removal and QC plot')
sc.pp.scrublet(alldata, n_neighbors=10)
alldata = alldata[alldata.obs['predicted_doublet']==False, :].copy()
n1 = alldata.shape[0]
print(f'Cells retained after scrublet: {n1}, {n0-n1} removed.')
print(f'End of post doublets removal and QC plots.')

Error output

Begin of post doublets removal and QC plot
Running Scrublet
normalizing counts per cell

C:\ProgramData\Anaconda3\envs\dl\lib\site-packages\scanpy\preprocessing\_normalization.py:233: UserWarning: Some cells have zero counts
  warn(UserWarning("Some cells have zero counts"))

    finished (0:00:00)
WARNING: adata.X seems to be already log-transformed.
extracting highly variable genes

C:\ProgramData\Anaconda3\envs\dl\lib\site-packages\scanpy\preprocessing\_simple.py:377: RuntimeWarning: invalid value encountered in log1p
  np.log1p(X, out=X)

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[59], line 2
      1 print('Begin of post doublets removal and QC plot')
----> 2 sc.pp.scrublet(alldata, n_neighbors=10)
      3 alldata = alldata[alldata.obs['predicted_doublet']==False, :].copy()
      4 n1 = alldata.shape[0]

File C:\ProgramData\Anaconda3\envs\dl\lib\site-packages\legacy_api_wrap\__init__.py:80, in legacy_api.<locals>.wrapper.<locals>.fn_compatible(*args_all, **kw)
     77 @wraps(fn)
     78 def fn_compatible(*args_all: P.args, **kw: P.kwargs) -> R:
     79     if len(args_all) <= n_positional:
---> 80         return fn(*args_all, **kw)
     82     args_pos: P.args
     83     args_pos, args_rest = args_all[:n_positional], args_all[n_positional:]

File C:\ProgramData\Anaconda3\envs\dl\lib\site-packages\scanpy\preprocessing\_scrublet\__init__.py:282, in scrublet(adata, adata_sim, batch_key, sim_doublet_ratio, expected_doublet_rate, stdev_doublet_rate, synthetic_doublet_umi_subsampling, knn_dist_metric, normalize_variance, log_transform, mean_center, n_prin_comps, use_approx_neighbors, get_doublet_neighbor_parents, n_neighbors, threshold, verbose, copy, random_state)
    279     adata.uns["scrublet"]["batched_by"] = batch_key
    281 else:
--> 282     scrubbed = _run_scrublet(adata_obs, adata_sim)
    284     # Copy outcomes to input object from our processed version
    286     adata.obs["doublet_score"] = scrubbed["obs"]["doublet_score"]

File C:\ProgramData\Anaconda3\envs\dl\lib\site-packages\scanpy\preprocessing\_scrublet\__init__.py:204, in scrublet.<locals>._run_scrublet(ad_obs, ad_sim)
    201 # HVG process needs log'd data.
    203 logged = pp.log1p(ad_obs, copy=True)
--> 204 pp.highly_variable_genes(logged)
    205 ad_obs = ad_obs[:, logged.var["highly_variable"]].copy()
    207 # Simulate the doublets based on the raw expressions from the normalised
    208 # and filtered object.

File C:\ProgramData\Anaconda3\envs\dl\lib\site-packages\legacy_api_wrap\__init__.py:80, in legacy_api.<locals>.wrapper.<locals>.fn_compatible(*args_all, **kw)
     77 @wraps(fn)
     78 def fn_compatible(*args_all: P.args, **kw: P.kwargs) -> R:
     79     if len(args_all) <= n_positional:
---> 80         return fn(*args_all, **kw)
     82     args_pos: P.args
     83     args_pos, args_rest = args_all[:n_positional], args_all[n_positional:]

File C:\ProgramData\Anaconda3\envs\dl\lib\site-packages\scanpy\preprocessing\_highly_variable_genes.py:648, in highly_variable_genes(***failed resolving arguments***)
    645 del min_disp, max_disp, min_mean, max_mean, n_top_genes
    647 if batch_key is None:
--> 648     df = _highly_variable_genes_single_batch(
    649         adata, layer=layer, cutoff=cutoff, n_bins=n_bins, flavor=flavor
    650     )
    651 else:
    652     df = _highly_variable_genes_batched(
    653         adata, batch_key, layer=layer, cutoff=cutoff, n_bins=n_bins, flavor=flavor
    654     )

File C:\ProgramData\Anaconda3\envs\dl\lib\site-packages\scanpy\preprocessing\_highly_variable_genes.py:281, in _highly_variable_genes_single_batch(adata, layer, cutoff, n_bins, flavor)
    279 # all of the following quantities are "per-gene" here
    280 df = pd.DataFrame(dict(zip(["means", "dispersions"], (mean, dispersion))))
--> 281 df["mean_bin"] = _get_mean_bins(df["means"], flavor, n_bins)
    282 disp_stats = _get_disp_stats(df, flavor)
    284 # actually do the normalization

File C:\ProgramData\Anaconda3\envs\dl\lib\site-packages\scanpy\preprocessing\_highly_variable_genes.py:307, in _get_mean_bins(means, flavor, n_bins)
    304 else:
    305     raise ValueError('`flavor` needs to be "seurat" or "cell_ranger"')
--> 307 return pd.cut(means, bins=bins)

File C:\ProgramData\Anaconda3\envs\dl\lib\site-packages\pandas\core\reshape\tile.py:258, in cut(x, bins, right, labels, retbins, precision, include_lowest, duplicates, ordered)
    255 if sz == 0:
    256     raise ValueError("Cannot cut empty array")
--> 258 rng = (nanops.nanmin(x), nanops.nanmax(x))
    259 mn, mx = (mi + 0.0 for mi in rng)
    261 if np.isinf(mn) or np.isinf(mx):
    262     # GH 24314

File C:\ProgramData\Anaconda3\envs\dl\lib\site-packages\pandas\core\nanops.py:147, in bottleneck_switch.__call__.<locals>.f(values, axis, skipna, **kwds)
    145         result = alt(values, axis=axis, skipna=skipna, **kwds)
    146 else:
--> 147     result = alt(values, axis=axis, skipna=skipna, **kwds)
    149 return result

File C:\ProgramData\Anaconda3\envs\dl\lib\site-packages\pandas\core\nanops.py:404, in _datetimelike_compat.<locals>.new_func(values, axis, skipna, mask, **kwargs)
    401 if datetimelike and mask is None:
    402     mask = isna(values)
--> 404 result = func(values, axis=axis, skipna=skipna, mask=mask, **kwargs)
    406 if datetimelike:
    407     result = _wrap_results(result, orig_values.dtype, fill_value=iNaT)

File C:\ProgramData\Anaconda3\envs\dl\lib\site-packages\pandas\core\nanops.py:1089, in _nanminmax.<locals>.reduction(values, axis, skipna, mask)
   1086 if values.size == 0:
   1087     return _na_for_min_count(values, axis)
-> 1089 values, mask = _get_values(
   1090     values, skipna, fill_value_typ=fill_value_typ, mask=mask
   1091 )
   1092 result = getattr(values, meth)(axis)
   1093 result = _maybe_null_out(result, axis, mask, values.shape)

File C:\ProgramData\Anaconda3\envs\dl\lib\site-packages\pandas\core\nanops.py:316, in _get_values(values, skipna, fill_value, fill_value_typ, mask)
    314 if datetimelike or _na_ok_dtype(dtype):
    315     values = values.copy()
--> 316     np.putmask(values, mask, fill_value)
    317 else:
    318     # np.where will promote if needed
    319     values = np.where(~mask, values, fill_value)

TypeError: putmask: first argument must be an array

Versions

``` scanpy 1.10.1 numpy 1.26.0 ```
ilan-gold commented 6 months ago

@dicklim Can you make a smaller and reproducible example? Also could you add more details of your environment? For example:

import scanpy as sc
adata = sc.datasets.pbmc3k()
sc.pp.scrublet(adata, n_neighbors=10)

works fine with the details you provided.

dicklim commented 3 days ago

Hi, sorry that I haven't reply timely. However, after several mounths, I can't solve the problem, and the error still occured for several times. Here I want to process the data from Kim et al. 2020. GSE131907

The matrix file can be downloaded in GSE131907_Lung_Cancer_raw_UMI_matrix.txt

I also exported a file before scrublet, then you can reproduct the Error. However the file is so large that I can only uploaded it to the BaiduNetDisk. If you can download the h5ad file, you can just run the scrublet part.

I greatly appreciate your efforts in developing Scanpy. I look forward to hearing from you soon!

The enviroument is : scanpy==1.10.3 anndata==0.10.9 umap==0.5.3 numpy==1.26.4 scipy==1.13.1 pandas==2.2.2 scikit-learn==1.0.2 statsmodels==0.14.2 igraph==0.11.6 pynndescent==0.5.10 The process code is shown below:

import os
import glob
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns
import anndata as ad
import matplotlib
import matplotlib.pyplot as plt
from scipy.sparse import csr_matrix
import re
# pd.set_option('mode.use_inf_as_na', True)
# import SCCAF
import infercnvpy as cnv
import cytotrace2_py

sc.settings.cachedir = 'D:/BaiduSyncdisk/博士课题/scRNA/Kim/cache'  # 缓存的单细胞数据
sc.settings.figdir = 'D:/BaiduSyncdisk/博士课题/scRNA/Kim/figure'  # 图片输出
sc.settings.logpath = 'D:/BaiduSyncdisk/博士课题/scRNA/Kim/log'  # 日志
sc.settings.max_memory = 50  # 最大内存
sc.settings.n_jobs = 12  # 最大线程
sc.settings.verbosity = 1  # 设置日志等级: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_header()

# plt.rcParams['pdf.fonttype'] = 'truetype'
config = {
    "font.family":'Arial',  # 设置字体类型
    "axes.unicode_minus": False, #解决负号无法显示的问题
    'pdf.fonttype': 'truetype'
}
matplotlib.rcParams.update(config)

sc.settings.set_figure_params(
    dpi=80, dpi_save=600,fontsize=12,
    figsize=(4, 4),
    transparent =True,
    vector_friendly=True,
    format='pdf')

# Raw counts
alldata = sc.read_text('GSE131907_Lung_Cancer_raw_UMI_matrix.txt.gz').T
# alldata.write(LUAD_Kim.h5ad")

alldata.obs['sample'] = alldata.obs.index
alldata.obs['sample'] = alldata.obs['sample'].apply(lambda x: '_'.join(x.split('_')[1:]))

# 只要转移灶加原发灶
alldata = alldata[(alldata.obs['sample'].str.startswith('LUNG_T')) | # 肺肿瘤
                  (alldata.obs['sample'].str.startswith('NS')) | # 远处脑转移
                  (alldata.obs['sample'].str.startswith('EBUS')) | # EBUS 测的转移性淋巴结和肺肿瘤
                  (alldata.obs['sample'].str.startswith('BRONCHO')),:]  # BRONCHO 测的转移性淋巴结和肺肿瘤

# 提取一下样本种类
category_dict = {
    'EBUS_':'EBUS',
    'LUNG_T':'Tumor',
    'BRONCHO_':'Borncho',
    'NS_':'BrainMetastasis'
}

alldata.obs['sample_type'] = alldata.obs['sample'].apply(lambda x:category_dict[x[:-2]])

## QC
alldata.var[alldata.var_names.str.match(r'^MT-')]
alldata.var[alldata.var_names.str.match(r'^RP[SL0-9]')]
alldata.var[alldata.var_names.str.contains(("^HB[^(P)^(E)]"))]

alldata.var['mt'] = alldata.var_names.str.match(r'^MT-') # 线粒体基因
alldata.var['ribo'] = alldata.var_names.str.match(r'^RP[SL0-9]')  # 核糖体基因
alldata.var['hb'] = alldata.var_names.str.contains(("^HB[^(P)^(E)]")) # 血红蛋白基因

sc.pp.calculate_qc_metrics(alldata, qc_vars=['ribo', 'hb','mt'], inplace=True, percent_top=[20], log1p=True)

mito_filter = 15
n_counts_filter = 4300
fig, axs = plt.subplots(nrows = 2, ncols = 2, figsize = (8, 8))
sc.pl.scatter(alldata, x='total_counts', y='pct_counts_mt',ax = axs[0, 0], show=False)
sc.pl.scatter(alldata, x='total_counts', y='pct_counts_ribo',ax = axs[0, 1], show = False)
sc.pl.scatter(alldata, x='total_counts', y='pct_counts_hb',ax = axs[1, 0], show = False)
sc.pl.scatter(alldata, x='total_counts', y='n_genes_by_counts',ax = axs[1, 1], show = False)
#draw horizontal red lines indicating thresholds.
axs[0, 0].hlines(y = mito_filter, xmin = 0, xmax = max(alldata.obs['total_counts']), color = 'red', ls = 'dashed')
axs[0, 1].hlines(y = mito_filter, xmin = 0, xmax = max(alldata.obs['total_counts']), color = 'red', ls = 'dashed')
axs[1, 0].hlines(y = mito_filter, xmin = 0, xmax = max(alldata.obs['total_counts']), color = 'red', ls = 'dashed')
axs[1, 1].hlines(y = n_counts_filter, xmin = 0, xmax = max(alldata.obs['total_counts']), color = 'red', ls = 'dashed')
fig.tight_layout()
plt.show()

# Original QC plot
n0 = alldata.shape[0]
print(f'Original cell number: {n0}')

# Start scrublet You can load the h5ad file here.
# alldata=sc.read_h5ad("D:/BaiduSyncdisk/博士课题/scRNA/36368318/Kim/LUAD_Kim_ForScrublet_DeBug.h5ad")

print('Begin of post doublets removal and QC plot')
sc.pp.scrublet(alldata)
alldata = alldata[alldata.obs['predicted_doublet']==False, :].copy()
n1 = alldata.shape[0]
print(f'Cells retained after scrublet: {n1}, {n0-n1} removed.')
print(f'End of post doublets removal and QC plots.')

The error log is :

Begin of post doublets removal and QC plot

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[9], line 2
      1 print('Begin of post doublets removal and QC plot')
----> 2 sc.pp.scrublet(alldata)
      3 alldata = alldata[alldata.obs['predicted_doublet']==False, :].copy()
      4 n1 = alldata.shape[0]

File ~\.conda\envs\DL\lib\site-packages\legacy_api_wrap\__init__.py:80, in legacy_api.<locals>.wrapper.<locals>.fn_compatible(*args_all, **kw)
     77 @wraps(fn)
     78 def fn_compatible(*args_all: P.args, **kw: P.kwargs) -> R:
     79     if len(args_all) <= n_positional:
---> 80         return fn(*args_all, **kw)
     82     args_pos: P.args
     83     args_pos, args_rest = args_all[:n_positional], args_all[n_positional:]

File ~\.conda\envs\DL\lib\site-packages\scanpy\preprocessing\_scrublet\__init__.py:285, in scrublet(adata, adata_sim, batch_key, sim_doublet_ratio, expected_doublet_rate, stdev_doublet_rate, synthetic_doublet_umi_subsampling, knn_dist_metric, normalize_variance, log_transform, mean_center, n_prin_comps, use_approx_neighbors, get_doublet_neighbor_parents, n_neighbors, threshold, verbose, copy, random_state)
    282     adata.uns["scrublet"]["batched_by"] = batch_key
    284 else:
--> 285     scrubbed = _run_scrublet(adata_obs, adata_sim)
    287     # Copy outcomes to input object from our processed version
    289     adata.obs["doublet_score"] = scrubbed["obs"]["doublet_score"]

File ~\.conda\envs\DL\lib\site-packages\scanpy\preprocessing\_scrublet\__init__.py:211, in scrublet.<locals>._run_scrublet(ad_obs, ad_sim)
    206 ad_obs = ad_obs[:, ad_obs.var["highly_variable"]].copy()
    208 # Simulate the doublets based on the raw expressions from the normalised
    209 # and filtered object.
--> 211 ad_sim = scrublet_simulate_doublets(
    212     ad_obs,
    213     layer="raw",
    214     sim_doublet_ratio=sim_doublet_ratio,
    215     synthetic_doublet_umi_subsampling=synthetic_doublet_umi_subsampling,
    216     random_seed=random_state,
    217 )
    218 del ad_obs.layers["raw"]
    219 if log_transform:

File ~\.conda\envs\DL\lib\site-packages\legacy_api_wrap\__init__.py:80, in legacy_api.<locals>.wrapper.<locals>.fn_compatible(*args_all, **kw)
     77 @wraps(fn)
     78 def fn_compatible(*args_all: P.args, **kw: P.kwargs) -> R:
     79     if len(args_all) <= n_positional:
---> 80         return fn(*args_all, **kw)
     82     args_pos: P.args
     83     args_pos, args_rest = args_all[:n_positional], args_all[n_positional:]

File ~\.conda\envs\DL\lib\site-packages\scanpy\preprocessing\_scrublet\__init__.py:552, in scrublet_simulate_doublets(adata, layer, sim_doublet_ratio, synthetic_doublet_umi_subsampling, random_seed)
    549 X = _get_obs_rep(adata, layer=layer)
    550 scrub = Scrublet(X, random_state=random_seed)
--> 552 scrub.simulate_doublets(
    553     sim_doublet_ratio=sim_doublet_ratio,
    554     synthetic_doublet_umi_subsampling=synthetic_doublet_umi_subsampling,
    555 )
    557 adata_sim = AnnData(scrub._counts_sim)
    558 adata_sim.obs["n_counts"] = scrub._total_counts_sim

File ~\.conda\envs\DL\lib\site-packages\scanpy\preprocessing\_scrublet\core.py:233, in Scrublet.simulate_doublets(self, sim_doublet_ratio, synthetic_doublet_umi_subsampling)
    230 n_obs = self._counts_obs.shape[0]
    231 n_sim = int(n_obs * sim_doublet_ratio)
--> 233 pair_ix = sample_comb((n_obs, n_obs), n_sim, random_state=self._random_state)
    235 E1 = cast(sparse.csc_matrix, self._counts_obs[pair_ix[:, 0], :])
    236 E2 = cast(sparse.csc_matrix, self._counts_obs[pair_ix[:, 1], :])

File ~\.conda\envs\DL\lib\site-packages\scanpy\preprocessing\_utils.py:157, in sample_comb(dims, nsamp, random_state, method)
    147 def sample_comb(
    148     dims: tuple[int, ...],
    149     nsamp: int,
   (...)
    154     ] = "auto",
    155 ) -> NDArray[np.int64]:
    156     """Randomly sample indices from a grid, without repeating the same tuple."""
--> 157     idx = sample_without_replacement(
    158         np.prod(dims), nsamp, random_state=random_state, method=method
    159     )
    160     return np.vstack(np.unravel_index(idx, dims)).T

File sklearn\utils\_random.pyx:220, in sklearn.utils._random.sample_without_replacement()

File sklearn\utils\_random.pyx:274, in sklearn.utils._random.sample_without_replacement()

File sklearn\utils\_random.pyx:28, in sklearn.utils._random._sample_without_replacement_check_input()

ValueError: n_population should be greater than 0, got -1272468767.