Open pcm32 opened 1 year ago
As suggested by Ming:
def scanpy_qc(adata, batch, up_range, down_range, **kwarg):
    """Keep only cells whose QC metrics lie within per-batch quantile bounds.

    Genes are flagged as mitochondrial (``MT-`` prefix) or ribosomal
    (``RPS``/``RPL`` prefix) in ``adata.var``, QC metrics are computed in
    place with ``sc.pp.calculate_qc_metrics``, and a cell is retained only if
    *every* one of ``pct_counts_mt``, ``pct_counts_ribo``, ``total_counts``
    and ``n_genes_by_counts`` lies between the ``down_range`` and ``up_range``
    quantiles (both inclusive) of that metric within the cell's own batch.

    Args:
        adata: AnnData object to filter. Its ``var`` and ``obs`` tables are
            modified in place (new ``mt``/``ribo`` columns and QC metrics).
        batch: Name of the ``adata.obs`` column that identifies the batch.
        up_range: Upper quantile in ``[0, 1]``.
        down_range: Lower quantile in ``[0, 1]``.
        **kwarg: Accepted for backward compatibility; currently unused (it
            was only forwarded to a plotting call that is disabled).

    Returns:
        The subset of ``adata`` with the retained cells, in their original
        order.
    """
    adata.var['mt'] = adata.var_names.str.startswith('MT-')
    adata.var['ribo'] = adata.var_names.str.startswith(('RPS', 'RPL'))
    sc.pp.calculate_qc_metrics(
        adata, qc_vars=['mt', 'ribo'], log1p=False, percent_top=None, inplace=True
    )

    qc_metrics = (
        'pct_counts_mt',
        'pct_counts_ribo',
        'total_counts',
        'n_genes_by_counts',
    )

    # A single boolean mask aligned with adata.obs replaces the previous
    # name-list/set-intersection logic: it behaves the same for one or many
    # batches, does not depend on the obs index being unnamed, and keeps the
    # cells in their original order.
    keep = None
    for qc in qc_metrics:
        # observed=True only matters for categorical batch columns, where it
        # skips categories that have no cells.
        per_batch = adata.obs.groupby(batch, observed=True)[qc]
        # transform broadcasts each batch's quantile back onto its own cells.
        lower = per_batch.transform(lambda values: values.quantile(down_range))
        upper = per_batch.transform(lambda values: values.quantile(up_range))
        # Inclusive bounds, so down_range=0 / up_range=1 keeps every cell.
        in_range = (adata.obs[qc] >= lower) & (adata.obs[qc] <= upper)
        keep = in_range if keep is None else keep & in_range

    return adata[keep.to_numpy(), :]
The first parts will happen elsewhere, and we could split the AnnData outside of this function, using whichever condition is considered adequate.
As suggested by Ming:
The first parts will happen elsewhere, and we could split the AnnData outside of this function, using whichever condition is considered adequate.