Open sparkumr opened 6 months ago
I don't clear how you run find_marker_genes
with multi-sample, could you show me your codes?
import sys import os from natsort import natsorted import stereo as st from stereo.core.ms_data import MSData from stereo.core.ms_pipeline import slice_generator import matplotlib.pyplot as plt import dill import warnings from stereo.algorithm.co_occurrence import CoOccurrence from stereo.core.ms_pipeline import slice_generator import pandas as pd import numpy as np warnings.filterwarnings('ignore')
data_dir = '/public/home/sangchl/work_result/Spatial_Transcriptomics/register/3/intergrate_sc_spa/data'
data_list=[]
for fn in os.listdir(data_dir):
data_list.append(os.path.join(data_dir, fn))
data_list = natsorted(data_list)
ms_data = MSData(_relationship='other', _var_type='intersect')
for sample in data_list:
ms_data += st.io.read_gef(file_path=sample, bin_type='cell_bins')
ms_data.names=['Control','Ccnk']
ms_data.integrate()
ms_data.tl.cal_qc(scope=slice_generator[:],mode='integrate')
ms_data.plt.spatial_scatter(
scope=slice_generator[:],
mode='integrate',
plotting_scale_width=10, # the width of scale
reorganize_coordinate=4, # the number of plots in each row
horizontal_offset_additional=20, # adjustment for horizontal distance
vertical_offset_additional=20 # adjustment for vertical distance
)
ms_data.tl.filter_cells(min_gene=20, min_n_genes_by_counts=3, pct_counts_mt=5, scope=slice_generator[:], mode='integrate', inplace=True)
ms_data.tl.raw_checkpoint()
ms_data.tl.normalize_total(target_sum=10000)
ms_data.tl.log1p()
ms_data.tl.highly_variable_genes(
min_mean=0.0125,
max_mean=3,
min_disp=0.5,
n_top_genes=2000,
res_key='highly_variable_genes',
scope=slice_generator[:],
mode='integrate'
)
ms_data.plt.highly_variable_genes(res_key='highly_variable_genes')
ms_data.tl.scale(max_value=10, zero_center=True)
ms_data.tl.pca(
use_highly_genes=False,
n_pcs=30,
res_key='pca',
scope=slice_generator[:],
mode='integrate'
)
ms_data.tl.neighbors(
pca_res_key='pca',
n_pcs=30,
res_key='neighbors',
scope=slice_generator[:],
mode='integrate'
)
ms_data.tl.umap(
pca_res_key='pca',
neighbors_res_key='neighbors',
res_key='umap',
scope=slice_generator[:],
mode='integrate'
)
ms_data.tl.louvain(
neighbors_res_key='neighbors',
res_key='louvain',
scope=slice_generator[:],
mode='integrate'
)
annotation_dict = {
'1':'VLMC',
'2':'VLMC',
'3':'VLMC',
'4':'Immature Neurons',
'5':'Red Blood Cell',
'6':'Apical Progenitors 1',
'7':'Apical Progenitors 2',
'8':'Apical Progenitors 3',
'9':'Apical Progenitors 4',
'10':'Medial Forebrain Progenitors 1',
'11':'unknown',
'12':'VLMC',
'13':'VLMC',
'14':'Medial Forebrain Progenitors 2',
'15':'Microglia'
}
ms_data.tl.annotation(
annotation_information=annotation_dict,
cluster_res_key='louvain',
res_key='anno_louvain'
)
ms_data.tl.find_marker_genes(cluster_res_key='anno_louvain',
output='/public/home/sangchl/work_result/Spatial_Transcriptomics/register/3/intergrate_sc_spa/'+ case + 'vs' + control + ".csv",
use_highly_genes=False,
use_raw =True,
case_groups=case,
control_groups=control)
The case group and the control group here are the cell subsets I commented, and it can be run, but it doesn't seem to be used for the control group information
In your case, the case_groups and control_groups must be a group or a list of groups in the anno_louvain
, I don't see that what values you set into them, and, how did you judge the control group didn't be used?
在你的例子中,case_groups和control_groups必须是 中的组或组列表,我看不出你在其中设置了什么值,而且,你如何判断对照组没有被使用?
anno_louvain
The case and control here are created by myself according to the format in the ms_data, and the control group is not applied because I tried different control values, but he both returned the same result
stereo.core.StPipeline.find_marker_genes: The set control group cannot be identified in the multi-sample, and when the program is running, it will only compare the difference between the case group and all the other groups