Closed ixxmu closed 6 months ago
设置分析环境
import numpy as np
import pandas as pd
import scanpy as sc
import os
import ktplotspy as kpy
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Silence every warning category so the notebook output stays readable.
warnings.simplefilter(action="ignore", category=Warning)
sc.settings.verbosity = 3
sc.settings.set_figure_params(dpi=80)

# Project root and the sub-directory that holds this chapter's outputs.
projpath = 'D:\\公众号\\2024scanpy'
chapter = "09_CellPhoneDB"

# Work inside <projpath>/<chapter>; exist_ok=True makes re-runs harmless.
os.chdir(projpath)
os.makedirs(chapter, exist_ok=True)
os.chdir(chapter)
# Download the CellPhoneDB database
from cellphonedb.utils import db_utils

# Destination folder for the database, under the project's Resource directory.
cpdb_target_dir = os.path.join(projpath, 'Resource/cellphonedb')
cpdb_version = 'v5.0.0'
db_utils.download_database(cpdb_target_dir, cpdb_version)
from cellphonedb.src.core.methods import cpdb_statistical_analysis_method

# Switch to <projpath>/<chapter>/statistical_analysis, creating it on first run.
os.chdir(projpath)
statistical_dir = chapter + "/statistical_analysis"
os.makedirs(statistical_dir, exist_ok=True)
os.chdir(statistical_dir)
# Load the annotated dataset to analyse
annotated_path = os.path.join(projpath, "05_CellAnnotation/adata_annotated.h5ad")
adata = sc.read_h5ad(annotated_path)
# Count cells per 'type' value within each cell type (displayed as notebook output).
adata.obs.groupby('celltype')['type'].value_counts()
celltype type
B-cell Covid 526
Ctrl 507
CD14+ Monocyte Covid 1286
Ctrl 1037
CD16+ Monocyte Ctrl 287
Covid 20
CD4+ T Ctrl 711
Covid 279
CD8+ T Ctrl 647
Covid 237
NK Ctrl 1137
Covid 397
Plasmablast Covid 26
Ctrl 9
Platelet Covid 872
Ctrl 59
Name: count, dtype: int64
# Prepare the analysis inputs for the Covid group, then the Ctrl group.
# Each group directory receives:
#   meta.csv   - the 'celltype' column of obs, indexed by cell
#   count.h5ad - an AnnData built from the expression DataFrame only
for group in ['Covid', 'Ctrl']:
    os.makedirs(group, exist_ok=True)
    bdata = adata[adata.obs['type'] == group].copy()
    bdata.obs['celltype'].to_csv(group + '/meta.csv')
    xdata = sc.AnnData(bdata.to_df())
    xdata.write_h5ad(group + '/count.h5ad', compression='gzip')
# Run the CellPhoneDB statistical analysis on the Covid group, then on the Ctrl group
cpdb_file_path = os.path.join(projpath, 'Resource/cellphonedb/cellphonedb.zip')
for group in ['Covid', 'Ctrl']:
    cpdb_statistical_analysis_method.call(
        cpdb_file_path=cpdb_file_path,            # database file path
        meta_file_path=group + '/meta.csv',       # cell classification (metadata) file path
        counts_file_path=group + '/count.h5ad',   # expression matrix file path
        counts_data='hgnc_symbol',
        score_interactions=True,                  # whether to score the interactions
        iterations=1000,                          # number of permutations
        threshold=0.1,                            # minimum fraction of cells expressing a gene
        threads=6,                                # number of CPU threads to use
        pvalue=0.05,                              # P-value threshold for significance
        output_path=group,                        # directory where results are saved
        output_suffix='',                         # output file-name suffix; defaults to the run date/time
    )
下面以Covid组为例展示常用的可视化结果
# Load the files needed for plotting (Covid group)
meta = pd.read_csv("Covid/meta.csv", index_col=0)
adata = sc.read_h5ad("Covid/count.h5ad")
# Attach the cell-type table as obs so plots can look up 'celltype'.
adata.obs = meta
pvals = pd.read_csv("Covid/statistical_analysis_pvalues_.txt", sep="\t")
means = pd.read_csv("Covid/statistical_analysis_means_.txt", sep="\t")
# Heatmap of the number of significant interactions between all cell types
ax = kpy.plot_cpdb_heatmap(
    pvals=pvals,
    log1p_transform=False,
    figsize=(6, 6),
    title="Sum of significant interactions",
)
ax.ax_heatmap.grid(False)
ax.figure.savefig("Covid/sum_of_sig_cci.pdf")

# Same heatmap restricted to a chosen subset of cell types
selected_cell_types = ['CD4+ T', 'CD8+ T', 'B-cell', 'CD14+ Monocyte', 'CD16+ Monocyte']
ax = kpy.plot_cpdb_heatmap(
    pvals=pvals,
    cell_types=selected_cell_types,
    figsize=(5, 5),
    title="Sum of significant interactions",
)
ax.ax_heatmap.grid(False)
ax.figure.savefig("Covid/sum_of_sig_cci_sub.pdf")
# Dot plot of ligand-receptor pairs across all cell types
dotplot_all_kwargs = dict(
    adata=adata,
    means=means,
    pvals=pvals,
    celltype_key="celltype",
    cell_type1=".",  # "." stands for all cell types
    cell_type2=".",
    figsize=(30, 40),
    max_size=8,
    highlight_size=1,
    title="interacting interactions!",
)
g = kpy.plot_cpdb(**dotplot_all_kwargs)
# limitsize=False is passed because the figure is very large (30 x 40).
g.save("Covid/cci_dotplot.pdf", limitsize=False)
# Dot plots of ligand-receptor pairs for selected cell types.
# Arguments shared by both plots below.
# NOTE(review): "CD8+ T|CD8+ T" names the same cell type on both sides of the
# pipe; a second, different cell type may have been intended - confirm.
shared_kwargs = dict(
    adata=adata,
    celltype_key="celltype",
    means=means,
    pvals=pvals,
    cell_type1=".",
    cell_type2="CD8+ T|CD8+ T",  # multiple cell types are separated by a pipe
    highlight_size=1,
    title="interacting interactions!",
)

# Filter the interacting molecules by gene family.
# Options: "chemokines", "th1", "th2", "th17", "treg", "costimulatory", "coinhibitory"
g = kpy.plot_cpdb(
    gene_family=["chemokines"],
    figsize=(9, 3.5),
    **shared_kwargs,
)
g.save("Covid/cci_dotplot_sub1.pdf")
g

# Filter the interacting molecules by gene name.
genes_of_interest = ['ALCAM','CD6','BTLA','TNFRSF14','CCL3','CCR1','CD44','TYROBP','CD93','IFNGR1']
g = kpy.plot_cpdb(
    genes=genes_of_interest,
    figsize=(9, 5),
    **shared_kwargs,
)
g.save("Covid/cci_dotplot_sub2.pdf")
g
from cellphonedb.src.core.methods import cpdb_degs_analysis_method

# Switch to <projpath>/<chapter>/degs_analysis, creating it on first run.
os.chdir(projpath)
degs_dir = chapter + "/degs_analysis"
os.makedirs(degs_dir, exist_ok=True)
os.chdir(degs_dir)
# Load the annotated dataset to analyse
annotated_path = os.path.join(projpath, "05_CellAnnotation/adata_annotated.h5ad")
adata = sc.read_h5ad(annotated_path)
# Count cells per 'type' value within each cell type (displayed as notebook output).
adata.obs.groupby('celltype')['type'].value_counts()
celltype type
B-cell Covid 526
Ctrl 507
CD14+ Monocyte Covid 1286
Ctrl 1037
CD16+ Monocyte Ctrl 287
Covid 20
CD4+ T Ctrl 711
Covid 279
CD8+ T Ctrl 647
Covid 237
NK Ctrl 1137
Covid 397
Plasmablast Covid 26
Ctrl 9
Platelet Covid 872
Ctrl 59
Name: count, dtype: int64
# Prepare the analysis inputs for the Covid group, then the Ctrl group.
# Each group directory receives:
#   meta.csv   - the 'celltype' column of obs, indexed by cell
#   count.h5ad - an AnnData built from the expression DataFrame only
for group in ['Covid', 'Ctrl']:
    os.makedirs(group, exist_ok=True)
    bdata = adata[adata.obs['type'] == group].copy()
    bdata.obs['celltype'].to_csv(group + '/meta.csv')
    xdata = sc.AnnData(bdata.to_df())
    xdata.write_h5ad(group + '/count.h5ad', compression='gzip')
# For each group, compute cell-type marker genes and write them to
# <group>/deg.csv (columns: celltype, names) for use in the DEG analysis.
# NOTE: `i` stays bound to 'Ctrl' once this loop finishes.
for i in ['Covid', 'Ctrl']:
    bdata = adata[adata.obs['type'] == i].copy()
    # pts=True is requested so that 'pct_nz_group' is available for the filter below.
    sc.tl.rank_genes_groups(bdata, groupby='celltype', method='wilcoxon', pts=True)
    groups = bdata.uns['rank_genes_groups']['names'].dtype.names

    # One ranked-gene table per cell type, each tagged with its cell type.
    per_celltype = [
        sc.get.rank_genes_groups_df(bdata, group=g).assign(celltype=g)
        for g in groups
    ]
    res2df = pd.concat(per_celltype, ignore_index=True)

    # Keep up-regulated, significant genes expressed in more than 10% of the group.
    is_marker = (
        (res2df['logfoldchanges'] > 0.25)
        & (res2df['pvals_adj'] < 0.05)
        & (res2df['pct_nz_group'] > 0.1)
    )
    res2df = res2df[is_marker]
    res2df = res2df[['celltype', 'names']]
    res2df.to_csv(i + "/deg.csv", index=False)
# Run the CellPhoneDB DEG-based analysis on the Covid group, then on the Ctrl group
cpdb_file_path = os.path.join(projpath, 'Resource/cellphonedb/cellphonedb.zip')
for group in ['Covid', 'Ctrl']:
    cpdb_degs_analysis_method.call(
        cpdb_file_path=cpdb_file_path,
        meta_file_path=group + '/meta.csv',
        counts_file_path=group + '/count.h5ad',
        degs_file_path=group + '/deg.csv',
        counts_data='hgnc_symbol',
        score_interactions=True,
        output_path=group,
        output_suffix='',
        threads=6,
    )
注意:对degs_analysis模式的结果作图时,kpy.plot_cpdb_heatmap和kpy.plot_cpdb都一定要设置degs_analysis=True,并把relevant_interactions文件作为pvals参数传入
# Load the files needed for plotting the DEG-based results (Covid group)
meta = pd.read_csv("Covid/meta.csv", index_col=0)
adata = sc.read_h5ad("Covid/count.h5ad")
# Attach the cell-type table as obs so plots can look up 'celltype'.
adata.obs = meta
# The relevant-interactions table is loaded under the name `pvals`,
# which is the argument the plotting calls receive it through.
pvals = pd.read_csv("Covid/degs_analysis_relevant_interactions_.txt", sep="\t")
means = pd.read_csv("Covid/degs_analysis_means_.txt", sep="\t")
# Heatmap of interaction counts from the DEG-based analysis
ax = kpy.plot_cpdb_heatmap(
    pvals=pvals,
    log1p_transform=False,
    degs_analysis=True,
    figsize=(6, 6),
    title="Sum of significant interactions",
)
ax.ax_heatmap.grid(False)
# The plotted table is read from the Covid results, so save next to them.
# The previous path used the leftover loop variable `i` (== 'Ctrl'), which
# wrote this Covid figure into the Ctrl directory.
ax.figure.savefig("Covid/sum_of_sig_cci.pdf")
# Dot plot of the DEG-based analysis, restricted to the costimulatory gene family
g = kpy.plot_cpdb(
    adata=adata,
    cell_type1=".",
    cell_type2=".",
    means=means,
    pvals=pvals,
    degs_analysis=True,  # note this argument: required for DEG-based results
    celltype_key="celltype",
    gene_family=["costimulatory"],
    figsize=(20, 15),
    max_size=6,
    highlight_size=1,
    title="interacting interactions!",
)
# The plotted tables are read from the Covid results, so save next to them.
# The previous path used the leftover loop variable `i` (== 'Ctrl'), which
# wrote this Covid figure into the Ctrl directory.
g.save("Covid/cci_dotplot.pdf", limitsize=False)
g
交流合作 Kinesin专业从事单细胞与空转数据的分析与培训,有此需求的朋友欢迎加微信洽谈合作。本公众号交流群向科研院校师生和公司生信人员开放,希望入群的朋友也可加我微信申请。
https://mp.weixin.qq.com/s/c4AeOjnLm92SrUorz9OsVA