Closed ixxmu closed 11 months ago
导语
单细胞RNA测序 (scRNA-seq) 为疾病的病理学和细胞起源提供了独特的见解。本文介绍单细胞疾病相关性评分 (scDRS),这是一种在单细胞分辨率下将 scRNA-seq 与多基因疾病风险联系起来的方法,且不依赖于已注释的细胞类型。scDRS 能够识别出在全基因组关联研究 (GWAS) 涉及的疾病相关基因上表现出过度表达的细胞。细胞类型水平的结果大致重现了已知的细胞类型与疾病的关联,而单个细胞水平的结果则发现了现有细胞类型标签未能捕获的疾病相关细胞亚群。本篇文章将详细介绍基于 Python 语言的 scDRS 工具。
%%capture
# download precomputed data
# see above for processing code
!wget https://figshare.com/ndownloader/files/34300925 -O data.zip
!unzip data.zip && \
import scdrs
import scanpy as sc
sc.set_figure_params(dpi=125)
from anndata import AnnData
from scipy import stats
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import os
import warnings
warnings.filterwarnings("ignore")
# Load the single-cell expression data.
adata = sc.read_h5ad("data/expr.h5ad")

# Gene sets kept for this walkthrough, mapped to the short trait names used
# everywhere below. Insertion order determines the row order of `df_gs`.
trait_display_names = {
    "PASS_Schizophrenia_Pardinas2018": "SCZ",
    "spatial_dorsal": "Dorsal",
    "UKB_460K.body_HEIGHTz": "Height",
}

# Read the full gene-set table, keep only the selected rows, and relabel the
# index with the short names.
df_gs_all = pd.read_csv("data/geneset.gs", sep="\t", index_col=0)
df_gs = df_gs_all.loc[list(trait_display_names), :]
df_gs = df_gs.rename(trait_display_names)
display(df_gs)

# Persist the subset; it is the --gs-file input of `scdrs compute-score`.
df_gs.to_csv("data/processed_geneset.gs", sep="\t")
TRAIT | GENESET |
---|---|
SCZ | Dpyd:7.6519,Cacna1i:7.4766,Rbfox1:7.3247,Ppp1r... |
Dorsal | Ckb,Fndc5,Lin7b,Gstm7,Tle1,Cabp7,Etv5,Actn1,Sa... |
Height | Wwox:10,Bnc2:10,Gmds:10,Lpp:9.9916,Prkg1:9.836... |
本教程基于 cortex 数据集 (Zeisel & Muñoz-Manchado et al. 2015) 对该工具进行分析介绍,整个流程以一步步的分析操作代码呈现。上面的代码已完成数据集的下载和预处理,接下来计算每个细胞的 scDRS 评分。
%%capture
# Run the `scdrs compute-score` command-line tool through an IPython shell
# escape. Inputs: the expression file data/expr.h5ad, the gene-set file
# data/processed_geneset.gs (written earlier from `df_gs`) and the covariate
# file data/cov.tsv; both the expression data and the gene sets are declared
# as mouse. Results are written to data/, and later code reads
# data/{trait}.full_score.gz from that folder.
# --flag-return-ctrl-norm-score is True and --flag-return-ctrl-raw-score is
# False; see the scDRS CLI documentation for the exact meaning of each flag.
!scdrs compute-score \
--h5ad-file data/expr.h5ad \
--h5ad-species mouse \
--gs-file data/processed_geneset.gs \
--gs-species mouse \
--cov-file data/cov.tsv \
--flag-filter-data True \
--flag-raw-count True \
--flag-return-ctrl-raw-score False \
--flag-return-ctrl-norm-score True \
--out-folder data/
# Read the per-trait score tables found at data/{trait}.full_score.gz, one
# DataFrame per trait listed in the index of `df_gs`.
dict_score = {
    trait: pd.read_csv(f"data/{trait}.full_score.gz", sep="\t", index_col=0)
    for trait in df_gs.index
}

# Attach each trait's "norm_score" column to the cell annotations of `adata`
# under the trait's name. The loop body must be indented to be valid Python.
for trait in dict_score:
    adata.obs[trait] = dict_score[trait]["norm_score"]
# Figure defaults for the UMAP panels in this cell.
sc.set_figure_params(figsize=[2.5, 2.5], dpi=150)

# Colour settings shared by both UMAP calls.
umap_color_kwargs = dict(color_map="RdBu_r", vmin=-5, vmax=5)

# Panel 1: cells coloured by the "level1class" annotation.
sc.pl.umap(adata, color="level1class", ncols=1, **umap_color_kwargs)

# Panel 2: one sub-panel per trait, coloured by the score columns stored in
# adata.obs under each trait name.
sc.pl.umap(adata, color=dict_score.keys(), s=20, **umap_color_kwargs)
肿瘤间和肿瘤内异质性预计将发生在癌细胞和肿瘤微环境(TME)的转录水平。
%%capture
for trait in ["SCZ", "Height"]:
!scdrs perform-downstream \
--h5ad-file data/expr.h5ad \
--score-file data/{trait}.full_score.gz \
--out-folder data/ \
--group-analysis level1class \
--flag-filter-data True \
--flag-raw-count True
# Load the group-level result table of each trait from
# data/{trait}.scdrs_group.level1class.
dict_df_stats = {
    trait: pd.read_csv(
        f"data/{trait}.scdrs_group.level1class",
        sep="\t",
        index_col=0,
    )
    for trait in ["SCZ", "Height"]
}

# Raw level1class labels -> labels shown on the figure.
dict_celltype_display_name = {
    "pyramidal_CA1": "Pyramidal CA1",
    "oligodendrocytes": "Oligodendrocyte",
    "pyramidal_SS": "Pyramidal SS",
    "interneurons": "Interneuron",
    "endothelial-mural": "Endothelial",
    "astrocytes_ependymal": "Astrocyte",
    "microglia": "Microglia",
}

# Relabel the index of every table, then hand the relabelled tables to the
# scDRS plotting helper.
stats_for_display = {
    trait: df_stats.rename(index=dict_celltype_display_name)
    for trait, df_stats in dict_df_stats.items()
}
group_plot_kws = {"vmax": 0.2, "cb_fraction": 0.12}
fig, ax = scdrs.util.plot_group_stats(
    dict_df_stats=stats_for_display,
    plot_kws=group_plot_kws,
)
我们观察到CA1锥体神经元表现出最强的细胞类型关联和显著的异质性。现在我们将重点放在这部分细胞上,进一步了解异质性的来源。
# Extract the CA1 pyramidal neurons (level2class CA1Pyr1 / CA1Pyr2) into an
# independent AnnData object and re-run a standard scanpy embedding on them.
adata_ca1 = adata[adata.obs["level2class"].isin(["CA1Pyr1", "CA1Pyr2"])].copy()
sc.pp.filter_cells(adata_ca1, min_genes=0)
sc.pp.filter_genes(adata_ca1, min_cells=1)
sc.pp.normalize_total(adata_ca1, target_sum=1e4)
sc.pp.log1p(adata_ca1)
sc.pp.highly_variable_genes(adata_ca1, min_mean=0.0125, max_mean=3, min_disp=0.5)
# Subsetting an AnnData returns a view; copy it explicitly so the in-place
# scaling below works on a real object rather than relying on an implicit
# view-to-copy conversion.
adata_ca1 = adata_ca1[:, adata_ca1.var.highly_variable].copy()
sc.pp.scale(adata_ca1, max_value=10)
sc.tl.pca(adata_ca1, svd_solver="arpack")
sc.pp.neighbors(adata_ca1, n_neighbors=10, n_pcs=40)
sc.tl.umap(adata_ca1, n_components=2)

# Assign the scDRS scores to the subset under each trait name. The loop body
# must be indented to be valid Python.
for trait in dict_score:
    adata_ca1.obs[trait] = dict_score[trait]["norm_score"]

# UMAP of the CA1 subset, one panel per trait, coloured by score.
sc.pl.umap(
    adata_ca1,
    color=dict_score.keys(),
    color_map="RdBu_r",
    vmin=-5,
    vmax=5,
    s=20,
)
# Collect the three per-cell scores of the CA1 subset and bin the cells into
# five equal-sized groups (labelled 0-4) by their "Dorsal" score.
df_plot = adata_ca1.obs[["Dorsal", "SCZ", "Height"]].copy()
df_plot["Dorsal quintile"] = pd.qcut(df_plot["Dorsal"], 5, labels=np.arange(5))

# Draw one line per trait on a shared axis, with error bars, showing the
# trait score against the dorsal quintile. The lineplot call must be indented
# to sit inside the loop.
fig, ax = plt.subplots(figsize=(3.5, 3.5))
for trait in ["SCZ", "Height"]:
    sns.lineplot(
        data=df_plot,
        x="Dorsal quintile",
        y=trait,
        label=trait,
        err_style="bars",
        marker="o",
        ax=ax,
    )
ax.set_xticks(np.arange(5))
ax.set_xlabel("Dorsal quintile")
ax.set_ylabel("Mean scDRS disease score")
fig.show()
往期推荐
分析专辑
单细胞scRNA | R包绘图 | 免疫浸润分析 | 肿瘤纯度评估工具 | 数据库
文章解读专辑
多区域进化文章精读 | 高分文章精读 | 免疫微环境文献解读
招聘信息
点击红字即可进入专栏!
点个在看你最好看
https://mp.weixin.qq.com/s/e8eAU11_C_869GrxvLvuMg