Closed ixxmu closed 3 months ago
好久没做文献复现了…
上一次好像还是在去年的3月份左右,详见:【生信文献200篇】专栏也落下帷幕,生信菜鸟团的发展需要你 。。。。
所以距离这个领域我已经脱节一年了….别骂了别骂了在学了在学了😧。
如有不对,请指出,下次一定更正!!!
文章在这:Tumor microenvironment remodeling after neoadjuvant immunotherapy in non-small cell lung cancer revealed by single-cell RNA sequencing
方法:来自3名治疗前和12名接受新辅助PD-1阻断联合化疗的非小细胞肺癌(NSCLC)患者的~92,000个单细胞的转录组。根据病理反应将12个治疗后样本分为两组:MPR(n = 4)和非MPR(n = 8)。
主要病理反应(MPR)被定义为治疗后苏木精和伊红(H&E)染色的残余活肿瘤细胞不超过10%
结果和结论:不同的治疗诱导的癌细胞转录组与临床反应有关。MPR患者中:癌细胞通过MHC-II表现出活化抗原呈递的特征。aged CCL3+中性粒细胞减少,FCRL4+FCRL5+记忆B细胞和CD16+CX3CR1+单核细胞富集,并且是免疫治疗反应的预测因子。NMPR患者中:癌细胞表现出雌激素代谢酶的过表达和血清雌二醇升高。在所有患者中,治疗促进了细胞毒性T细胞和CD16+NK细胞的扩增和活化,减少了免疫抑制性Treg,并将记忆CD8+T细胞激活为效应表型。组织驻留巨噬细胞在治疗后扩增,肿瘤相关巨噬细胞(TAM)被重塑为中性。
接下来是对文中的图进行的复现…
纯手工,自己写代码,并不是文章的代码哦。。。。
文章原图
rm(list=ls())
options(stringsAsFactors = F)
source('scrip/lib.R')
library(readxl)
###### step1:导入数据 ######
ct=fread( 'GSE207422_RAW/GSE207422_NSCLC_scRNAseq_UMI_matrix.txt.gz',data.table = F)
ct[1:4,1:4]
ct[nrow(ct),1:4]
rownames(ct)=ct[,1]
ct=ct[,-1]
ct[1:4,1:4]
ct1 = ct
s <- colnames(ct1)
s[1:10]
s1 = substr(s, 1, 11)
s1[1:10]
table(s1)
colnames(ct1) = s1
meta <- read_excel("GSE207422_RAW/GSE207422_NSCLC_scRNAseq_metadata.xlsx")
sce.all = CreateSeuratObject(counts = ct1 ,
min.cells = 5,
min.features = 500)
as.data.frame(sce.all@assays$RNA@counts[1:10, 1:2])
head(sce.all@meta.data, 10)
table(sce.all@meta.data$orig.ident)
s = colnames(sce.all)
s1 = substr(s, 1, 11)
s1[1:10]
table(s1)
s1 = gsub(c('BD_immune01|BD_immune05|BD_immune08'),"Pre-treatment",s1)
s2 = gsub("BD_immune.*$", "Post-treatment", s1) #第一个空格直达结尾替换成b
table(s2)
phe=str_split( colnames(sce.all) ,'_',simplify = T)
head(phe)
table(phe[,1])
table(phe[,2])
table(phe[,2],phe[,1])
sce.all$orig.ident = s2
table(sce.all$orig.ident )
sce.all$group=sce.all$orig.ident
sce.all$orig.ident=paste(phe[,2],phe[,1])
head(sce.all@meta.data)
table(sce.all@meta.data$group)
table(sce.all@meta.data$orig.ident)
###### step2:QC质控 ######
dir.create("./1-QC")
setwd("./1-QC")
source('../scrip/qc.R')
sce.all.filt = basic_qc(sce.all)
print(dim(sce.all))
print(dim(sce.all.filt))
setwd('../')
sp='human'
###### step3: harmony整合多个单细胞样品 ######
dir.create("2-harmony")
getwd()
setwd("2-harmony")
source('../scrip/harmony.R')
# 默认 ScaleData 没有添加"nCount_RNA", "nFeature_RNA"
# 默认的
sce.all.int = run_harmony(sce.all.filt)
setwd('../')
#######step4: 检查常见分群情况
rm(list=ls())
library(ggplot2)
dir.create("3-cell")
setwd("3-cell")
getwd()
sce.all.int=readRDS("../2-harmony/sce.all_int.rds")
table(Idents(sce.all.int))
table(sce.all.int$seurat_clusters)
table(sce.all.int$RNA_snn_res.0.1)
table(sce.all.int$RNA_snn_res.0.3)
sel.clust = "RNA_snn_res.0.3"
sce.all <- SetIdent(sce.all.int, value = sel.clust)
table(sce.all@active.ident)
meta <- read_excel("../GSE207422_RAW/GSE207422_NSCLC_scRNAseq_metadata.xlsx")
s = colnames(sce.all)
s[1:10]
s1 = substr(s, 1, 11)
s1[1:10]
table(s1)
s2 = gsub("BD_immune", "P", s1)
table(s2)
sce.all$patient = s2
table(sce.all$patient )
##RECIST
colnames(meta)
meta$RECIST
s_RECIST = gsub(c('BD_immune01|BD_immune07|BD_immune08|BD_immune11|BD_immune12|BD_immune15'),"PR",s1)
s_RECIST = gsub("BD_immune.*$", "SD", s_RECIST)
table(s_RECIST)
sce.all$RECIST = s_RECIST
##病理反应
##后面是按照TN和治疗后手术样本来分类的
colnames(meta)
meta$`Pathologic Response`
s_Pathologic = gsub(c('BD_immune03|BD_immune06|BD_immune11|BD_immune14'),"MPR",s1)
s_Pathologic = gsub("BD_immune.*$", "NMPR", s_Pathologic)
table(s_Pathologic)
sce.all$Pathologic = s_Pathologic
phe = sce.all@meta.data
phe[phe$orig.ident == 'Pre-treatment', "Pathologic"] = 'TN'
table(phe$Pathologic)
sce.all@meta.data = phe
table(sce.all$Pathologic)
##药物治疗
colnames(meta)
meta = meta[1:15,]
meta$medicine = paste(meta$`PD1 Antibody`,meta$Chemotherapy,sep="+")
s1
table(meta$medicine)
match_vec <- match(s1, unique(s1))
s_medicine <- meta$medicine[match_vec]
##检验一下是不是对的...
s1[23456]
s_medicine[23456]
s1[45678]
s_medicine[45678]
table(s_medicine)
sce.all$medicine = s_medicine
saveRDS(sce.all, "sce.all_meta.rds")
#marker
genes_to_check = c('PTPRC', 'CD3D', 'CD3E', 'CD4','CD8A', ## T
'CD19', 'CD79A', 'MS4A1' , ## B
'IGHG1', 'MZB1', 'SDC1', ## Plasma
'CD68', 'CD163', 'CD14',
'VCAN', 'FCN1',
'TPSAB1' , 'TPSB2', # mast cells,
'RCVRN','FPR1' , 'ITGAM' ,
'C1QA', 'C1QB', # mac
'S100A9', 'S100A8', 'MMP19',# monocyte
'MKI67' , 'TOP2A','LYZ', ##myloid
'LAMP3', 'IDO1','IDO2',## DC3
'CD1E','CD1C', # DC2
'CLEC10A', 'CLEC12A','LILRA4', ##pDC
'FGFBP2','NCR1','KLRK1','KLRC1','NCAM1','FCGR3A', # NK
'KIT', ##MAST
'CSF3R','ITGAM','ITGB2','FUT4','FCGR3B','FCGR2A', ##Neutrophil
'FGF7','MME', 'ACTA2','VWF','COL1A1', ## human fibo
'DCN', 'LUM', 'GSN' , ## mouse PDAC fibo
'PECAM1', 'VWF', ## endo
'EPCAM' , 'KRT19','KRT7' # epi
)
library(stringr)
genes_to_check=str_to_upper(genes_to_check)
genes_to_check
p <- DotPlot(sce.all, features = unique(genes_to_check),
assay='RNA' ) + coord_flip()
p
mycolors <-c('#E5D2DD', '#53A85F', '#F1BB72', '#F3B1A0', '#D6E7A3', '#57C3F3',
'#E95C59', '#E59CC4', '#AB3282', '#BD956A',
'#9FA3A8', '#E0D4CA', '#C5DEBA', '#F7F398',
'#C1E6F3', '#6778AE', '#91D0BE', '#B53E2B',
'#712820', '#DCC1DD', '#CCE0F5', '#CCC9E6', '#625D9E', '#68A180', '#3A6963',
'#968175')
col =c("#3176B7","#F78000","#3FA116","#CE2820","#9265C1",
"#885649","#DD76C5","#7F7F7F","#BBBE00","#41BED1")
p_umap=DimPlot(sce.all, reduction = "umap",cols = mycolors,pt.size = 0.4,
group.by = "RNA_snn_res.0.3",label = T,label.box = T)
p_umap
library(patchwork)
p+p_umap
ggsave('markers_umap_2.pdf',width = 13,height = 8)
#####step5: 细胞生物学命名
rm(list=ls())
options(stringsAsFactors = F)
library(Seurat)
library(ggplot2)
library(clustree)
library(cowplot)
library(dplyr)
getwd()
sce.all=readRDS( "../3-cell/sce.all_meta.rds")
sce.all
celltype=data.frame(ClusterID=0:17,
celltype= 0:17)
#定义细胞亚群
celltype[celltype$ClusterID %in% c(0 ,12 ),2]='Tcells'
celltype[celltype$ClusterID %in% c( 4 ),2]='Bcells'
celltype[celltype$ClusterID %in% c( 5),2]='plasma'
celltype[celltype$ClusterID %in% c( 13 ),2]='mast'
celltype[celltype$ClusterID %in% c( 14 ),2]='pDC'
celltype[celltype$ClusterID %in% c( 11 ),2]='fibroblast'
celltype[celltype$ClusterID %in% c( 1,2,15 ),2]='myeloids'
celltype[celltype$ClusterID %in% c( 6,8,9,10,17 ),2]='epithelium'
celltype[celltype$ClusterID %in% c( 3,16 ),2]='neutrophil'
celltype[celltype$ClusterID %in% c( 7 ),2]='NK'
head(celltype)
celltype
table(celltype$celltype)
sce.all@meta.data$celltype = "NA"
for(i in 1:nrow(celltype)){
sce.all@meta.data[which(sce.all@meta.data$RNA_snn_res.0.3 == celltype$ClusterID[i]),'celltype'] <- celltype$celltype[i]}
table(sce.all@meta.data$celltype)
th=theme(axis.text.x = element_text(angle = 45,
vjust = 0.5, hjust=0.5))
library(patchwork)
p_umap=DimPlot(sce.all, reduction = "umap",cols = col,pt.size = 0.4,
group.by = "celltype",label = T)
p+p_umap
ggsave('markers_umap_by_celltype_2.pdf',width = 15,height = 7)
saveRDS(sce.all, "sce.all_celltype.rds")
phe=sce.all@meta.data
save(phe,file = 'phe-by-markers.Rdata')
colnames(phe)
table(sce.all$Pathologic)
p = DimPlot(sce.all, reduction = "umap",split.by = 'Pathologic',
group.by = "celltype",label = T)
install.packages('tidydr')
library(ggplot2)
library(tidydr)
p <- p + theme_dr()
p
p <- p + theme(panel.grid=element_blank())
p
load('sce-by-celltype.Rdata')
features = c('PTPRC','KRT19','COL1A1','VWF','CD3E','CD79A','MS4A1',
'IGHG1','CSF3R','LYZ','FGFBP2','KIT','LILRA4','MKI67')
FeaturePlot(sce.all,features = features,ncol = 5 )
###### step6: 单细胞亚群比例差异 ######
## 每种细胞类型中,分组所占比例 ----
rm(list=ls())
options(stringsAsFactors = F)
library(Seurat)
library(ggplot2)
library(clustree)
library(cowplot)
library(dplyr)
getwd()
sce.all=readRDS( "3-cell/sce.all_celltype.rds")
sce.all
phe=sce.all@meta.data
library(tidyr)# 使用的gather & spread
library(reshape2) # 使用的函数 melt & dcast
colnames(phe)
tb=table(phe$celltype,
phe$patient)
head(tb)
library (gplots)
balloonplot(tb)
bar_data <- as.data.frame(tb)
bar_per <- bar_data %>%
group_by(Var2) %>%
mutate(sum(Freq)) %>%
mutate(percent = Freq / `sum(Freq)`)
head(bar_per)
write.csv(bar_per,file = "celltype_by_group_percent.csv")
col =c("#3176B7","#F78000","#3FA116","#CE2820","#9265C1",
"#885649","#DD76C5","#7F7F7F","#BBBE00","#41BED1")
ggplot(bar_per, aes(x = percent, y = Var2)) +
geom_bar(aes(fill = Var1) , stat = "identity") + coord_flip() +
theme(axis.ticks = element_line(linetype = "blank"),
legend.position = "top",
panel.grid.minor = element_line(colour = NA,linetype = "blank"),
panel.background = element_rect(fill = NA),
plot.background = element_rect(colour = NA)) +
labs(y = "% Relative cell source", fill = NULL)+labs(x = NULL)+
scale_fill_manual(values=col)
ggsave("celltype_by_group_percent.pdf",
width = 8,height = 4)
###Figure1D 分为未治疗组TN,和治疗组(治疗组又分为MPR和NMPR)
table(phe$celltype)
table(phe$patient)
phe$patientoutcone = paste(phe$patient,phe$Pathologic,sep = '_')
phe_immune = phe[phe$celltype %in% c('Bcells','myeloids','neutrophil','NK','Tcells'),]
tb=table(phe_immune$celltype,
phe_immune$patientoutcone)
head(tb)
balloonplot(tb)
bar_data <- as.data.frame(tb)
bar_per <- bar_data %>%
group_by(Var2) %>%
mutate(sum(Freq)) %>%
mutate(percent = Freq / `sum(Freq)`)
head(bar_per)
bar_per$Var2
group = ifelse(str_detect(bar_per$Var2,"TN"),"TN",
ifelse(str_detect(bar_per$Var2,"NMPR"),"NMPR","MPR"))
bar_per$group = group
bar_per$group = factor(bar_per$group,levels =c("TN","MPR","NMPR"))
table(bar_per$Var1)
bar_per$Var1 = factor(bar_per$Var1,levels =c("Tcells","Bcells","NK",'myeloids','neutrophil'))
###拼接五个箱线图
library(gridExtra)
p <- list()
for(i in 1:5){
# 选择当前细胞类型的数据
cell <- bar_per[bar_per$Var1 == unique(bar_per$Var1)[i],]
p[[i]] <- ggplot(cell, aes(x = group, y = percent, fill = group)) +
geom_boxplot(size = 0.5, fill = "white", outlier.fill = "white", outlier.color = "white", color = col) +
geom_jitter(aes(fill = group), width = 0.2, shape = 21, size = 2) +
scale_fill_manual(values = col) +
ggtitle(paste0(unique(bar_per$Var1)[i], " ")) +
theme_bw() +
theme(legend.position = "none",
axis.text.x = element_text(colour = "black", family = "Times", size = 14),
axis.text.y = element_text(family = "Times", size = 14, face = "plain"),
axis.title.y = element_text(family = "Times", size = 14, face = "plain"),
axis.title.x = element_text(family = "Times", size = 14, face = "plain"),
plot.title = element_text(family = "Times", size = 15, face = "bold", hjust = 0.5),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()) +
ylab("Fraction (%) of immune cells") + xlab(" ")
# 移除y轴标题和刻度标签
p[[i]] <- p[[i]] + theme(axis.title.y = element_blank())
}
combined_plot <- grid.arrange(grobs = p, ncol = 5, left = "Fraction (%) of immune cells") # 设置共同的纵轴标题和拼图列数
print(combined_plot)
####Epithelial cell重聚类####
rm(list=ls())
options(stringsAsFactors = F)
library(Seurat)
library(ggplot2)
library(clustree)
library(cowplot)
library(dplyr)
dir.create("4-subEpi")
setwd("4-subEpi")
getwd()
sce.all=readRDS( "../3-cell/sce.all_celltype.rds")
sce = sce.all
DefaultAssay(sce) <- "RNA"
table(sce$orig.ident)
table(sce$celltype)
sce2 = sce[, sce$celltype %in% c( 'epithelium' )]
table(sce2$celltype)
sce = sce2
table(sce$patient,sce$celltype)
table(sce$medicine,sce$celltype)
table(sce$orig.ident)
Idents(sce) = sce$patient
sce=CreateSeuratObject(counts = sce@assays$RNA@counts,
meta.data = sce@meta.data)
sce <- NormalizeData(sce, normalization.method = "LogNormalize",
scale.factor = 1e4)
GetAssay(sce,assay = "RNA")
sce <- FindVariableFeatures(sce,
selection.method = "vst", nfeatures = 2000)
sce <- ScaleData(sce)
sce <- RunPCA(object = sce, pc.genes = VariableFeatures(sce))
DimHeatmap(sce, dims = 1:12, cells = 100, balanced = TRUE)
ElbowPlot(sce)
sce <- FindNeighbors(sce, dims = 1:15)
sce <- FindClusters(sce, resolution = 0.1)
table(sce@meta.data$RNA_snn_res.0.1)
set.seed(123)
sce <- RunTSNE(object = sce, dims = 1:15, do.fast = TRUE)
saveRDS(sce,sce.all, "sce_epi.rds")
sce <- RunUMAP(object = sce, dims = 1:5, do.fast = TRUE)
DimPlot(sce,reduction = "umap",label=T)
head(sce@meta.data)
DimPlot(sce,reduction = "umap",label=T,group.by = 'orig.ident')
mycolors <-c('#E5D2DD', '#53A85F', '#F1BB72', '#F3B1A0', '#D6E7A3', '#57C3F3',
'#E95C59', '#E59CC4', '#AB3282', '#BD956A',
'#9FA3A8', '#E0D4CA', '#C5DEBA', '#F7F398',
'#C1E6F3', '#6778AE', '#91D0BE', '#B53E2B',
'#712820', '#DCC1DD', '#CCE0F5', '#CCC9E6', '#625D9E', '#68A180', '#3A6963',
'#968175')
library(harmony)
table(sce$orig.ident)
seuratObj <- RunHarmony(sce, "orig.ident")
names(seuratObj@reductions)
seuratObj <- RunUMAP(seuratObj, dims = 1:8,
reduction = "harmony")
DimPlot(seuratObj,reduction = "umap",label=T )
library(patchwork)
library(RColorBrewer)
library(scales)
col4<-colorRampPalette(brewer.pal(8,'Set2'))(12)
DimPlot(seuratObj, reduction = 'umap',pt.size = 2,cols = mycolors,label = T,group.by = "RNA_snn_res.0.1",label.size = 6,raster=FALSE)
sce=seuratObj
sce <- FindNeighbors(sce, reduction = "harmony",
dims = 1:5)
sce <- FindClusters(sce, resolution = 0.5)
table(sce@meta.data$seurat_clusters)
DimPlot(sce,reduction = "umap",label=T)
##居然发现还是原始配色好看点,我哭死
DimPlot(sce, reduction = 'umap',pt.size = 1,label = T,group.by = "RNA_snn_res.0.5",label.size = 6,raster=FALSE)
ggsave(filename = 'harmony_umap_recluster_by_0.5.pdf',width = 7,height = 7)
col4<-colorRampPalette(brewer.pal(8,'Set2'))(15)
sce$orig.ident
DimPlot(sce, reduction = 'umap',pt.size = 1,cols = col4,label = T,group.by = "patient",label.size = 6,raster=FALSE)
ggsave(filename = 'harmony_umap_sce_recluster_by_patient.pdf',width = 7,height = 7)
#####患者之间的差异
DimPlot(sce, reduction = 'umap',pt.size = 1,cols = col4,label = T,split.by = 'orig.ident',label.size = 6,raster=FALSE)
ggsave(filename = 'umap_sce_recluster_treatment.pdf',width = 10,height = 6.5)
###配色什么的,不重要啦,喜欢什么颜色就放什么颜色
saveRDS(sce,file = "sce_epi.rds")
###copykat区分肿瘤细胞和正常细胞
devtools::install_github("navinlabcode/copykat")
library("copykat")
library(Seurat)
library(copykat)
library(tidyverse)
scRNA <- sce
counts <- as.matrix(scRNA@assays$RNA@counts)
table(scRNA$celltype)
cnv <- copykat(rawmat=counts, ngene.chr=5, sam.name="test", n.cores=8)
# ngene.chr参数是过滤细胞的一个标准,它要求被用来做CNV预测的细胞,一个染色体上至少有5个基因。
# sam.name定义样本名称 (sample name),会给出来的文件加前缀
saveRDS(cnv, "cnv.rds")
table(rownames(cnv$CNAmat))
a = cnv$prediction$copykat.pred
table(a)
scRNA$CopyKAT = a
p1 <- DimPlot(scRNA, group.by = "celltype", label = T)
p2 <- DimPlot(scRNA, group.by = "CopyKAT") + scale_color_manual(values = c("#F8766D",'#02BFC4', "gray"))
pc <- p1 + p2
pc
ggsave("pred_mallignant.pdf", pc, width = 12, height = 5)
###虽然这里得到结果了,但是我觉得上面选5数值太大了,也不想重新跑一遍几小时了....
cols = c("gray", "coral2")
plot <- FeaturePlot(scRNA, features = c('AGER','SFTPA2','SCGB1A1','TPPP3','KRT17'),cols = cols, pt.size = 1)+
theme(panel.border = element_rect(fill=NA,color="black", size=1, linetype="solid"))#加边框
plot_grid(p2,plot)
#####注释亚群
#marker
#1. DST:参与细胞骨架的形成和维护,细胞附着和迁移的调节,肌肉成熟和神经元发育等过程。
#2. KRT17:编码角质形成细胞中的一种角蛋白,参与细胞骨架的形成和角质形成。
#3. S100P:与钙离子结合的蛋白质,参与细胞增殖、迁移和侵袭等生物学过程,也可能与多种癌症的发生和发展有关。
#4. PCNA:是细胞周期中S期的细胞核蛋白,在DNA复制和细胞增殖过程中起重要作用。
#5. TOP2A:编码一种DNA拓扑异构酶,参与DNA的超螺旋松弛和重连,影响DNA复制和细胞分裂。
#6. SFTPA2:编码一种肺表面活性物质蛋白,参与肺泡的防御和免疫功能。
#7. SPARCL1:编码一种分泌蛋白,参与肿瘤抑制、细胞迁移和基质重塑等过程。
#8. SERPINB9:编码一种丝氨酸蛋白酶抑制剂,参与炎症调节和细胞凋亡等过程。
#9. SCGB3A1:编码一种分泌蛋白,可能与肺部基底细胞的分化和肺免疫功能有关。
#10. TPPP3:编码一种调节微管动力学的蛋白,可能参与细胞骨架的重塑和细胞运动等生物学过程。
genes_to_check = c('DST', 'AKR1B10', 'NDUFA4L2',
'KRT17' , 'CEACAM5', 'LY6G6C', 'KLK10',
'S100P', 'S100A9', 'MT2A',
'PCNA', 'TYMS', 'HIST1H2BJ', 'RRM2','DUT',
'TOP2A', 'TPX2', 'CENPF','ARL6IP1',
'SFTPA2', 'SFTPA1', 'SFTPB',
'SPARCL1' ,'IGKC','AL365357.1',
'SERPINB9', 'MYH11' ,'GJB6',
'SCGB3A1','MSMB', 'SCGB1A1', 'SLPI',
'TPPP3','CAPS', 'C9orf24', 'RSPH1')
library(stringr)
genes_to_check=str_to_upper(genes_to_check)
genes_to_check
p <- DotPlot(scRNA, features = unique(genes_to_check),
assay='RNA' ) + coord_flip()
mycolors <-c('#E5D2DD', '#53A85F', '#F1BB72', '#F3B1A0', '#D6E7A3', '#57C3F3',
'#E95C59', '#E59CC4', '#AB3282', '#BD956A',
'#9FA3A8', '#E0D4CA', '#C5DEBA', '#F7F398',
'#C1E6F3', '#6778AE', '#91D0BE', '#B53E2B',
'#712820', '#DCC1DD', '#CCE0F5', '#CCC9E6', '#625D9E', '#68A180', '#3A6963',
'#968175')
p_umap=DimPlot(scRNA, reduction = "umap",cols = mycolors,pt.size = 0.4,
group.by = "RNA_snn_res.0.5",label = T,label.box = T)
p_umap
library(patchwork)
p+p_umap
ggsave('markers_umap_2.pdf',width = 13,height = 8)
#####细胞生物学命名
celltype=data.frame(ClusterID=0:13,
celltype= 0:13)
#定义细胞亚群
celltype[celltype$ClusterID %in% c( 0 ),2]='E0_DST'
celltype[celltype$ClusterID %in% c( 9 ),2]='E1_KRT17'
celltype[celltype$ClusterID %in% c(4,6,10,13 ),2]='E2_S100P'
celltype[celltype$ClusterID %in% c(2 ),2]='E3_PCNA'
celltype[celltype$ClusterID %in% c( 7 ),2]='E4_TOP2A'
celltype[celltype$ClusterID %in% c( 5,12 ),2]='E5_SFTPA2'
celltype[celltype$ClusterID %in% c( 3 ),2]='E6_SPARCL1'
celltype[celltype$ClusterID %in% c( 8 ),2]='E7_SERPINB9'
celltype[celltype$ClusterID %in% c(1 ),2]='E8_SCGB3A1'
celltype[celltype$ClusterID %in% c( 11),2]='E9_TPPP3'
head(celltype)
celltype
table(celltype$celltype)
scRNA@meta.data$celltype = "NA"
for(i in 1:nrow(celltype)){
scRNA@meta.data[which(scRNA@meta.data$RNA_snn_res.0.5 == celltype$ClusterID[i]),'celltype'] <- celltype$celltype[i]}
table(scRNA@meta.data$celltype)
th=theme(axis.text.x = element_text(angle = 45,
vjust = 0.5, hjust=0.5))
library(patchwork)
p_umap=DimPlot(scRNA, reduction = "umap",cols = mycolors,pt.size = 0.4,
group.by = "celltype",label = T)
p+p_umap
ggsave('markers_umap_by_celltype_2.pdf',width = 15,height = 7)
saveRDS(scRNA, "sce.epi_celltype.rds")
######基因在不同病理结局的患者中表达情况
rm(list=ls())
scRNA=readRDS( "sce.epi_celltype.rds")
scRNA
table(scRNA$CopyKAT)
##在这里我查看了加入无法分类的细胞和未加入无法分类的细胞,差别还挺大的
#加入:CX3CL1趋势不对,AKR1C3趋势正确
#不加入:CX3CL1趋势正确,AKR1C3趋势不对
sce_obj = scRNA[, scRNA$CopyKAT %in% c( 'aneuploid')]
sce_obj$patientoutcone = paste(sce_obj$patient,sce_obj$Pathologic,sep = '_')
# 通过Seurat的AverageExpression函数计算每个患者基因的平均表达量
avg <- AverageExpression(object = sce_obj, assay = "RNA", group.by = "patientoutcone")
a = avg$RNA
C1 = c('CX3CL1','CD74','HLA-DRA')
D1 = c('AKR1C1','AKR1C2','AKR1C3')
#这六个基因的生物学意义如下:
#1. CX3CL1是一种细胞因子,也被称为神经内皮炎性因子1(N-TAC),它是一种趋化因子,在炎症和免疫反应中起着重要作用。它与免疫细胞的迁移和炎症反应相关,同时也参与神经发育和神经保护。
#2. CD74是一种细胞膜蛋白,也被称为类别II转运分子细胞膜关联蛋白。CD74与抗原呈递、免疫调节和细胞信号传导等免疫功能密切相关。它在抗原处理和递呈的过程中起重要作用,并在免疫细胞识别、炎症和免疫反应中发挥关键作用。
#3. HLA-DRA(Human Leukocyte Antigen-DR α chain):HLA-DRA是人白细胞抗原-DR(HLA-DR)的α链。HLA-DR是一种MHC类II复合物,负责呈递外源性抗原给T细胞,从而触发免疫反应。HLA-DRA与免疫系统的抗原递呈、T细胞的激活和调节密切相关,在免疫途径中发挥重要作用。
#4. AKR1C1属于醛酮还原酶家族的一员。它是一种脱氢酶,对多种内源性和外源性化合物具有还原活性。AKR1C1参与睾酮、孕酮等激素的代谢转化,同时还参与抗氧化和抗炎过程。
#5. AKR1C2也是醛酮还原酶家族的成员,与AKR1C1具有相似的酶活性。它在多种生物学过程中发挥重要作用,包括激素代谢、抗氧化、抗炎和神经保护。
#6. AKR1C3是醛酮还原酶家族的一员,与AKR1C1和AKR1C2具有相似的酶活性。它参与睾酮、雄激素和孕激素的代谢转化,同时还具有抗氧化和抗炎活性。在某些癌症中,AKR1C3的过度表达与抗药性和肿瘤的恶性进展有关。
gene_boxplot <- function(gene){
b = as.data.frame(a[gene,])
colnames(b) = gene
phe = b
group = ifelse(str_detect(rownames(phe),"TN"),"TN",
ifelse(str_detect(rownames(phe),"NMPR"),"NMPR","MPR"))
phe$group = group
phe$group = factor(phe$group,levels =c("TN","MPR","NMPR"))
table(phe$group)
colnames(phe)
col = c("#508E42", "#DF5E38","#7B7DC4")
P_T <- ggplot(phe,aes(x=group,y=!!rlang::sym(gene),fill=group)) +
geom_boxplot(size=0.5,fill="white",outlier.fill="white",outlier.color="white",color=col) +
geom_jitter(aes(fill=group),width =0.2,shape = 21,size=2) +
scale_fill_manual(values = col)+
ggtitle(gene) +
theme_bw() +
theme(panel.grid = element_blank(), panel.background = element_blank(),
axis.line = element_line(color = 'black')) +
ylab("Average signature score") +xlab(" ")+
theme(legend.position = "none",
axis.text.x = element_text(colour = "black", family = "Times", size = 14),
axis.text.y = element_text(family = "Times", size = 14, face = "plain"),
axis.title.y = element_text(family = "Times", size = 14, face = "plain"),
axis.title.x = element_text(family = "Times", size = 14, face = "plain"),
plot.title = element_text(family = "Times", size = 17, face = "bold", hjust = 0.5),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank())
P_T
compaired = list(c("TN", "MPR"),
c("TN", "NMPR"))
P_T = P_T+geom_signif(comparisons = compaired,step_increase = 0.1,map_signif_level = T,test = t.test,tip_length = 0)
P_T
}
P_C1 = gene_boxplot(C1[1])+gene_boxplot(C1[2])+gene_boxplot(C1[3])
P_D1 = gene_boxplot(D1[1])+gene_boxplot(D1[2])+gene_boxplot(D1[3])
P_C1/P_D1
###文中主要是看了两个基因的变化,CX3CL1在MPR的患者中上调,AKR1C3在NMPR的患者中上调
ggsave('gene_outcome.pdf',width = 13,height = 8)
saveRDS(sce_obj, "sce.cancer_celltype.rds")
###GSVA分析
rm(list=ls())
sce.all=readRDS( "sce.cancer_celltype.rds")
scRNA <- sce.all
library(msigdbr)
library(GSVA)
library(Seurat)
#选择基因集
Idents(scRNA) = scRNA$patientoutcone
#Idents设置按什么来取组的表达均值(计算case control之间的均值也可以)
expr <- AverageExpression(scRNA, assays = "RNA", slot = "data")[[1]]
expr <- expr[rowSums(expr)>0,] #选取非零基因
expr <- as.matrix(expr)
library(GSEABase)
gmt <- 'msigdb_v2023.1.Hs_files_to_download_locally/msigdb_v2023.1.Hs_files_to_download_locally/msigdb_v2023.1.Hs_GMTs/h.all.v2023.1.Hs.symbols.gmt'
geneSet <- getGmt(gmt)
# gsva(若为count矩阵,kcdf = 'Poisson',若为其他标准化后的数据,使用默认参数即可)
gsva_scores <- gsva(expr,geneSet,method = 'gsva',min.sz=5,max.sz=500,kcdf = 'Poisson')
head(gsva_scores)
gsva_scores = as.data.frame(gsva_scores)
library(stringr)
library(ggplot2)
library(ggsignif)
b = as.data.frame(t(gsva_scores['HALLMARK_ESTROGEN_RESPONSE_LATE',]))
colnames(b) = 'Estrogen response late'
phe = b
group = ifelse(str_detect(rownames(phe),"TN"),"TN",
ifelse(str_detect(rownames(phe),"NMPR"),"NMPR","MPR"))
phe$group = group
phe$group = factor(phe$group,levels =c("TN","MPR","NMPR"))
table(phe$group)
colnames(phe)
col = c("#508E42", "#DF5E38","#7B7DC4")
pathway = 'Estrogen response late'
P_T <- ggplot(phe,aes(x=group,y=!!rlang::sym(pathway),fill=group)) +
geom_boxplot(size=0.5,fill="white",outlier.fill="white",outlier.color="white",color=col) +
geom_jitter(aes(fill=group),width =0.2,shape = 21,size=2) +
scale_fill_manual(values = col)+
ggtitle(pathway) +
theme_bw() +
theme(panel.grid = element_blank(), panel.background = element_blank(),
axis.line = element_line(color = 'black')) +
ylab("Average signature score") +xlab(" ")+
theme(legend.position = "none",
axis.text.x = element_text(colour = "black", family = "Times", size = 14),
axis.text.y = element_text(family = "Times", size = 14, face = "plain"),
axis.title.y = element_text(family = "Times", size = 14, face = "plain"),
axis.title.x = element_text(family = "Times", size = 14, face = "plain"),
plot.title = element_text(family = "Times", size = 17, face = "bold", hjust = 0.5),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank())
P_T
compaired = list(c("TN", "MPR"),
c("TN", "NMPR"))
P_T = P_T+geom_signif(comparisons = compaired,step_increase = 0.1,map_signif_level = T,test = t.test,tip_length = 0)
P_T
gmt <- 'msigdb_v2023.1.Hs_GMTs/c5.go.bp.v2023.1.Hs.symbols.gmt'
geneSet <- getGmt(gmt)
# gsva(若为count矩阵,kcdf = 'Poisson',若为其他标准化后的数据,使用默认参数即可)
gsva_scores <- gsva(expr,geneSet,method = 'gsva',min.sz=5,max.sz=500,kcdf = 'Poisson')
head(gsva_scores)
gsva_scores = as.data.frame(gsva_scores)
library(stringr)
library(ggplot2)
library(ggsignif)
b = as.data.frame(t(gsva_scores['GOBP_ANTIGEN_PROCESSING_AND_PRESENTATION_OF_PEPTIDE_OR_POLYSACCHARIDE_ANTIGEN_VIA_MHC_CLASS_II',]))
colnames(b) = 'Antigen presentation via MHC-II'
phe = b
group = ifelse(str_detect(rownames(phe),"TN"),"TN",
ifelse(str_detect(rownames(phe),"NMPR"),"NMPR","MPR"))
phe$group = group
phe$group = factor(phe$group,levels =c("TN","MPR","NMPR"))
table(phe$group)
colnames(phe)
col = c("#508E42", "#DF5E38","#7B7DC4")
pathway = 'Antigen presentation via MHC-II'
P_T <- ggplot(phe,aes(x=group,y=!!rlang::sym(pathway),fill=group)) +
geom_boxplot(size=0.5,fill="white",outlier.fill="white",outlier.color="white",color=col) +
geom_jitter(aes(fill=group),width =0.2,shape = 21,size=2) +
scale_fill_manual(values = col)+
ggtitle(pathway) +
theme_bw() +
theme(panel.grid = element_blank(), panel.background = element_blank(),
axis.line = element_line(color = 'black')) +
ylab("Average signature score") +xlab(" ")+
theme(legend.position = "none",
axis.text.x = element_text(colour = "black", family = "Times", size = 14),
axis.text.y = element_text(family = "Times", size = 14, face = "plain"),
axis.title.y = element_text(family = "Times", size = 14, face = "plain"),
axis.title.x = element_text(family = "Times", size = 14, face = "plain"),
plot.title = element_text(family = "Times", size = 17, face = "bold", hjust = 0.5),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank())
P_T
compaired = list(c("TN", "MPR"),
c("TN", "NMPR"))
P_T = P_T+geom_signif(comparisons = compaired,step_increase = 0.1,map_signif_level = T,test = t.test,tip_length = 0)
P_T
MHC-II确实是在MPR的患者中上调了,但是这个文章立足之根本的雌激素反应对不上,实在是对不上啊。
(PS: 埋头苦干三个小时了,此时我正在看着楼下的人在小湖边钓鱼,好想去看😶🌫️)
rm(list=ls())
sce.all=readRDS( "sce.NKT_celltype.rds")
seuratObj <- sce.all
avg <- AverageExpression(object = seuratObj, assay = "RNA", group.by = "celltype")
a = avg$RNA
b = as.data.frame(a)
str(b)
genes <- c()
for (i in seq_along(genes_to_check)) {
genes <- c(genes, unlist(genes_to_check[[i]]))
}
gene_heatmap = b[genes,]
column_ha = HeatmapAnnotation('Cell type' = ifelse(str_detect(colnames(gene_heatmap),"NK"),"NK",
ifelse(str_detect(colnames(gene_heatmap),"CD8"),"CD8",
ifelse(str_detect(colnames(gene_heatmap),"CD4"),"CD4",
ifelse(str_detect(colnames(gene_heatmap),"Treg"),"Treg","T")))) )
row_ha <- rowAnnotation(
'Marker' = row)
col_fun = colorRamp2(c(0, 2, 7), c("#4EC0DB", "white", "#E63946"))
gene_list = genes_to_check
row <- character()
for (i in 1:length(gene_list)) {
row <- c(row, rep(names(gene_list)[i], length(gene_list[[i]])))
}
column = ifelse(str_detect(colnames(gene_heatmap),"NK"),"NK",
ifelse(str_detect(colnames(gene_heatmap),"CD8"),"CD8",
ifelse(str_detect(colnames(gene_heatmap),"CD4"),"CD4",
ifelse(str_detect(colnames(gene_heatmap),"Treg"),"Treg","T"))))
Heatmap(gene_heatmap,
column_split = column,
row_split = c('Marker' = row),
row_title = NULL,
heatmap_legend_param = list(
title = "Score gene expression",
legend_height = unit(4, "cm"),
title_position = "lefttop-rot"
),
top_annotation = column_ha,
left_annotation = row_ha,
col = col_fun,
cluster_rows = FALSE, cluster_columns = FALSE,column_title = NULL,
show_row_names = T,show_column_names = T
)
总觉得配色很扎眼。
###应该是基因的平均表达量
#取出CD8T
####NK和T细胞
table(seuratObj$celltype)
sce_CD8T = seuratObj[, seuratObj$celltype %in% c( 'CD8_GZMB','CD8_GZMK','CD8_HAVCR2','CD8_IL7R','CD8_STMN1' )]
avg <- AverageExpression(object = sce_CD8T, assay = "RNA", group.by = "patientoutcone")
a = avg$RNA
b = as.data.frame(a)
str(b)
gene_boxplot <- function(gene){
average_expression <- colMeans(submatrix)
average_expression = as.data.frame(average_expression)
colnames(average_expression) = gene
phe = average_expression
group = ifelse(str_detect(rownames(phe),"TN"),"TN",
ifelse(str_detect(rownames(phe),"NMPR"),"NMPR","MPR"))
phe$group = group
phe$group = factor(phe$group,levels =c("TN","MPR","NMPR"))
table(phe$group)
colnames(phe)
col = c("#508E42", "#DF5E38","#7B7DC4")
P_T <- ggplot(phe,aes(x=group,y=!!rlang::sym(gene),fill=group)) +
geom_boxplot(size=0.5,fill="white",outlier.fill="white",outlier.color="white",color=col) +
geom_jitter(aes(fill=group),width =0.2,shape = 21,size=2) +
scale_fill_manual(values = col)+
ggtitle(gene) +
theme_bw() +
theme(panel.grid = element_blank(), panel.background = element_blank(),
axis.line = element_line(color = 'black')) +
ylab("Average signature score") +xlab(" ")+
theme(legend.position = "none",
axis.text.x = element_text(colour = "black", family = "Times", size = 14),
axis.text.y = element_text(family = "Times", size = 14, face = "plain"),
axis.title.y = element_text(family = "Times", size = 14, face = "plain"),
axis.title.x = element_text(family = "Times", size = 14, face = "plain"),
plot.title = element_text(family = "Times", size = 17, face = "bold", hjust = 0.5),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank())
P_T
compaired = list(c("TN", "MPR"),
c("TN", "NMPR"))
P_T = P_T+geom_signif(comparisons = compaired,step_increase = 0.1,map_signif_level = T,test = t.test,tip_length = 0)
P_T
}
# Cytotoxic基因名称列表
Cytotoxic_gene <- c("GZMA", "GZMB", "GZMK" ,"GNLY", "IFNG" ,"PRF1" ,"NKG7")
submatrix <- b[Cytotoxic_gene, ]
gene_boxplot('Cytotoxic signature')
# Exhausted基因名称列表
##看,确实有CD39,果然是高耗竭基因啊
gene_list
Exhausted_gene <- c("LAG3" , "TIGIT" , "PDCD1" , "HAVCR2", "CTLA4" , "LAYN" , "ENTPD1")
submatrix <- b[Exhausted_gene, ]
gene_boxplot('Exhausted signature')
#####下面是NK
table(seuratObj$celltype)
sce_NK = seuratObj[, seuratObj$celltype %in% c( 'NK_FCGR3A','NK_KLRD1' )]
avg <- AverageExpression(object = sce_NK, assay = "RNA", group.by = "patientoutcone")
a = avg$RNA
b = as.data.frame(a)
str(b)
# Cytotoxic基因名称列表
submatrix <- b[Cytotoxic_gene, ]
gene_boxplot('Cytotoxic signature')
# Exhausted基因名称列表
submatrix <- b[Exhausted_gene, ]
gene_boxplot('Exhausted signature')
#####下面是Treg
table(seuratObj$celltype)
sce_Treg = seuratObj[, seuratObj$celltype %in% c( 'Treg_CTLA4','Treg_SELL' )]
avg <- AverageExpression(object = sce_Treg, assay = "RNA", group.by = "patientoutcone")
a = avg$RNA
b = as.data.frame(a)
str(b)
# Exhausted基因名称列表
submatrix <- b[Exhausted_gene, ]
gene_boxplot('Exhausted signature')
###在未治疗组Treg这么高?,MPR组抑制性Treg下降了
这是CD8T细胞的细胞毒性基因和耗竭基因集的评分,当时忘了拼在一起给它们加上CD8T的标签了…
#####箱线图显示TN ( n = 3)、MPR ( n = 4)和NMPR ( n = 8)患者各T/NK细胞群的细胞比例。
####不同细胞群里亚组比例
phe = seuratObj@meta.data
table(phe$celltype)
table(phe$patient)
phe$patientoutcome = paste(phe$patient,phe$Pathologic,sep = '_')
tb=table(phe$celltype,
phe$patientoutcome)
head(tb)
library (gplots)
balloonplot(tb)
bar_data <- as.data.frame(tb)
bar_per <- bar_data %>%
group_by(Var2) %>%
mutate(sum(Freq)) %>%
mutate(percent = Freq / `sum(Freq)`)
head(bar_per)
#write.csv(bar_per,file = "celltype_by_group_percent.csv")
library(ggplot2)
library(stringr)
bar_per$Var2
group = ifelse(str_detect(bar_per$Var2,"TN"),"TN",
ifelse(str_detect(bar_per$Var2,"NMPR"),"NMPR","MPR"))
bar_per$group = group
bar_per$group = factor(bar_per$group,levels =c("TN","MPR","NMPR"))
table(bar_per$Var1)
bar_per$Var1 = factor(bar_per$Var1,levels =c("B3_IGHD","B0_MS4A1","B1_IGHM",'B2_HSPA1A','B4_FCRL4','B5_CD83','B6_RGS13'))
col = c("#508E42", "#DF5E38","#7B7DC4")
P <- ggplot(bar_per, aes(x = Var1, y = percent, fill = group, color = group)) +
geom_boxplot(width = 0.6, outlier.shape = NA) +
geom_jitter(shape = 21, size = 2, position = position_jitterdodge(0.2)) +
scale_fill_manual(values = c("white", "white", "white")) +
scale_color_manual(values = col,
labels = c("TN(n=3)", "MPR(n=4)", "NMPR(n=8)"),
name = "Group") +
theme_bw() +
theme(legend.position = "bottom",
legend.direction = 'horizontal',
legend.justification = "left",
legend.box.just = "left",
legend.title = element_blank(),
axis.text.x = element_text(colour = "black",angle=45, hjust = 1, family = "Times", size = 12),
axis.text.y = element_text(family = "Times", size = 14, face = "plain"),
axis.title.y = element_text(family = "Times", size = 14, face = "plain"),
axis.title.x = element_text(family = "Times", size = 14, face = "plain"),
plot.title = element_text(family = "Times", size = 15, face = "bold", hjust = 0.5),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()) +
labs(y = "Fraction (%) of immune cells",
x = "")+
guides(fill = FALSE)
P
p = P+
coord_cartesian(clip = 'off',ylim = c(0,1))+ #在非图形区域绘图,且要定好y轴范围
theme(plot.margin = margin(2.2,0.5,1.5,0.5,'cm'))+ #自定义图片上左下右的边框宽度
annotate('text',x=2.6,y=1.13,label='CD4 Tconv',size=7,family='serif',color='red')+
annotate('segment',x=0.55,xend=4.2,y=1.1,yend=1.1,color='red',cex=.8)+
annotate('text',x=6.8,y=1.13,label='CD8 T',size=7,family='serif',color='red')+
annotate('segment',x=4.7,xend=8.9,y=1.1,yend=1.1,color='red',cex=.8)+
annotate('text',x=10.5,y=1.13,label='NK',size=7,family='serif',color='red')+
annotate('segment',x=9.5,xend=11.5,y=1.1,yend=1.1,color='red',cex=.8)+
annotate('text',x=13.6,y=1.13,label='CD4 Treg',size=7,family='serif',color='red')+
annotate('segment',x=12.8,xend=14.3,y=1.1,yend=1.1,color='red',cex=.8)
p
ggsave('NK_T_por.pdf',width = 14.5,height = 7.5)
####活化的Tregs比naïve Tregs具有更强的免疫抑制功能,且与预后不良相关。
#持续激活Tregs(Treg_CTLA4,表达TNFRSF4和TNFRSF9)仅在MPR患者中降低。
#相对于TN患者,MPR和NMPR患者中naïve Tregs (Treg_SELL,表达SELL和LEF1)的比例均下降。
#MPR患者的Treg耗竭特征始终低于NMPR患者。
#文章中用的monocle2,这里也用2
###### 拟时序分析 ######
sce_4CD8T = seuratObj[, seuratObj$celltype %in% c( 'CD8_GZMB' ,'CD8_GZMK','CD8_HAVCR2','CD8_IL7R')]
library(Seurat)
library(ggplot2)
library(clustree)
library(cowplot)
library(dplyr)
library(monocle)
packageVersion("monocle")
#monocle构建CDS需要3个矩阵:expr.matrix、pd、fd
# 将Seurat中的对象转换为monocle识别的对象
#cds <- importCDS(GetAssayData(seurat.object))
#选择做拟时序的亚群
Mono_tj<-sce_4CD8T
Mono_matrix<-as(as.matrix(GetAssayData(Mono_tj,slot = "counts")), 'sparseMatrix')
#构建featuredata,一般featuredata需要两个col,一个是gene_id,一个是gene_short_name,row对应counts的rownames
feature_ann<-data.frame(gene_id=rownames(Mono_matrix),gene_short_name=rownames(Mono_matrix))
rownames(feature_ann)<-rownames(Mono_matrix)
Mono_fd<-new("AnnotatedDataFrame", data = feature_ann)
#Seurat object中的@meta.data一般会存放表型相关的信息如cluster、sample的来源、group等,所以选择将metadata转换为phenodata
sample_ann<-Mono_tj@meta.data
#rownames(sample_ann)<-colnames(Mono_matrix)
Mono_pd<-new("AnnotatedDataFrame", data =sample_ann)
#build new cell data set
Mono.cds<-newCellDataSet(Mono_matrix,phenoData =Mono_pd,featureData =Mono_fd,expressionFamily=negbinomial.size())
#预处理
Mono.cds <- estimateSizeFactors(Mono.cds)
Mono.cds <- estimateDispersions(Mono.cds)
#筛选基因,这里可以根据自己的需要筛选特定的基因
disp_table <- dispersionTable(Mono.cds)
#unsup_clustering_genes <- subset(disp_table, mean_expression >= 0.1)
unsup_clustering_genes <- subset(disp_table, mean_expression >= 0.1)
Mono.cds <- setOrderingFilter(Mono.cds, unsup_clustering_genes$gene_id)
#用DDRtree 进行降维分析
Mono.cds <- reduceDimension(
Mono.cds,
max_components = 2,
method = 'DDRTree')
#计算psudotime值
Mono.cds <- orderCells(Mono.cds)
head(pData(Mono.cds))
plot_cell_trajectory(Mono.cds,color_by="celltype", size=1,show_backbone=TRUE)
ggsave("拟时序.pdf",width = 7,height = 6)
plot_cell_trajectory(Mono.cds,color_by="patient", size=1,show_backbone=TRUE)
plot_cell_trajectory(Mono.cds,color_by="Pathologic", size=1,show_backbone=TRUE)
p1 <- plot_cell_trajectory(Mono.cds, x = 1, y = 2, color_by = "celltype") +
theme(legend.position='none',panel.border = element_blank()) + #去掉第一个的legend
scale_color_manual(values = mycolors)
p2 <- plot_complex_cell_trajectory(Mono.cds, x = 1, y = 2,
color_by = "celltype")+
scale_color_manual(values = mycolors) +
theme(legend.title = element_blank())
p1|p2
trace('project2MST', edit = T, where = asNamespace("monocle"))
让我解释的话那只能一部分耗竭了,另一部分耗竭的被逆转了
CD8_IL7R和CD8_GZMK → CD8_GZMB → CD8_HAVCR2
从复现中学到最多的就是眼熟各种细胞亚群吧,还有,文献中稀奇古怪的图太多了,虽然不知道别人是通过什么方式画出来的,但是只求形似也想复现出来哈哈哈。话说为什么文中不对药物治疗中的药物进行分析呢?信迪利特瑞普利。如果大家对我纯手工代码不满意,也可以去看其它文章的官方代码,比如
前面我们介绍了新鲜出炉的一篇NC单细胞文章图表复现的7个笔记:
包括但不限于降维聚类分群和生物学命名,单细胞比例差异和表达量差异以及生物学功能注释,成纤维等细胞亚群的细分,单细胞高级分析等等。其它高级分析,比如转录因子分析 ,主要是对计算机资源消耗会比较大,我也多次分享过细节教程:
单细胞转录组数据分析的标准降维聚类分群,并且进行生物学注释后的结果。可以参考前面的例子:人人都能学会的单细胞聚类分群注释 ,我们演示了第一层次的分群。
如果你对单细胞数据分析还没有基础认知,可以看基础10讲:
如果是文章并没有提供表达量矩阵,或者说你对作者的矩阵不满意, 就有可能需要从fastq文件开始啦。
正常走cellranger的定量流程即可,代码我已经是多次分享了。参考:
差不多几个小时就可以完成全部的样品的cellranger的定量流程。基础知识非常重要,我们在单细胞天地多次分享过cellranger流程的笔记(2019年5月),大家可以自行前往学习,如下:
https://mp.weixin.qq.com/s/y39QEJSKqpvJl_QIXnYsRQ