Closed ixxmu closed 3 months ago
经过这周的学习和整理,完成了文章Fig1的复现,还是蛮开心的!
虽然之前定的目标是这周完成复现,但是开始复现就会发现有很多要考虑到的细节,所以也不纠结花的时间,慢慢学习慢慢整理。
复现的是这篇文章:
文章标题:《Single-Cell RNA-Sequencing Reveals Peripheral T Helper Cells Promoting the Development of IgG4-Related Disease by Enhancing B Cell Activation and Differentiation》
发表日期和杂志:2023年发表在International Journal of Molecular Sciences上
在线阅读链接:https://doi.org/10.3390/ijms241813735
文献阅读笔记:外周T辅助细胞通过增强B细胞激活和分化来促进IgG4相关疾病的发展
第一层次降维聚类分群:GSE231920第一层次降维聚类分群
GSE231920差异基因热图:GSE231920差异基因热图
hhh被指导后完成的热图美化:Scillus包美化热图
那今天开始Fig2的复现,文章的Fig2部分是提取B和plasma亚群进行了细分,以及GSVA分析。
1. 首先加载第一层次降维聚类分群,然后注释后的结果文件。提取需要的亚群——B和plasma
####提取B细胞
rm(list=ls())
options(stringsAsFactors = F)
library(Seurat)
library(ggplot2)
library(clustree)
library(cowplot)
library(dplyr)
dir.create("5-B")
setwd("5-B/")
getwd()
source("../scRNA_scripts/mycolors.R")
set.seed(12345)
#加载第一层次降维聚类的结果文件
sce.all=readRDS( "../3-Celltype/sce_celltype.rds")
table(sce.all$celltype)
#提取B细胞以及plasma亚群并查看
sce.B = sce.all[, sce.all$celltype %in% c( 'B','plasma' )]
LayerData(sce.B, assay = "RNA", layer = "counts")
sce.B <- JoinLayers(sce.B)
as.data.frame(sce.B@assays$RNA$counts[1:10, 1:2])
head(sce.B@meta.data, 10)
table(sce.B$orig.ident)
2. 提取counts矩阵以及需要的metadata信息,重新走降维聚类分群流程
sce <- CreateSeuratObject(counts = sce.B@assays$RNA$counts,
meta.data = sce.B@meta.data[,1:4])
sce <- NormalizeData(sce, normalization.method = "LogNormalize",
scale.factor = 1e4)
GetAssay(sce,assay = "RNA")
sce <- FindVariableFeatures(sce,
selection.method = "vst", nfeatures = 2000)
sce <- ScaleData(sce)
sce <- RunPCA(object = sce, pc.genes = VariableFeatures(sce))
# DimHeatmap(sce, dims = 1:12, cells = 100, balanced = TRUE)
# ElbowPlot(sce)
sce <- FindNeighbors(sce, dims = 1:15)
sce <- FindClusters(sce, resolution = 0.2)
# for (res in c(0.01, 0.05, 0.1, 0.2, 0.3, 0.5,0.8,1)) {
# sce.test <- FindClusters(sce, resolution = res, algorithm = 1)
# }
# p2_tree=clustree(sce.test@meta.data, prefix = "RNA_snn_res.")
# p2_tree
# table(sce.test@meta.data$RNA_snn_res.0.2)
set.seed(321)
sce <- RunTSNE(object = sce, dims = 1:15, do.fast = TRUE)
sce <- RunUMAP(object = sce, dims = 1:5, do.fast = TRUE)
可以运行注释掉的内容,选择合适的dims和resolution值进行分群
3. 基于细胞亚群分群情况以及group可视化
#按照0.2的分辨率分群
cluster_tsne = DimPlot(sce,reduction = "tsne",group.by = "seurat_clusters",cols = my36colors,pt.size = 1.5) +
theme_dr(xlength = 0.22, ylength = 0.22, arrow = grid::arrow(length = unit(0.15, "inches"), type = "closed"))+
theme(panel.grid = element_blank())
cluster_tsne
#按照疾病及对照组分群
group_tsne = DimPlot(sce, reduction = "tsne",cols = my36colors,pt.size = 0.8,
group.by = "group",label = T) +
theme_dr(xlength = 0.22, ylength = 0.22, arrow = grid::arrow(length = unit(0.15, "inches"), type = "closed"))+
theme(panel.grid = element_blank())
group_tsne
这边参考了胃癌文献复现中对T细胞亚群细分注释的代码,大家如果需要获取代码,可以联系小助理进群:单细胞文献复现月更交流群(门票99哦)
# singleR注释
library(celldex)
library(SingleR)
#hpca.se <- HumanPrimaryCellAtlasData()
#save(hpca.se,file = 'hpca.RData')
#bpe.se <- BlueprintEncodeData()
#save(bpe.se,file = 'bpe.RData')
#加载需要的数据库文件
load('hpca.RData')
load('bpe.RData')
unique(hpca.se$label.main)
unique(hpca.se$label.fine)
unique(bpe.se$label.main)
unique(bpe.se$label.fine)
#整理数据并注释
str(sce)
anno <- SingleR(sce@assays$RNA$data,
ref = list(BP=bpe.se,HPCA=hpca.se),
labels = list(bpe.se$label.fine,hpca.se$label.main),
clusters = sce@meta.data$seurat_clusters
)
plotScoreHeatmap(anno,clusters = anno@rownames,show_colnames = T)
table(anno$labels)
PS:下载celldex数据库到本地可能会因为网速问题下不下来,我是用服务器下载然后传到本地的
#注释结果整理及可视化
celltype = data.frame(ClusterID=rownames(anno),
celltype=anno$labels,
stringsAsFactors = F)
sce@meta.data$singleR = celltype[match(sce@meta.data$seurat_clusters,celltype$ClusterID),'celltype']
table(sce$singleR)
celltype_tsne = DimPlot(sce, reduction = "tsne",cols = my36colors,pt.size = 0.8,
group.by = "singleR",label = T) +
theme_dr(xlength = 0.22, ylength = 0.22, arrow = grid::arrow(length = unit(0.15, "inches"), type = "closed"))+
theme(panel.grid = element_blank())
celltype_tsne
Idents(sce) = sce$singleR
使用singleR自动注释到一小群T和NK细胞,然后就可视化了一些marker基因确定一下
VlnPlot(sce,
features = c("CD3D","CD3E","CD4","CD8A",'MS4A1','IGHG1','MZB1', 'CD79A','GNLY', 'KLRF1','AREG'),
pt.size = 0,
ncol = 4,
cols=mycolors)
确实是NK和T,也没关系,B和T细胞这些本来就比较难分开,文章估计是剔除了这两个亚群。所以后续的结果和文章有些出入。
这里偷懒就直接用singleR注释啦,不过单细胞天地也有整理B细胞亚群细分的推文:B细胞细分亚群。可以参加一下进行手动注释!
和第一层次降维聚类分群:GSE231920第一层次降维聚类分群中使用一样的方法进行可视化即可
##堆积柱状图
library(tidyr)
library(reshape2)
tb=table(sce$group, sce$singleR)
head(tb)
library (gplots)
balloonplot(tb)
bar_data <- as.data.frame(tb)
bar_per <- bar_data %>%
group_by(Var1) %>%
mutate(sum(Freq)) %>%
mutate(percent = Freq / `sum(Freq)`)
head(bar_per)
colnames(bar_per)
library(ggthemes)
p1 = ggplot(bar_per, aes(x = percent, y = Var1)) +
geom_bar(aes(fill = Var2) , stat = "identity") + coord_flip() +
theme(axis.ticks = element_line(linetype = "blank"),
legend.position = "top",
panel.grid.minor = element_line(colour = NA,linetype = "blank"),
panel.background = element_rect(fill = NA),
plot.background = element_rect(colour = NA)) +
labs(y = " ", fill = NULL)+labs(x = 'Relative proportion(%)')+
scale_fill_manual(values=my36colors)+
theme_few()+
theme(plot.title = element_text(size=12,hjust=0.5))
p1
这里是查看每个亚群的top基因,所以使用FindAllmarkers函数进行分析即可,然后也是使用Scillus进行可视化
## Top5 genes
library(dplyr)
markers <- FindAllMarkers(sce, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
all.markers =markers %>% dplyr::select(gene, everything()) %>% subset(p_val<0.05)
dim(all.markers)
table(all.markers$cluster)
top5 =all.markers %>% group_by(cluster) %>% top_n(n = 5, wt = avg_log2FC)
sce.Scale <- ScaleData(subset(sce,downsample=100),features = top5$gene )
library(Scillus)
plot_heatmap(dataset = sce.Scale,
markers = top5$gene,
sort_var = c("singleR"),
anno_var = c("singleR"),
anno_colors = list("Set2", # RColorBrewer palette
my36colors,
"Reds",
"Greens"))
那Fig2里面对于B细胞亚群的细分就学到这里叭,接下来想整理下singleR自动注释的使用推文,然后学习一下GSVA分析流程。
之前是整理了自建库然后使用singleR进行注释的,这次一起看看singleR的celldex中的数据库使用吧!
https://mp.weixin.qq.com/s/ZjnPEJRdPiD9ePRHiVQttw