GSE231920_B细胞亚群细分

GSE231920_B细胞亚群细分 by 生信漫漫学

前情提要

经过这周的学习和整理，完成了文章Fig1的复现，还是蛮开心的！

虽然之前定的目标是这周完成复现，但是开始复现就会发现有很多要考虑到的细节，所以也不纠结花的时间，慢慢学习慢慢整理。

复现的是这篇文章：

文章标题：《Single-Cell RNA-Sequencing Reveals Peripheral T Helper Cells Promoting the Development of IgG4-Related Disease by Enhancing B Cell Activation and Differentiation》

发表日期和杂志：2023年发表在International Journal of Molecular Sciences上

在线阅读链接：https://doi.org/10.3390/ijms241813735

文献阅读笔记：外周T辅助细胞通过增强B细胞激活和分化来促进IgG4相关疾病的发展

第一层次降维聚类分群：GSE231920第一层次降维聚类分群

GSE231920差异基因热图：GSE231920差异基因热图

hhh被指导后完成的热图美化：Scillus包美化热图

那今天开始Fig2的复现，文章的Fig2部分是提取B和plasma亚群进行了细分，以及GSVA分析。

提取需要的亚群走降维聚类分群

1. 首先加载第一层次降维聚类分群，然后注释后的结果文件。提取需要的亚群——B和plasma

####提取B细胞
rm(list=ls())
options(stringsAsFactors = F)
library(Seurat)
library(ggplot2)
library(clustree)
library(cowplot)
library(dplyr)
dir.create("5-B")
setwd("5-B/") 
getwd()
source("../scRNA_scripts/mycolors.R")
set.seed(12345)

#加载第一层次降维聚类的结果文件
sce.all=readRDS( "../3-Celltype/sce_celltype.rds")
table(sce.all$celltype)

#提取B细胞以及plasma亚群并查看
sce.B = sce.all[, sce.all$celltype %in% c( 'B','plasma' )]
LayerData(sce.B, assay = "RNA", layer = "counts")
sce.B <- JoinLayers(sce.B)

as.data.frame(sce.B@assays$RNA$counts[1:10, 1:2])
head(sce.B@meta.data, 10)
table(sce.B$orig.ident)

2. 提取counts矩阵以及需要的metadata信息，重新走降维聚类分群流程

sce <- CreateSeuratObject(counts = sce.B@assays$RNA$counts,
                            meta.data = sce.B@meta.data[,1:4])
sce <- NormalizeData(sce, normalization.method =  "LogNormalize",  
                     scale.factor = 1e4)
GetAssay(sce,assay = "RNA")
sce <- FindVariableFeatures(sce, 
                            selection.method = "vst", nfeatures = 2000)  
sce <- ScaleData(sce) 
sce <- RunPCA(object = sce, pc.genes = VariableFeatures(sce))

# DimHeatmap(sce, dims = 1:12, cells = 100, balanced = TRUE)
# ElbowPlot(sce) 

sce <- FindNeighbors(sce, dims = 1:15)
sce <- FindClusters(sce, resolution = 0.2)

# for (res in c(0.01, 0.05, 0.1, 0.2, 0.3, 0.5,0.8,1)) {
#   sce.test <- FindClusters(sce, resolution = res, algorithm = 1)
# }
# p2_tree=clustree(sce.test@meta.data, prefix = "RNA_snn_res.")
# p2_tree
# table(sce.test@meta.data$RNA_snn_res.0.2)

set.seed(321)
sce <- RunTSNE(object = sce, dims = 1:15, do.fast = TRUE)

sce <- RunUMAP(object = sce, dims = 1:5, do.fast = TRUE)

可以运行注释掉的内容，选择合适的dims和resolution值进行分群

3. 基于细胞亚群分群情况以及group可视化

#按照0.2的分辨率分群
cluster_tsne = DimPlot(sce,reduction = "tsne",group.by = "seurat_clusters",cols = my36colors,pt.size = 1.5)  +
  theme_dr(xlength = 0.22, ylength = 0.22, arrow = grid::arrow(length = unit(0.15, "inches"), type = "closed"))+
  theme(panel.grid = element_blank())

cluster_tsne

#按照疾病及对照组分群
group_tsne = DimPlot(sce, reduction = "tsne",cols = my36colors,pt.size = 0.8,
        group.by = "group",label = T)  +
  theme_dr(xlength = 0.22, ylength = 0.22, arrow = grid::arrow(length = unit(0.15, "inches"), type = "closed"))+
  theme(panel.grid = element_blank())

group_tsne

使用singleR进行注释

这边参考了胃癌文献复现中对T细胞亚群细分注释的代码，大家如果需要获取代码，可以联系小助理进群：单细胞文献复现月更交流群（门票99哦）

# singleR注释
library(celldex)
library(SingleR)
#hpca.se <- HumanPrimaryCellAtlasData()
#save(hpca.se,file = 'hpca.RData')
#bpe.se <- BlueprintEncodeData()
#save(bpe.se,file = 'bpe.RData')

#加载需要的数据库文件
load('hpca.RData')
load('bpe.RData')
unique(hpca.se$label.main)
unique(hpca.se$label.fine)
unique(bpe.se$label.main)
unique(bpe.se$label.fine)

#整理数据并注释
str(sce)
anno <- SingleR(sce@assays$RNA$data,
                ref = list(BP=bpe.se,HPCA=hpca.se),
                labels = list(bpe.se$label.fine,hpca.se$label.main),
                clusters = sce@meta.data$seurat_clusters
)

plotScoreHeatmap(anno,clusters = anno@rownames,show_colnames = T)
table(anno$labels)

PS：下载celldex数据库到本地可能会因为网速问题下不下来，我是用服务器下载然后传到本地的

#注释结果整理及可视化
celltype = data.frame(ClusterID=rownames(anno), 
                      celltype=anno$labels, 
                      stringsAsFactors = F) 

sce@meta.data$singleR = celltype[match(sce@meta.data$seurat_clusters,celltype$ClusterID),'celltype']
table(sce$singleR)

celltype_tsne = DimPlot(sce, reduction = "tsne",cols = my36colors,pt.size = 0.8,
                        group.by = "singleR",label = T)  +
  theme_dr(xlength = 0.22, ylength = 0.22, arrow = grid::arrow(length = unit(0.15, "inches"), type = "closed"))+
  theme(panel.grid = element_blank())

celltype_tsne

Idents(sce) = sce$singleR

使用singleR自动注释到一小群T和NK细胞，然后就可视化了一些marker基因确定一下

VlnPlot(sce,
        features = c("CD3D","CD3E","CD4","CD8A",'MS4A1','IGHG1','MZB1', 'CD79A','GNLY', 'KLRF1','AREG'),
        pt.size = 0,
        ncol = 4,
        cols=mycolors)

确实是NK和T，也没关系，B和T细胞这些本来就比较难分开，文章估计是剔除了这两个亚群。所以后续的结果和文章有些出入。

这里偷懒就直接用singleR注释啦，不过单细胞天地也有整理B细胞亚群细分的推文：B细胞细分亚群。可以参加一下进行手动注释！

细胞比例可视化

和第一层次降维聚类分群：GSE231920第一层次降维聚类分群中使用一样的方法进行可视化即可

##堆积柱状图
library(tidyr)
library(reshape2)

tb=table(sce$group, sce$singleR)
head(tb)
library (gplots) 
balloonplot(tb)
bar_data <- as.data.frame(tb)

bar_per <- bar_data %>% 
  group_by(Var1) %>%
  mutate(sum(Freq)) %>%
  mutate(percent = Freq / `sum(Freq)`)
head(bar_per) 

colnames(bar_per)

library(ggthemes)
p1 = ggplot(bar_per, aes(x = percent, y = Var1)) +
  geom_bar(aes(fill = Var2) , stat = "identity") + coord_flip() +
  theme(axis.ticks = element_line(linetype = "blank"),
        legend.position = "top",
        panel.grid.minor = element_line(colour = NA,linetype = "blank"), 
        panel.background = element_rect(fill = NA),
        plot.background = element_rect(colour = NA)) +
  labs(y = " ", fill = NULL)+labs(x = 'Relative proportion(%)')+
  scale_fill_manual(values=my36colors)+
  theme_few()+
  theme(plot.title = element_text(size=12,hjust=0.5))

p1

Top基因热图

这里是查看每个亚群的top基因，所以使用FindAllmarkers函数进行分析即可，然后也是使用Scillus进行可视化

## Top5 genes
library(dplyr) 
markers <- FindAllMarkers(sce, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
all.markers =markers %>% dplyr::select(gene, everything()) %>% subset(p_val<0.05)
dim(all.markers)
table(all.markers$cluster)
top5 =all.markers %>% group_by(cluster) %>% top_n(n = 5, wt = avg_log2FC)

sce.Scale <- ScaleData(subset(sce,downsample=100),features =  top5$gene  ) 


library(Scillus)

plot_heatmap(dataset = sce.Scale, 
             markers = top5$gene,
             sort_var = c("singleR"),
             anno_var = c("singleR"),
             anno_colors = list("Set2",                # RColorBrewer palette
                                my36colors,
                                "Reds",
                                "Greens"))

写在结尾

那Fig2里面对于B细胞亚群的细分就学到这里叭，接下来想整理下singleR自动注释的使用推文，然后学习一下GSVA分析流程。

之前是整理了自建库然后使用singleR进行注释的，这次一起看看singleR的celldex中的数据库使用吧！

SingleR及数据库资源包celldex简介

自行构建SingleR数据库

GSE161529—人海绵体单细胞亚群注释

ixxmu / mp_duty