巨噬细胞新分类体系（一篇Science文献复现）

巨噬细胞新分类体系（一篇Science文献复现） by 生信菜鸟团

❝
之前看到了曾老师写的关于巨噬细胞新分类体系（放弃传统M1和M2）的一篇推文，想起来自己之前写的推文对于巨噬细胞分类一直是按照M1和M2来的，主要是依据CD86和CD163的表达量来区分。正如这篇推文写的，巨噬细胞可以分类成为2个（TREM2和FOLR2基因的排他性），「同时曾老师留了一个学徒作业，我今天这周推文就是来尝试做一下这个学徒作业。」
❞

巨噬细胞新分类体系（放弃传统M1和M2)

还是先对单细胞数据进行按部就班的分析。

Step1: 导入数据

文章中给出的数据集是GSE234933，我把数据解压以后，发现是52个分开的rds格式的data, 就需要批量读入数据，并合并Seurat对象。（这部分需要注意一下）

对于也想复现这篇文献的小伙伴，提醒一下，数据合并以后有「近 9G」，没有服务器的小伙伴「不要盲目复现」~

rm(list=ls())
options(stringsAsFactors = F) 
library(Seurat)
library(ggplot2)
library(clustree)
library(cowplot)
library(dplyr)
library(data.table)
library(stringr)
getwd()
setwd("./GSE234933_raw/rds/")
###### step1:导入数据 ###### 
# 获取所有rds文件的列表
file_list <- list.files("./GSE234933_raw/rds/", pattern = ".rds")
# 创建一个空的列表来存储Seurat对象
seurat_list <- list()
# 循环读取每个rds文件的数据并创建Seurat对象
for (file in file_list) {
  # 拼接文件路径
  data.path <- paste0("./GSE234933_raw/rds/", file)
  # 读取RDS文件数据
  seurat_data <- readRDS(data.path)
  # 创建Seurat对象，并指定项目名称为文件名（去除后缀）
  sample_name <- tools::file_path_sans_ext(basename(file))
  seurat_obj <- CreateSeuratObject(counts = seurat_data,
                                   project = sample_name,
                                   min.features = 200,
                                   min.cells = 3)
  # 将Seurat对象添加到列表中
  seurat_list <- append(seurat_list, seurat_obj)
}
# 提取下划线前面的部分
sample_names <- sub("_.*", "", file_list)
# 合并Seurat对象，将所有Seurat对象合并到一个对象中
seurat_combined <- merge(seurat_list[[1]],
                         y = seurat_list[-1],
                         add.cell.ids = sample_names)
seurat_combined
saveRDS(seurat_combined, file = "seurat.rds")

#load data
sce<-readRDS("./seurat.rds")

head(rownames(sce@meta.data))
phe=str_split(rownames(sce@meta.data),'_',simplify = T)
head(phe)
sce@meta.data$orig.ident=paste0('',phe[,2])
table(sce@meta.data$orig.ident)

sce.all=sce
table(sce.all$orig.ident)

#########QC#########
########省略此步骤#######可以去看这个合集最早的推文#########
setwd('../')

Step2:降维分群和harmony

dir.create("2-harmony")
getwd()
setwd("2-harmony")

# sce.all=readRDS("../1-QC/sce.all_qc.rds")
sce=sce.all 
sce
sce <- NormalizeData(sce, 
                         normalization.method = "LogNormalize",
                         scale.factor = 1e4) 
sce <- FindVariableFeatures(sce)
sce <- ScaleData(sce)
sce <- RunPCA(sce, features = VariableFeatures(object = sce))

library(harmony)
seuratObj <- RunHarmony(sce, "orig.ident")
names(seuratObj@reductions)
seuratObj <- RunUMAP(seuratObj,  dims = 1:15, 
                     reduction = "harmony")
DimPlot(seuratObj,reduction = "umap",label=T ) 

sce=seuratObj
sce <- FindNeighbors(sce, reduction = "harmony",
                     dims = 1:15) 
sce.all=sce
#设置不同的分辨率，观察分群效果
for (res in c(0.01, 0.05, 0.1, 0.2, 0.3, 0.5,0.8,1)) {
  sce.all=FindClusters(sce.all, #graph.name = "CCA_snn",
                       resolution = res, algorithm = 1)
}
colnames(sce.all@meta.data)
apply(sce.all@meta.data[,grep("RNA_snn",colnames(sce.all@meta.data))],2,table)

head(colnames(sce.all))
table(sce.all$orig.ident)
#接下来分析
sel.clust = "RNA_snn_res.0.05"
sce.all <- SetIdent(sce.all, value = sel.clust)
table(sce.all@active.ident) 
saveRDS(sce.all, "sce.all_int.rds")

marker gene

genes_to_check = c('PTPRC', 'CD3D', 'CD3E', 'CD4','CD8A',
                   'CD19', 'CD79A', 'MS4A1' ,
                   'IGHG1', 'MZB1', 'SDC1',
                   'CD68', 'CD163', 'CD14', 
                   'TPSAB1' , 'TPSB2',  # mast cells,
                   'RCVRN','FPR1' , 'ITGAM' ,
                   'C1QA',  'C1QB',  # mac
                   'S100A9', 'S100A8', 'MMP19',# monocyte
                   'FCGR3A','LAMP3', 'IDO1','IDO2',## DC3 
                   'CD1E','CD1C', # DC2
                   'KLRB1','NCR1', # NK 
                   'FGF7','MME', 'ACTA2', ## human  fibo 
                   'DCN', 'LUM',  'GSN' , ## mouse PDAC fibo 
                   'MKI67' , 'TOP2A', 
                   'PECAM1', 'VWF',  ## endo 
                   'EPCAM' , 'KRT19','KRT7', # epi 
                   'FYXD2', 'TM4SF4', 'ANXA4',# cholangiocytes
                   'APOC3', 'FABP1',  'APOA1',  # hepatocytes
                   'Serpina1c','PROM1', 'ALDH1A1' )
# EPCAM, keratin 19 [KRT19], and KRT7); 
# cholangiocytes (  marked with FYXD2, TM4SF4, and ANXA4); 
# hepatocytes ( marked with APOC3, FABP1, and APOA1);

library(stringr)  
genes_to_check=str_to_upper(genes_to_check)
genes_to_check
p <- DotPlot(sce.all, features = unique(genes_to_check),
             assay='RNA',group.by = "RNA_snn_res.0.05"  )  + coord_flip()

p 
ggsave('check_last_markers.pdf',height = 11,width = 11)

Step3: 细胞亚群命名

celltype=data.frame(ClusterID=0:10,
                    celltype= 0:10) 

#定义细胞亚群 
celltype[celltype$ClusterID %in% c(0,7),2]='T'  
celltype[celltype$ClusterID %in% c( 2),2]='Macropage'  
celltype[celltype$ClusterID %in% c( 8),2]='mast'   
celltype[celltype$ClusterID %in% c( 1,9,10),2]='Epi-tumor' 
celltype[celltype$ClusterID %in% c( 6),2]='fibo' 
celltype[celltype$ClusterID %in% c(5),2]='Endo' 
celltype[celltype$ClusterID %in% c(4),2]='cycling'
celltype[celltype$ClusterID %in% c(3),2]='B'

head(celltype)
celltype
table(celltype$celltype)

sce.all@meta.data$celltype = "NA"
for(i in 1:nrow(celltype)){
  sce.all@meta.data[which(sce.all@meta.data$RNA_snn_res.0.05== celltype$ClusterID[i]),'celltype'] <- celltype$celltype[i]}
table(sce.all@meta.data$celltype)


th=theme(axis.text.x = element_text(angle = 45, 
                                    vjust = 0.5, hjust=0.5)) 

library(patchwork)
p_all_markers=DotPlot(sce.all, features = genes_to_check,
                      assay='RNA' ,group.by = 'celltype' )  + coord_flip()+th
p_umap=DimPlot(sce.all, reduction = "umap", group.by = "celltype",label = T,label.box = T)
p+p_umap
ggsave('markers_umap_by_celltype_2.pdf',width = 13,height = 8)

「初次分群，Macrophage部分并没有分的很清楚，后续还会把巨噬细胞提出来做分析。」

根据文章中图上的基因来看

Tumor=c("KRT8", "EPCAM", "Krt5", "TP63", "CDH1")
Stroma=c("FAP", "PDGFRB", "COL1A1","ACTA2","TAGLN","DCN","CDH5","TIE1","VWF","PECAM1")
Immune=c("FCN1", "APOE", "C1QB", "SPP1", "CXCL9","CD1E","CXCR2","CXCR1",
         "KIT","CD3D","CD8A","MS4A1","CD79A","PTPRC")  
genes_to_check =list(
  Tumor=Tumor,
  Stroma=Stroma,
  Immune=Immune
)
library(stringr)
genes_to_check = lapply(genes_to_check, str_to_upper)
p_all_markers=DotPlot(sce.all, 
                      features = genes_to_check,
                      scale = T,assay='RNA',group.by = "RNA_snn_res.0.05"  )+
  theme(axis.text.x=element_text(angle=45,hjust = 1))
p_all_markers
ggsave('check_GSE234944_markers.pdf',height = 11,width = 11)

「复现的图」

「由于篇幅有限和本次使用的数据量有点大，下周继续对巨噬细胞亚群进行细致的分析，也看看巨噬细胞新的定义分类~」

微信交流群

如果你也想参与到群里的讨论中，给我提出一些推文更新建议的话欢迎入群。同时你也可以得到我前期更新的所有数据及代码，在群里我会把代码分享给大家，而且大家可以在群里「提出自己感兴趣的单细胞文献」「我们尽可能优先选择大家的数据集去复现和实战，也欢迎大家在群里分享更多其它资料，咱们一起进步，」「比较高级的单细胞分析」我们也会一起尝试，相信你肯定是会不虚此行哈。

附上之前推文的链接为爱发电不可取（单细胞周更需要你的支持）

「「目前群里已经有300多人，所以你想要进群的话就需要添加我的微信，私聊给我发99元的红包，我把你拉进群里。」」

我们这个群是真正的付费交流群，提供数据，代码，学习资料，也会跟大家互动交流，还会坚持进行创作至少一年。如果介意99元的门槛且不认可我们知识分享的理念，「请不要加我好友」。。

ixxmu / mp_duty