Closed ixxmu closed 11 months ago
大家好,在单细胞RNA测序实验中,双细胞(Doublet)的存在会影响数据质量和结果的准确性。双细胞是指两个或更多个细胞在被溶解并用于测序之前被误认为是一个单一的细胞。由于双细胞含有两种或更多种不同细胞类型的RNA,因此会导致基因表达分析的偏差,从而产生虚假的细胞亚群或混淆不同类型细胞之间的差异。
DoubletFinder是一个R包,旨在识别和过滤出双细胞,以保证单细胞RNA测序数据的准确性和可靠性。该软件包使用两种方法来检测双细胞:(1)单细胞表达谱聚类;(2)特定基因对的共表达。使用这些方法,DoubletFinder可以快速、高效地筛选出单细胞测序数据中的双细胞,从而减少误差和提高数据质量。
测序过程中产生的双细胞是有规律的 ,10x平台的双细胞率(符合泊松分布),如下图。
假设我们已经获取的了seurat对象,但是我们还想要判断一下seurat对象里是否存在双细胞,并且去掉这些双细胞
1 加载pbmc和r包
.libPaths(c( "/home/data/t040413/R/x86_64-pc-linux-gnu-library/4.2",
"/home/data/t040413/R/yll/usr/local/lib/R/site-library",
"/home/data/refdir/Rlib/", "/usr/local/lib/R/library"))
sessionInfo()
getwd()
library(dplyr)
library(cowplot)
library(Seurat)
library(DoubletFinder)
dir.create("~/gzh/doubletfinder")
setwd("~/gzh/doubletfinder")
pbmc=readRDS("~/gzh/featureplot_dotplot_vlnplot/pbmc3k_final.rds")
# head(pbmc@meta.data)
# table(pbmc$cell.type)
# #之前命名有点问题,现在改一下
# pbmc=RenameIdents(pbmc,'Memory CD4 T'='CD14 Mono')
# pbmc=RenameIdents(pbmc,'CD14+ Mono'="Memory CD4 T cell")
# DimPlot(pbmc,label = TRUE)
# saveRDS(pbmc,file ="~/gzh/featureplot_dotplot_vlnplot/pbmc3k_final.rds" )
# 1取出count-----
case.data <-pbmc@assays$RNA@counts
2 先看下去除双细胞之前的样子
# 2先看下去除双细胞之前的样子 pre_doubletfinder-----
pre <- CreateSeuratObject(counts = cbind(case.data), project = "case")
pre
rownames(pre)
ncol(pre)
str(pre)
pre[["percent.mt"]] <- PercentageFeatureSet(pre, pattern = "^MT-")
pre
pdf("1_pre_doubletfinder.pdf")
VlnPlot(pre, features = c("nFeature_RNA", "nCount_RNA","percent.mt"), ncol = 3)
dev.off()
3 doubletfinder去除双细胞全代码
#3如果是自己的数据,应该从cellranger的下机数据开始读取最好------
case <- CreateSeuratObject(counts = case.data, project = "case")
dim(case)
load_number <- dim(case)
load_number
case <- NormalizeData(case, normalization.method = "LogNormalize", scale.factor = 10000)
case <- FindVariableFeatures(case, selection.method = "vst", nfeatures = 2000)
case <- ScaleData(case)
case <- RunPCA(case)
case <- RunUMAP(case, dims = 1:30)
#####pK identification####################
#options("repos"=c(CRAN="https://mirrors.tuna.tsinghua.edu.cn/CRAN/"))
# BiocManager::install('devtools')
# devtools::install_github('chris-mcginnis-ucsf/DoubletFinder',force = TRUE)
# devtools::install_github('chris-mcginnis-ucsf/DoubletFinder')
library('DoubletFinder')
sweep.res.list <- DoubletFinder::paramSweep_v3(case, PCs = 1:30, sct = FALSE)
sweep.stats <- summarizeSweep(sweep.res.list, GT = FALSE)
sweep.stats
bcmvn <- find.pK(sweep.stats)
mpK <- as.numeric(as.vector(bcmvn$pK[which.max(bcmvn$BCmetric)]))
dim(case) #2638
nExp_poi <- round(0.02*ncol(case)) ## 3000~2.3%
nExp_poi
case <- doubletFinder_v3(case, PCs = 1:30, pN = 0.25, pK = mpK, nExp = nExp_poi, reuse.pANN = FALSE, sct = FALSE)
case
paste("DF.classifications_", "0.25_", mpK, "_", nExp_poi, sep="")
pdf("1_doubletFinder_case.pdf")
DimPlot(case, pt.size = 1, label = TRUE, label.size = 5, reduction = "umap", group.by = "DF.classifications_0.25_0.02_53")
dev.off()
case_filter <- subset(case, DF.classifications_0.25_0.02_53 == "Singlet" )#
case_filter
DoubletFinder_number <- dim(case_filter)
DoubletFinder_number
##############
write.table(as.matrix(GetAssayData(object = case_filter, slot = "counts")),'case_doubletfinder.csv', sep = ',', row.names = T, col.names = T, quote = F)
write.table(rbind(load_number,DoubletFinder_number), 'case_filter_cell_number.csv', sep = ',', row.names = T, col.names = F, quote = F)
#########################################################
############################################################
####################################################################
###########################################################
我们看到有少量的双细胞
4 去除双细胞之后,重新降维聚类分群
after.data <- cbind(as.matrix(GetAssayData(object = case_filter, slot = "counts")))
head(after.data)[,1:9]
dim(after.data)
after <- CreateSeuratObject(counts = after.data, project = "case")
after[["percent.mt"]] <- PercentageFeatureSet(after, pattern = "^MT-")
pdf("1_after_doubletfinder.pdf")
VlnPlot(after, features = c("nFeature_RNA", "nCount_RNA","percent.mt"), ncol = 3)
dev.off()
dim(after.data)
#标准流程------
subset_data = after%>%
Seurat::NormalizeData(verbose = FALSE) %>%
FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
ScaleData(verbose = FALSE) %>%
RunPCA(npcs = 50, verbose = FALSE) %>%
RunUMAP(dims = 1:30) %>%
RunTSNE(dims = 1:30) %>%
FindNeighbors() %>%
FindClusters()
ElbowPlot(subset_data, ndims = 50)
VlnPlot(subset_data, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
5 最后就是细胞命名,可以看下之前的直播
看完记得顺手点个“在看”哦!
https://mp.weixin.qq.com/s/NY2PHQUG54nLeTvkxe4nxw