Closed ixxmu closed 1 year ago
AUCell
将每个regulons作为一个基因集、以细胞为单位进行打分,判断regulon的激活情况(单细胞基因集评分之AUCell)。B站同步播出:
https://www.bilibili.com/video/BV1Mg4y1c7wU?p=7
#一步法,这里大家不要运行,我们下面要做拆解动作
scenicOptions <- runSCENIC_3_scoreCells(scenicOptions, exprMat_log)
vignette("detailedStep_3_scoreCells", package="SCENIC")#step3的完整官方教程
library(SingleCellExperiment)
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
##
## anyMissing, rowMedians
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## The following object is masked from 'package:Biobase':
##
## rowMedians
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:data.table':
##
## first, second
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:data.table':
##
## shift
## Loading required package: GenomeInfoDb
#load("data/sceMouseBrain.RData")
#exprMat <- counts(sceMouseBrain)
#读入并设置一些参数选项
scenicOptions <- readRDS("int/scenicOptions.Rds")
skipBinaryThresholds=FALSE #是否跳过二元化步骤
skipHeatmap=FALSE #是否绘制AUCell score热图
skipTsne=FALSE #是否跳过tsne环节
将step2中的结果载入,保留包含至少10个基因的regulon,并将TF名称与调控基因数量paste在一起
regulons <- loadInt(scenicOptions, "regulons")
regulons <- regulons[order(lengths(regulons), decreasing=TRUE)]#按照regulon包含的基因数量排序
regulons <- regulons[lengths(regulons)>=10]#保留调控基因数大于10的regulon
if(length(regulons) <2) stop("Not enough regulons with at least 10 genes.")#如果regulon数量小于2,则终止runSCENIC_3_scoreCells()的运行
# 将TF名称加入regulon并重命名
regulons <- setNames(lapply(names(regulons), function(tf) sort(unique(c(gsub("_extended", "", tf), regulons[[tf]])))), names(regulons))
names(regulons) <- paste(names(regulons), " (",lengths(regulons), "g)", sep="")
names(regulons)[1:5]
## [1] "Tef_extended (448g)" "Tef (406g)" "Dlx5_extended (222g)"
## [4] "Sox9_extended (196g)" "Sox9 (155g)"
saveRDS(regulons, file=getIntName(scenicOptions, "aucell_regulons"))#将以TF名+调控基因数量为名称、填充值为regulon调控基因的list存入"int/3.1_regulons_forAUCell.Rds"
msg <- paste0(format(Sys.time(), "%H:%M"), "\tStep 3. Analyzing the network activity in each individual cell")
if(getSettings(scenicOptions, "verbose")) message(msg)
## 12:52 Step 3. Analyzing the network activity in each individual cell
msg <- paste0("\nNumber of regulons to evaluate on cells: ", length(regulons),
"\nBiggest (non-extended) regulons: \n",
paste("\t", grep("_extended",names(regulons),invert = T, value = T)[1:10], collapse="\n"))#汇报分析进度
if(getSettings(scenicOptions, "verbose")) message(msg)
##
## Number of regulons to evaluate on cells: 16
## Biggest (non-extended) regulons:
## Tef (406g)
## Sox9 (155g)
## Sox8 (112g)
## Irf1 (107g)
## Sox10 (107g)
## Dlx1 (100g)
## Dlx5 (51g)
## Stat6 (41g)
## NA
## NA
AUCell
判断regulon激活情况AUCell
是一个很好的工具,我们专门用一篇推送介绍过流程:单细胞基因集评分之AUCell
(1)以细胞为单位给基因降序排序(ranking)
if(is.data.frame(exprMat))
{
supportedClasses <- paste(gsub("AUCell_buildRankings,", "", methods("AUCell_buildRankings")), collapse=", ")
supportedClasses <- gsub("-method", "", supportedClasses)
supportedClasses#取出几种method的名称
stop("'exprMat' should be one of the following classes: ", supportedClasses,
"\n(data.frames are not supported. Please, convert the expression matrix to one of these classes.)")
#若exprMat不存在则报错
}
getSettings(scenicOptions,"seed")#随机数种子为123
## [1] 123
set.seed(getSettings(scenicOptions,"seed"))#设定随机数种子保证AUCell计算的可重复性
tryCatch({
pdf(file = paste0(getIntName(scenicOptions, "aucell_genesStatsPlot"),'.my.pdf')
)#这里原文是.openDev,便于理解我将其改为pdf()函数
aucellRankings <- AUCell_buildRankings(exprMat,
plotStats=TRUE, verbose=getSettings(scenicOptions, "verbose"))#这里由于版本原因nCores可能不再适用
abline(v=aucellRankings@nGenesDetected["1%"], col="skyblue3", lwd=5, lty=3)
dev.off()
},error = function(e) {
message("Catched error in AUCell_buildRankings() or in the histogram plot: ", e$message)
})
## Quantiles for the number of genes detected by cell:
## (Non-detected genes are shuffled at the end of the ranking. Keep it in mind when choosing the threshold for calculating the AUC).
## min 1% 5% 10% 50% 100%
## 46.00 58.99 77.00 84.80 154.00 342.00
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or
## useNames = TRUE.
## png
## 2
#在画布里画出来给大家看一下
saveRDS(aucellRankings, file=getIntName(scenicOptions, "aucell_rankings"))#将ranking存于"int/3.3_aucellRankings.Rds"
计算Regulon活性(通过AUCell score)
library(AUCell)
nCores <- getSettings(scenicOptions, "nCores")
regulonAUC <- AUCell_calcAUC(regulons,#这里相当于输入一个getset
aucellRankings, #输入ranking的同时也等于输入了表达矩阵
aucMaxRank=aucellRankings@nGenesDetected["5%"], #取rank处于前5%的基因用于计算,可以大大加快计算速度
nCores=nCores)
## Warning in .AUCell_calcAUC(geneSets = geneSets, rankings = rankings, nCores =
## nCores, : Using only the first 77 genes (aucMaxRank) to calculate the AUC.
## Using 8 cores with doMC.
regulonOrder <- orderAUC(regulonAUC) #这是一个在AUCell 1.5.1新加的功能,利用各modules的相似性对regulon排序
regulonAUC <- regulonAUC[regulonOrder,]#将AUCell结果进行排序
saveRDS(regulonAUC, file=getIntName(scenicOptions, "aucell_regulonAUC"))#将AUCell的regulon结果存放在 "int/3.4_regulonAUC.Rds"
(3)通过AUCell
阈值判定regulon是否激活
regulon的活性通常呈双峰分布或偏态分布,通过绘制AUCell score
的柱状图可以初步判断它们的分布方式。
cells_AUCellThresholds <- NULL
skipBinaryThresholds <- F
if(!skipBinaryThresholds)
{
cells_AUCellThresholds <- AUCell_exploreThresholds(regulonAUC,
smallestPopPercent=getSettings(scenicOptions,"aucell/smallestPopPercent"),
assignCells=TRUE, plotHist=FALSE,
verbose=FALSE, nCores=nCores)
#自动绘制直方图,并自动计算数个阈值,可以用来考虑一个基因集是on或off“(在cells_AUCellThresholds$aucThr中返回)。对应的阈值用匹配颜色的竖条表示,较粗的竖线表示软件默认选择的阈值(cells_AUCellThresholds$aucThr$selected),这一阈值能够最大限度的减少假阳性。
saveRDS(cells_AUCellThresholds, file=getIntName(scenicOptions, "aucell_thresholds"))#将AUCell score判定阈值存放在"int/3.5_AUCellThresholds.Rds"
#以Sox9为例的最佳阈值
cells_AUCellThresholds$`Sox9_extended (193g)`[['aucThr']]$selected
}
## NULL
整理阈值信息
regulonsCells <- getAssignments(cells_AUCellThresholds)#获取每种细胞的regulon通过阈值检验的信息
trhAssignment <- getThresholdSelected(cells_AUCellThresholds)#一键提取最佳阈值,不用写循环
trhAssignment <- signif(trhAssignment, 3)#取三位有效数字
commentsThresholds <- sapply(cells_AUCellThresholds, function(x) unname(x$aucThr$comment))
commentsThresholds#如果符合双峰分布或者偏态分布,comment为空
## Dlx1_extended (126g)
## "The AUC might follow a normal distribution (random gene-set?). "
## Dlx1 (100g)
## ""
## Tef_extended (448g)
## ""
## Tef (406g)
## ""
## Dlx5_extended (222g)
## "The AUC might follow a normal distribution (random gene-set?). "
## Dlx5 (51g)
## "The AUC might follow a normal distribution (random gene-set?). "
## Sox9_extended (196g)
## "The AUC might follow a normal distribution (random gene-set?). "
## Sox9 (155g)
## "The AUC might follow a normal distribution (random gene-set?). "
## Sox10_extended (131g)
## ""
## Sox10 (107g)
## ""
## Sox8_extended (153g)
## ""
## Sox8 (112g)
## ""
## Irf1_extended (154g)
## ""
## Irf1 (107g)
## ""
## Stat6_extended (83g)
## ""
## Stat6 (41g)
## ""
#将上述信息整合
table2edit <- cbind(regulon=names(cells_AUCellThresholds),
threshold=trhAssignment[names(cells_AUCellThresholds)],
nCellsAssigned=lengths(regulonsCells)[names(cells_AUCellThresholds)],
AUCellComment=commentsThresholds[names(cells_AUCellThresholds)],
nGenes=gsub("[\\(g\\)]", "", regmatches(names(cells_AUCellThresholds), gregexpr("\\(.*?\\)", names(cells_AUCellThresholds)))),
clusteringOrder=1:length(cells_AUCellThresholds),
# clusterGroup=regulonClusters[names(cells_AUCellThresholds)],#这一行的regulonClusters在全文中没有出现过
onlyNonDuplicatedExtended=(names(cells_AUCellThresholds) %in% onlyNonDuplicatedExtended(names(cells_AUCellThresholds))),
personalNotes="")
head(table2edit)
## regulon threshold nCellsAssigned
## Dlx1_extended (126g) "Dlx1_extended (126g)" "0.273" "45"
## Dlx1 (100g) "Dlx1 (100g)" "0.179" "47"
## Tef_extended (448g) "Tef_extended (448g)" "0.666" "105"
## Tef (406g) "Tef (406g)" "0.619" "102"
## Dlx5_extended (222g) "Dlx5_extended (222g)" "0.796" "0"
## Dlx5 (51g) "Dlx5 (51g)" "0.289" "0"
## AUCellComment
## Dlx1_extended (126g) "The AUC might follow a normal distribution (random gene-set?). "
## Dlx1 (100g) ""
## Tef_extended (448g) ""
## Tef (406g) ""
## Dlx5_extended (222g) "The AUC might follow a normal distribution (random gene-set?). "
## Dlx5 (51g) "The AUC might follow a normal distribution (random gene-set?). "
## nGenes clusteringOrder onlyNonDuplicatedExtended
## Dlx1_extended (126g) "126" "1" "FALSE"
## Dlx1 (100g) "100" "2" "TRUE"
## Tef_extended (448g) "448" "3" "FALSE"
## Tef (406g) "406" "4" "TRUE"
## Dlx5_extended (222g) "222" "5" "FALSE"
## Dlx5 (51g) "51" "6" "TRUE"
## personalNotes
## Dlx1_extended (126g) ""
## Dlx1 (100g) ""
## Tef_extended (448g) ""
## Tef (406g) ""
## Dlx5_extended (222g) ""
## Dlx5 (51g) ""
write.table(table2edit, file=getIntName(scenicOptions, "aucell_thresholdsTxt"), row.names=F, quote=F, sep="\t")
绘制热图
if(!skipHeatmap){
nCellsHeatmap <- min(500, ncol(regulonAUC))#若细胞数超过500则只画五百个细胞,否则画出全部细胞
cells2plot <- sample(colnames(regulonAUC), nCellsHeatmap)#随机抽出对应个数的细胞
cellInfo <- loadFile(scenicOptions, getDatasetInfo(scenicOptions, "cellInfo"), ifNotExists="null") #TODO check if exists, if not... create/ignore?
if(!is.null(cellInfo)) cellInfo <- data.frame(cellInfo)[cells2plot,,drop=F]#注释信息
colVars <- loadFile(scenicOptions, getDatasetInfo(scenicOptions, "colVars"), ifNotExists="null")
fileName <- getOutName(scenicOptions, "s3_AUCheatmap")#热图输出的图片在"output/Step3_RegulonActivity_heatmap"中
NMF::aheatmap(getAUC(regulonAUC)[,cells2plot],
annCol=cellInfo,
annColor=colVars,
main="AUC",
sub=paste("Subset of",nCellsHeatmap," random cells"),
filename=paste0(fileName,'.pdf'))#这个热图我们后面还会调整
}
在tSNE图上绘制出regulon活性,相当于Seurat::FeaturePlot()
if(!skipTsne){
tSNE_fileName <- tsneAUC(scenicOptions, aucType="AUC", onlyHighConf=FALSE) #"int/tSNE_AUC_50pcs_30perpl.Rds",即tSNE的默认参数为50pcs,perplexity为30
tSNE <- readRDS(tSNE_fileName)
fileName <- getOutName(scenicOptions, "s3_AUCtSNE_colAct")#tsne的regulon活性以html的格式存在"output/Step3_RegulonActivity_tSNE_colByActivity"
# plotTsne_AUCellHtml(scenicOptions, exprMat, fileName, tSNE)#这里可以将上述output/Step3_RegulonActivity_tSNE_colByActivity中的图片输入到html文件之中,这里由于拆解运行报了一些错误无法实现
sub <- ""; if("type" %in% names(tSNE)) sub <- paste0("t-SNE on ", tSNE$type)
sub#汇报一下分析情况
cellInfo <- loadFile(scenicOptions, getDatasetInfo(scenicOptions, "cellInfo"), ifNotExists="null")
colVars <- loadFile(scenicOptions, getDatasetInfo(scenicOptions, "colVars"), ifNotExists="null")
pdf(paste0(getOutName(scenicOptions, "s3_AUCtSNE_colProps"),".pdf"))
plotTsne_cellProps(tSNE$Y, cellInfo=cellInfo, colVars=colVars, cex=1, sub=sub)
dev.off()#将tSNE结果(无任何着色方式)输出到"output/Step3_RegulonActivity_tSNE_colByCellProps.pdf"
}
## png
## 2
#更新分析进度
scenicOptions@status$current <- 3
saveRDS(scenicOptions, file="int/scenicOptions.Rds")
[1]Aibar S, González-Blas CB, Moerman T, Huynh-Thu VA, Imrichova H, Hulselmans G, Rambow F, Marine JC, Geurts P, Aerts J, van den Oord J, Atak ZK, Wouters J, Aerts S. SCENIC: single-cell regulatory network inference and clustering. Nat Methods. 2017 Nov;14(11):1083-1086. doi: 10.1038/nmeth.4463. Epub 2017 Oct 9.
[2]https://github.com/aertslab/SCENIC
单细胞系列教程目录
如何联系我们
https://mp.weixin.qq.com/s/DgifZl6Pk8eSrRsbNFVxYA