aertslab / create_cisTarget_databases

Create cisTarget databases
37 stars 8 forks source link

How can I use the "genes_vs_motifs.feather" into R SCENIC #30

Open CNChunhua opened 1 year ago

CNChunhua commented 1 year ago

Recently, I created goat feather files but I didn't know how to use the feather file as the R SCENIC input file here is my code image


library(Seurat)
library(tidyverse)
library(patchwork)
library(SCENIC)
library(AUCell)
library(RcisTarget)
library(GENIE3)
rm(list=ls())
getwd()

##准备细胞meta信息
##Merge
setwd("F:/SCGM_RDS")
scRNA <- readRDS("SCGM_merge_cluster-Normalizetion-PCA-UMAP-SNE.rds")
(n=length(unique(scRNA@meta.data$seurat_clusters)))
celltype=data.frame(ClusterID=0:(n-1),
                    celltype='Unknow')
##Merge
celltype[celltype$ClusterID %in% c(4),2]='BCs' #1
celltype[celltype$ClusterID %in% c(18),2]='ESCs' #2
celltype[celltype$ClusterID %in% c(14),2]='MPCs' #3
celltype[celltype$ClusterID %in% c(1),2]='MuSC1' #4
celltype[celltype$ClusterID %in% c(6),2]='MuSC2' #5
celltype[celltype$ClusterID %in% c(8),2]='MuSC3' #6 
celltype[celltype$ClusterID %in% c(),2]='Mb1' #7
celltype[celltype$ClusterID %in% c(12),2]='Mb2' #8
celltype[celltype$ClusterID %in% c(),2]='Mb3' #9 
celltype[celltype$ClusterID %in% c(13),2]='Mb4' #10 
celltype[celltype$ClusterID %in% c(3,19),2]='SMCs' #11
celltype[celltype$ClusterID %in% c(0,7),2]='FAPs' #12
celltype[celltype$ClusterID %in% c(11),2]='Fbs' #13
celltype[celltype$ClusterID %in% c(16),2]='Aps' #14
celltype[celltype$ClusterID %in% c(15),2]='Tcs' #15
celltype[celltype$ClusterID %in% c(2,5,9,20),2]='ECs' #16
celltype[celltype$ClusterID %in% c(17),2]='ICs' #17
celltype[celltype$ClusterID %in% c(10),2]='Unknow' #18
scRNA@meta.data$celltype = "NA"
for(i in 1:nrow(celltype)){
  scRNA@meta.data[which(scRNA@meta.data$seurat_clusters == celltype$ClusterID[i]),'celltype'] <- celltype$celltype[i]}
scRNA@meta.data$celltype1<-factor(scRNA@meta.data$celltype,levels = c('BCs','ESCs','MPCs','MuSC1','MuSC2','MuSC3','Mb2','Mb4','SMCs','FAPs','Fbs','Aps','Tcs','ECs','ICs','Unknow'))

###############################################
cellInfo <- data.frame(scRNA@meta.data)
colnames(cellInfo)[which(colnames(cellInfo)=="orig.ident")] <- "sample"
colnames(cellInfo)[which(colnames(cellInfo)=="seurat_clusters")] <- "cluster"
colnames(cellInfo)[which(colnames(cellInfo)=="celltype1")] <- "celltype"
cellInfo <- cellInfo[,c("sample","cluster","celltype")]
setwd("F:\\works\\单细胞转录调控网络分析\\单细胞转录调控网络分析")
#saveRDS(cellInfo, file="cellInfo.Rds")
cellInfo <- readRDS(file="cellInfo.Rds")
##准备表达矩阵
#为了节省计算资源,随机抽取1000个细胞的数据子集
subcell <- sample(colnames(scRNA),1000)
scRNAsub <- scRNA[,subcell]
#saveRDS(scRNAsub, "scRNAsub.rds")
scRNAsub <- readRDS(file="scRNAsub.rds")
exprMat <- as.matrix(scRNAsub@assays$RNA@counts)
##设置分析环境
mydbDIR <- "F:\\works\\意大利期间单细胞\\SCENIC"

##修改feather文件
#于python中修改
##
mydbs <- c("500up.genes_vs_motifs.rankings.feather",
           "10k.genes_vs_motifs.rankings.feather")

#mydbs <- c("hg19-500bp-upstream-7species.mc8nr.feather",
#           "hg19-tss-centered-10kb-7species.mc8nr.feather")
names(mydbs) <- c("500bp", "10kb")
#scenicOptions <- initializeScenic(org="hgnc", 
  #                       dbDir=mydbDIR , nCores=4)

scenicOptions <- initializeScenic(org="hgnc", 
                                  nCores=2,
                                  dbDir=mydbDIR, 
                                  dbs = mydbs,
                                  datasetTitle = "GOAT")
#saveRDS(scenicOptions, "int/scenicOptions.rds")

##==转录调控网络推断==##
##基因过滤
#过滤标准是基因表达量之和>细胞数*3%,且在1%的细胞中表达
genesKept <- geneFiltering(exprMat, scenicOptions, 
                           minCountsPerGene = 3 * 0.01 * ncol(exprMat), 
                           minSamples = ncol(exprMat) * 0.01)
#genesKept <- rownames(exprMat)#不过滤
exprMat_filtered <- exprMat[genesKept, ]
##计算相关性矩阵
runCorrelation(exprMat_filtered, scenicOptions)
##TF-Targets相关性回归分析
exprMat_filtered_log <- log2(exprMat_filtered+1)
runGenie3(exprMat_filtered_log, scenicOptions, nParts = 10)
#这一步消耗的计算资源非常大,个人电脑需要几个小时的运行时间
##推断共表达模块
runSCENIC_1_coexNetwork2modules(scenicOptions)
##推断转录调控网络(regulon)
runSCENIC_2_createRegulons(scenicOptions)
#以上代码可增加参数coexMethod=c("w001", "w005", "top50", "top5perTarget", "top10perTarget", "top50perTarget"))
#默认6种方法的共表达网络都计算,可以少选几种方法以减少计算量
##regulons计算AUC值并进行下游分析
#exprMat_all <- as.matrix(scRNA@assays$RNA@counts)
#exprMat_all <- log2(exprMat_all+1)
exprMat_all <- readRDS("F:\\works\\意大利期间单细胞\\SCENIC\\scenicOptions.rds")
runSCENIC_3_scoreCells(scenicOptions, exprMat=exprMat_all)
#使用shiny互动调整阈值
aucellApp <- plotTsne_AUCellApp(scenicOptions, exprMat_all)
savedSelections <- shiny::runApp(aucellApp)
#保存调整后的阈值
newThresholds <- savedSelections$thresholds
scenicOptions@fileNames$int["aucell_thresholds",1] <- "int/newThresholds.Rds"
saveRDS(newThresholds, file=getIntName(scenicOptions, "aucell_thresholds"))
saveRDS(scenicOptions, file="int/scenicOptions.Rds")
runSCENIC_4_aucell_binarize(scenicOptions, exprMat=
```exprMat_all)

I dont know if its right
Thanks for answering my question