Closed juliabelk closed 2 years ago
+1 I like this idea
Hi @juliabelk,
Sorry for the delays. I am now adding an addModuleScore to the new release. To use this you can use the following little tutorial below-
#Load Hidden Helper Functions
fn <- unclass(lsf.str(envir = asNamespace("ArchR"), all = TRUE))
for (i in seq_along(fn)) {
tryCatch({
eval(parse(text = paste0(fn[i], "<-ArchR:::", fn[i])))
}, error = function(x) {
})
}
#New Function
addModuleScore <- function(
ArchRProj = NULL,
useMatrix = NULL,
features = NULL,
groupBy = "Clusters",
nBin = 25,
nBgd = 100,
name = "Module",
seed = 1,
threads = getArchRThreads(),
logFile = createLogFile("addModuleScore")
){
if(!is.null(seed)) set.seed(seed)
#Get Feature DF
featureDF <- ArchR:::.getFeatureDF(head(getArrowFiles(ArchRProj),2), subGroup=useMatrix)
rownames(featureDF) <- paste0(featureDF$seqnames, ":", featureDF$idx)
featureDF$Match <- seq_len(nrow(featureDF))
if(useMatrix %ni% getAvailableMatrices(ArchRProj)){
stop("useMatrix not in available matrices! See getAvailableMatrices!")
}
matrixClass <- h5read(getArrowFiles(ArchRProj)[1], paste0(useMatrix, "/Info/Class"))
if(matrixClass == "Sparse.Assays.Matrix"){
if(!all(unlist(lapply(unlist(features), function(x) grepl(":",x))))){
.logMessage("When accessing features from a matrix of class Sparse.Assays.Matrix it requires seqnames\n(denoted by seqnames:name) specifying to which assay to pull the feature from.\nIf confused, try getFeatures(ArchRProj, useMatrix) to list out available formats for input!", logFile = logFile)
stop("When accessing features from a matrix of class Sparse.Assays.Matrix it requires seqnames\n(denoted by seqnames:name) specifying to which assay to pull the feature from.\nIf confused, try getFeatures(ArchRProj, useMatrix) to list out available formats for input!")
}
}
if(grepl(":",unlist(features)[1])){
sname <- stringr::str_split(unlist(features),pattern=":",simplify=TRUE)[,1]
name <- stringr::str_split(unlist(features),pattern=":",simplify=TRUE)[,2]
idx <- lapply(seq_along(name), function(x){
ix <- intersect(which(tolower(name[x]) == tolower(featureDF$name)), BiocGenerics::which(tolower(sname[x]) == tolower(featureDF$seqnames)))
if(length(ix)==0){
.logStop(sprintf("FeatureName (%s) does not exist! See getFeatures", name[x]), logFile = logFile)
}
ix
}) %>% unlist
}else{
idx <- lapply(seq_along(unlist(features)), function(x){
ix <- which(tolower(unlist(features)[x]) == tolower(featureDF$name))[1]
if(length(ix)==0){
.logStop(sprintf("FeatureName (%s) does not exist! See getFeatures", unlist(features)[x]), logFile = logFile)
}
ix
}) %>% unlist
}
if(is.null(names(features))){
names(features) <- paste0(name, seq_along(features))
}else{
names(features) <- paste0(name, ".", names(features))
}
featuresUse <- featureDF[idx,]
featuresUse$Module <- Rle(stack(features)[,2])
#Get Averages
rS <- ArchR:::.getRowSums(ArrowFiles = getArrowFiles(ArchRProj), useMatrix = useMatrix)
rS <- rS[order(rS[,3]), ]
rS$Bins <- Rle(ggplot2::cut_number(x = rS[,3] + rnorm(length(rS[,3]))/1e30, n = nBin, labels = FALSE, right = FALSE))
rS$Match <- match(paste0(rS$seqnames, ":", rS$idx), rownames(featureDF))
if(nBgd > min(rS$Bins@lengths)){
stop("nBgd must be lower than ", min(rS$Bins@lengths), "!")
}
idxMatch <- match(paste0(featuresUse$seqnames, ":", featuresUse$idx), paste0(rS$seqnames, ":", rS$idx))
featuresUse$Bins <- as.vector(rS$Bins[idxMatch])
#MakeLists
featureList <- split(featuresUse$Match, featuresUse$Module)
moduleList <- split(featuresUse$Bins, featuresUse$Module)
binList <- split(rS$Match, rS$Bins)
dfM <- lapply(seq_along(featureList), function(x){
message("Computing Module ",x, " of ", length(featureList))
binx <- binList[moduleList[[x]]]
idxFgd <- featureList[[x]]
idxBgd <- unlist(lapply(binx, function(x) sample(x, nBgd)), use.names=FALSE)
m <- ArchR:::.getPartialMatrix(
ArrowFiles = getArrowFiles(ArchRProj),
featureDF = featureDF[c(idxFgd, idxBgd), ],
useMatrix = useMatrix,
cellNames = ArchRProj$cellNames,
threads = threads,
verbose = FALSE,
doSampleCells = FALSE
)
Matrix::colMeans(m[seq_along(idxFgd), ]) - Matrix::colMeans(m[-seq_along(idxFgd), ])
}) %>% Reduce("cbind", .)
for(x in seq_len(ncol(dfM))){
ArchRProj <- addCellColData(ArchRProj, data = dfM[,x], name=names(featureList)[x], cells=rownames(dfM), force = TRUE)
}
ArchRProj
}
#Test Function
projHeme3 <- addImputeWeights(projHeme3)
features <- list(
BScore = c("MS4A1", "CD79A", "CD74"),
TScore = c("CD3D", "CD8A", "GZMB", "CCR7", "LEF1")
)
projHeme3 <- addModuleScore(projHeme3, features = features, useMatrix = "GeneScoreMatrix")
p1 <- plotEmbedding(projHeme3, name="Module.TScore", imputeWeights = getImputeWeights(projHeme3))
p2 <- plotEmbedding(projHeme3, name="Module.BScore", imputeWeights = getImputeWeights(projHeme3))
plotPDF(ggAlignPlots(p1,p2,draw=F,type="h"))
Hope this helps
Jeff
Just a follow up to this - is it possible to do a ModuleScore of peaks? I have been trying but
getFeatures(proj,useMatrix = "PeakMatrix")
seems to return NULL
getFeatures()
returns NULL because the PeakMatrix features dont have a column called name
. Not sure if this is by design.
@jgranja24 ?
Hey all -- I think I encountered an issue with this function in cases where you have only one set of genes. That is to say, this works as a feature set:
features <- list( earlyPMC = earlyPMC, germMarkers = germMarkers )
But this doesn't:
features <- list( earlyPMC = earlyPMC)
e.g
ArchRProj <- addModuleScore(ArchRProj, features = features, useMatrix = "GeneScoreMatrix")
Computing Module 1 of 1
Warning message in seq_len(ncol(dfM)): “first element used of 'length.out' argument” Error in seq_len(ncol(dfM)): argument must be coercible to non-negative integer Traceback:
The issue seems to come from the very last part of the function
for (x in seq_len(ncol(dfM))) { ArchRProj <- addCellColData(ArchRProj, data = dfM[, x], name = names(featureList)[x], cells = rownames(dfM), force = TRUE) }
This is because you get back a vector for dfM rather than a matrix/dataframe if your input list for features contains only a single set of genes. It is easy to fix by also adding a dummy feature set, but is not intuitive to the user.
Thanks @dagarfield - @jgranja24 will take a look.
Also, you can use permalinks to point us to the exact line of code you're referring to. Like this:
Hi. This is very nice and it works well with GeneScoreMatrix. I wanted to do the same with GeneIntegrationMatrix but it is not adapted and returns this error:
Error in addModuleScore(proj_all_3, features = c("TNFRSF17", "SDC1", "CD19", : nBgd must be lower than 1!
Do you think it is possible to manage it? Thanks
Looking at the code, it looks like addModuleScore()
may not have been coded to handle gene expression? But i agree that this is an important feature to have. @jgranja24 ?
Note that the documentation for this function appears to be for the addImputeWeights()
function currently:
https://www.archrproject.com/reference/addModuleScore.html
Would also really like to see this implemented for gene expression.
@j-andrews7 - thanks for pointing that out. I fixed the documentation via https://github.com/GreenleafLab/ArchR/commit/9366af8b6022f4d7f7d6ff3f65b16725c32fdde4 in release_1.0.2
branch
Still looking to carve out time to implement this for gene expression.
Hi, I get the following error when running addModuleScore while I did not get it before, do you have any idea what the problem is?
features <- list( BScore = Bcell, PlasmaScore = Plasmacell ) head(Bcell) [1] "AICDA" "AID" "ARNTL2" "BACH2" "BATF" "BATF3"
class(Bcell) [1] "character"
head(Plasmacell) [1] "AKT1" "ATF4" "ATF6" "BATF2" "BHLHA15" "BLIMP1"
class(Plasmacell) [1] "character"
ArchR_proj <- addModuleScore(MM_PreVsPost_KL_peak, features = features, useMatrix = "GeneScoreMatrix", name="Module_GeneScore") Error in .validInput(input = features, name = "features", valid = c("character")) : Input value for 'features' is not a character, (features = list) please supply valid input!
Thanks in advance
After reinstalling the last release, it looks like it is fine. devtools::install_github("GreenleafLab/ArchR", ref="release_1.0.2", repos = BiocManager::repositories())
addModuleScore()
has been updated and should work with all data types. The following changes were made on the dev
branch and will be incorporated into release_1.0.3
https://github.com/GreenleafLab/ArchR/commit/44a0d27840d4e11a6ee04bd9f26f01761b1ff017#diff-b31996f658c3e45aeb4f21aa5f5ddc7cc150d4383da40f3a1a3955351ce32887 https://github.com/GreenleafLab/ArchR/commit/f49141f6c2a9c25dc27784d91af40135c8f38ee2#diff-b31996f658c3e45aeb4f21aa5f5ddc7cc150d4383da40f3a1a3955351ce32887 https://github.com/GreenleafLab/ArchR/commit/9183c43f237c8d5cfa7d8db0cb99421482a857a3#diff-b31996f658c3e45aeb4f21aa5f5ddc7cc150d4383da40f3a1a3955351ce32887
addModuleScore() has been updated and should work with all data types. The following changes were made on the dev branch and will be incorporated into release_1.0.3
44a0d27 #diff-b31996f658c3e45aeb4f21aa5f5ddc7cc150d4383da40f3a1a3955351ce32887 f49141f #diff-b31996f658c3e45aeb4f21aa5f5ddc7cc150d4383da40f3a1a3955351ce32887 9183c43 #diff-b31996f658c3e45aeb4f21aa5f5ddc7cc150d4383da40f3a1a3955351ce32887
Hi @rcorces
I've updated to the latest version using devtools::install_github("GreenleafLab/ArchR", ref="release_1.0.3", repos = BiocManager::repositories())
, but addModuleScore still doesn't seem to work with GeneExpressionMatrix.
proj2 <- addModuleScore(proj2, features = features, useMatrix = "GeneExpressionMatrix")
Error in addModuleScore(proj2, features = features, useMatrix = "GeneExpressionMatrix") :
nBgd must be lower than 1!
@Ping-lin14 - I'm unable to recapitulate your error using my own dataset.
> features <- list(
BScore = c("MS4A1", "CD79A", "CD74"),
TScore = c("CD3D", "CD8A", "GZMB", "CCR7", "LEF1")
)
> projMulti3 <- addModuleScore(projMulti3,
+ useMatrix = "GeneExpressionMatrix",
+ name = "Module",
+ features = features)
Computing Module 1 of 2
Computing Module 2 of 2
Overriding previous entry for Module.BScore
Overriding previous entry for Module.TScore
> plotEmbedding(projMulti3,
+ embedding = "UMAP_Combined",
+ colorBy = "cellColData",
+ name="Module.BScore",
+ imputeWeights = getImputeWeights(projMulti3))
Getting ImputeWeights
ArchR logging to : ArchRLogs/ArchR-plotEmbedding-ffb154258efcc-Date-2022-09-13_Time-16-36-24.log
If there is an issue, please report to github with logFile!
Imputing Matrix
Plotting Embedding
1
ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-ffb154258efcc-Date-2022-09-13_Time-16-36-24.log
>
Hello, I am not able to make it work
projMulti3 <- addModuleScore(project_Peaks_MACS2_RES0.5, useMatrix = "GeneExpressionMatrix",name = "Module",features = features) Error in addModuleScore(project_Peaks_MACS2_RES0.5, useMatrix = "GeneExpressionMatrix", : nBgd must be lower than 1!
My code is the following,
features <- list( BScore = c("MS4A1", "CD79A", "CD74"), TScore = c("CD3D", "CD8A", "GZMB", "CCR7", "LEF1") ) projMulti3 <- addModuleScore(project_Peaks_MACS2_RES0.5, useMatrix = "GeneExpressionMatrix",name = "Module",features = features)
Chiming in to say that I am also unable to get this to work using an integrated GEX matrix with the dev branch. Gene score is fine though.
My best guess is that it has something to do with line 121 in ModuleScore.R (44a0d27 #diff-b31996f658c3e45aeb4f21aa5f5ddc7cc150d4383da40f3a1a3955351ce32887 f49141f #diff-) which calls on the variable nBin which is seemingly undefined, leading to bin values of 1. Not sure why this would be exclusive to GEX matrices tho.
Would it be possible to add a function to combine multiple gene scores? Such as the addModuleScore function in Seurat?
https://rdrr.io/github/satijalab/seurat/man/AddModuleScore.html
Thanks!