Bioconductor / DelayedArray

A unified framework for working transparently with on-disk and in-memory array-like datasets
https://bioconductor.org/packages/DelayedArray
24 stars 9 forks source link

Preserve sparsity in IsoOpWithArgs. #82

Closed: LTLA closed this 3 years ago

LTLA commented 3 years ago

Closes #73:

# Demonstration: dividing a sparse DelayedArray by a vector should keep the
# result flagged as sparse, matching what the in-memory sparse matrix does.
library(DelayedArray)
library(Matrix)
# Random sparse 1000 x 1000 matrix (third argument 0.01) and one divisor
# value per row of `mat`.
mat <- rsparsematrix(1000, 1000, 0.01)
div <- runif(nrow(mat))
# Reference behaviour on the in-memory sparse matrix.
is_sparse(mat/div)
## [1] TRUE

# Wrap the same matrix in a DelayedArray; sparsity is reported before any
# operation is applied.
out <- DelayedArray(mat)
is_sparse(out)
## [1] TRUE

# The delayed division is the case this change targets: the recorded output
# shows the result is still reported as sparse.
out2 <- out / div
is_sparse(out2)
## [1] TRUE

# Realize the delayed result as a dgCMatrix and check it is identical to the
# direct in-memory computation.
ref <- as(out2, "dgCMatrix")
identical(ref, mat/div)
## [1] TRUE

If this looks good, happy to throw in some tests.

LTLA commented 3 years ago

To motivate this from the linked issues:

# Baseline benchmark: time a QC -> normalization -> HVG selection -> fastMNN
# pipeline on the 'pbmc8k' dataset as returned by TENxPBMCData().
library(TENxPBMCData)
library(scater)
library(scran)

# Keep an untouched copy in `original` so the pipeline can be re-run later
# from the same starting object.
original <- TENxPBMCData('pbmc8k')
current <- original

system.time({
# NOTE(review): the pattern "MT" is unanchored, so it matches any symbol
# containing "MT" anywhere; presumably mitochondrial genes are intended --
# confirm whether an anchored pattern such as "^MT-" was meant.
is.mito <- grep("MT", rowData(current)$Symbol_TENx)
stats <- perCellQCMetrics(current, subsets=list(Mito=is.mito))
# Drop the columns flagged as outliers on the high side of the Mito
# subset percentage.
high.mito <- isOutlier(stats$subsets_Mito_percent, type="higher")
current <- current[,!high.mito]

current <- logNormCounts(current)
dec <- modelGeneVar(current)
hvgs <- getTopHVGs(dec, prop=0.1)

# Fixed seed before the fastMNN call, which is given RandomParam as BSPARAM.
set.seed(0)
# NOTE(review): loading batchelor inside system.time() means package load
# time is counted in the recorded timing.
library(batchelor)
# Batch labels: the first 4000 columns are labelled 1 and the remaining
# columns 2; this assumes more than 4000 columns remain after filtering.
out <- fastMNN(current, batch=rep(1:2, c(4000, ncol(current) - 4000)),
    subset.row=hvgs, BSPARAM=BiocSingular::RandomParam(deferred=TRUE))
})
##    user  system elapsed 
##  78.098   7.413  85.509 

# Sparse-enabled benchmark: restart from `original`, switch on the
# `as_sparse` flag of the innermost seed, and re-run the same pipeline.
current <- original
# NOTE(review): this sets the slot directly through two levels of @seed,
# which depends on the exact nesting of the counts assay object -- confirm
# whether a public setter should be used instead.
counts(current)@seed@seed@as_sparse <- TRUE # could be neater, but whatever.

system.time({
# NOTE(review): unanchored "MT" pattern matches any symbol containing "MT";
# presumably mitochondrial genes are intended -- confirm.
is.mito <- grep("MT", rowData(current)$Symbol_TENx)
stats <- perCellQCMetrics(current, subsets=list(Mito=is.mito))
# Drop the columns flagged as outliers on the high side of the Mito
# subset percentage.
high.mito <- isOutlier(stats$subsets_Mito_percent, type="higher")
current <- current[,!high.mito]

current <- logNormCounts(current)
dec <- modelGeneVar(current)
hvgs <- getTopHVGs(dec, prop=0.1)

# Same seed as the earlier timed run.
set.seed(0)
# NOTE(review): if this runs in the same session as the earlier timed block,
# batchelor is already attached here, so the two timings may not include the
# same package-loading cost -- confirm how the timings were collected.
library(batchelor)
# Batch labels: the first 4000 columns are labelled 1 and the remaining
# columns 2.
out2 <- fastMNN(current, batch=rep(1:2, c(4000, ncol(current) - 4000)),
    subset.row=hvgs, BSPARAM=BiocSingular::RandomParam(deferred=TRUE))
})
##    user  system elapsed 
##  43.996   0.400  44.394 
hpages commented 3 years ago

Looks good. Thanks @LTLA!

LTLA commented 3 years ago

Thanks. Do you want me to put in some tests, or do you want to do those yourself?