mschubert / clustermq

R package to send function calls as jobs on LSF, SGE, Slurm, PBS/Torque, or each via SSH
https://mschubert.github.io/clustermq/
Apache License 2.0
145 stars 26 forks source link

seurat v5 => Error in [[<-.data.frame(*tmp*, field, value = new("Seurat", assays = list(: replacement has 5000 rows, data has 1 #331

Closed nick-youngblut closed 2 weeks ago

nick-youngblut commented 2 weeks ago

reprex:

# libraries
library(Seurat)
library(SeuratData)
library(clustermq)
options(clustermq.scheduler = "slurm")

# load an example dataset (seurat object)
InstallData("pbmc3k")
pbmc = LoadData("pbmc3k", type = "pbmc3k.final")

# subset
num_cells = 100
num_features = 5000
pbmc = subset(pbmc, cells = colnames(pbmc)[1:num_cells], features = rownames(pbmc)[1:num_features])

# clustermq test function
fx = function(x) {
    library(Seurat)
    return(x)
}

ret = Q(fx, x=pbmc, pkgs=c("Seurat"), n_jobs=1, job_size=1, memory=12 * 1024)

The output:

Error in `[[<-.data.frame`(`*tmp*`, field, value = new("Seurat", assays = list(: replacement has 5000 rows, data has 1
Traceback:

1. Q(seurat_preprocess, x = pbmc, pkgs = c("Seurat"), n_jobs = 1, 
 .     job_size = 1, memory = 12 * 1024)
2. check_args(fun, iter, const)
3. `[[<-`(`*tmp*`, field, value = new("Seurat", assays = list(RNA = new("Assay5", 
 .     layers = list(data = new("dgCMatrix", i = c(29L, 73L, 80L, 

[  IT THEN DUMPS THE **ENTIRE** COUNT MATRIX ]

It appears that clustermq is not (de)serializing the Seurat v5 object correctly.

sessionInfo

R version 4.3.3 (2024-02-29)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: Ubuntu 22.04.4 LTS

Matrix products: default
BLAS/LAPACK: /home/nickyoungblut/miniforge3/envs/seurat-v5/lib/libopenblasp-r0.3.27.so;  LAPACK version 3.12.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

time zone: America/Los_Angeles
tzcode source: system (glibc)

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] clustermq_0.9.4         ggplot2_3.5.1           pbmc3k.SeuratData_3.1.4
[4] ifnb.SeuratData_3.1.0   SeuratData_0.2.2.9001   Seurat_5.1.0           
[7] SeuratObject_5.0.2      sp_2.1-4                dplyr_1.1.4            

loaded via a namespace (and not attached):
  [1] RColorBrewer_1.1-3          jsonlite_1.8.8             
  [3] magrittr_2.0.3              spatstat.utils_3.0-4       
  [5] zlibbioc_1.48.0             vctrs_0.6.5                
  [7] ROCR_1.0-11                 DelayedMatrixStats_1.24.0  
  [9] spatstat.explore_3.2-6      RCurl_1.98-1.14            
 [11] base64enc_0.1-3             S4Arrays_1.2.0             
 [13] htmltools_0.5.8.1           SparseArray_1.2.2          
 [15] sctransform_0.4.1           parallelly_1.37.1          
 [17] KernSmooth_2.23-24          htmlwidgets_1.6.4          
 [19] ica_1.0-3                   plyr_1.8.9                 
 [21] plotly_4.10.4               zoo_1.8-12                 
 [23] uuid_1.2-0                  igraph_2.0.3               
 [25] mime_0.12                   lifecycle_1.0.4            
 [27] pkgconfig_2.0.3             Matrix_1.6-5               
 [29] R6_2.5.1                    fastmap_1.2.0              
 [31] GenomeInfoDbData_1.2.11     MatrixGenerics_1.14.0      
 [33] fitdistrplus_1.1-11         future_1.33.2              
 [35] shiny_1.8.1.1               digest_0.6.35              
 [37] colorspace_2.1-0            patchwork_1.2.0            
 [39] S4Vectors_0.40.2            tensor_1.5                 
 [41] RSpectra_0.16-1             irlba_2.3.5.1              
 [43] GenomicRanges_1.54.1        progressr_0.14.0           
 [45] fansi_1.0.6                 spatstat.sparse_3.0-3      
 [47] httr_1.4.7                  polyclip_1.10-6            
 [49] abind_1.4-5                 compiler_4.3.3             
 [51] withr_3.0.0                 fastDummies_1.7.3          
 [53] MASS_7.3-60                 DelayedArray_0.28.0        
 [55] rappdirs_0.3.3              tools_4.3.3                
 [57] lmtest_0.9-40               httpuv_1.6.15              
 [59] future.apply_1.11.2         goftest_1.2-3              
 [61] glmGamPoi_1.14.0            glue_1.7.0                 
 [63] nlme_3.1-164                promises_1.3.0             
 [65] grid_4.3.3                  pbdZMQ_0.3-11              
 [67] Rtsne_0.17                  cluster_2.1.6              
 [69] reshape2_1.4.4              generics_0.1.3             
 [71] gtable_0.3.5                spatstat.data_3.0-4        
 [73] tidyr_1.3.1                 data.table_1.15.2          
 [75] XVector_0.42.0              utf8_1.2.4                 
 [77] BiocGenerics_0.48.1         spatstat.geom_3.2-9        
 [79] RcppAnnoy_0.0.22            ggrepel_0.9.5              
 [81] RANN_2.6.1                  pillar_1.9.0               
 [83] stringr_1.5.1               spam_2.10-0                
 [85] IRdisplay_1.1               RcppHNSW_0.6.0             
 [87] later_1.3.2                 splines_4.3.3              
 [89] lattice_0.22-6              survival_3.6-4             
 [91] deldir_2.0-4                tidyselect_1.2.1           
 [93] miniUI_0.1.1.1              pbapply_1.7-2              
 [95] gridExtra_2.3               IRanges_2.36.0             
 [97] SummarizedExperiment_1.32.0 scattermore_1.2            
 [99] stats4_4.3.3                Biobase_2.62.0             
[101] matrixStats_1.3.0           stringi_1.8.4              
[103] lazyeval_0.2.2              evaluate_0.23              
[105] codetools_0.2-20            tibble_3.2.1               
[107] cli_3.6.2                   uwot_0.1.16                
[109] IRkernel_1.3.2              xtable_1.8-4               
[111] reticulate_1.37.0           repr_1.1.7                 
[113] munsell_0.5.1               GenomeInfoDb_1.38.1        
[115] Rcpp_1.0.12                 globals_0.16.3             
[117] spatstat.random_3.2-3       png_0.1-8                  
[119] parallel_4.3.3              dotCall64_1.1-1            
[121] sparseMatrixStats_1.14.0    bitops_1.0-7               
[123] listenv_0.9.1               viridisLite_0.4.2          
[125] scales_1.3.0                ggridges_0.5.6             
[127] leiden_0.4.3.1              purrr_1.0.2                
[129] crayon_1.5.2                rlang_1.1.3                
[131] cowplot_1.1.3 
nick-youngblut commented 2 weeks ago

Providing the Seurat object as a constant variable works:

fx = function(x, pbmc) {
    library(Seurat)
    return(pbmc)
}

ret = Q(fx, x=1, const=list("pbmc"=pbmc), pkgs=c("Seurat"), n_jobs=1, job_size=1, memory=12 * 1024)
nick-youngblut commented 2 weeks ago

I see that Q was trying to iterate through the Seurat object instead of passing it directly. So, one must wrap a single Seurat object in an iterable:

fx = function(x) {
    library(Seurat)
    return(x)
}

ret = Q(fx, x=c(pbmc), pkgs=c("Seurat"), n_jobs=1, job_size=1, memory=12 * 1024)