zhanghao-njmu / SCP

An end-to-end Single-Cell Pipeline designed to facilitate comprehensive analysis and exploration of single-cell data.
https://zhanghao-njmu.github.io/SCP/
GNU General Public License v3.0
364 stars 80 forks source link

RunDEtest and FeatureHeatmap-enrichment take too long on my own dataset and can't run #221

Closed browningChen closed 9 months ago

browningChen commented 10 months ago
[2023-12-10 15:16:40.548335] Start Enrichment
Workers: 10
Species: Homo_sapiens
Loading cached db: GO_BP version:3.18.0 nterm:15709 created:2023-12-10 10:16:48.902751
Loading cached db: KEGG version:Release 108.0+/12-09, Dec 23 nterm:356 created:2023-12-10 10:18:06.892529
Permform enrichment...

> obj <- RunDEtest(srt = obj, 
+                  group_by = "RNA_snn_res.0.1",BPPARAM = BiocParallel::SnowParam(workers = 12),
+                  fc.threshold = 1.5, only.pos = FALSE)  
[2023-12-10 14:15:16.450714] Start DEtest
Workers: 12
Find all markers(wilcox) among 11 groups...

> pancreas_sub <- RunDEtest(srt = pancreas_sub, group_by = "CellType", fc.threshold = 1, only.pos = FALSE)
Warning in RunDEtest(srt = pancreas_sub, group_by = "CellType", fc.threshold = 1,  :
  Data in the 'data' slot is raw counts. Perform NormalizeData(LogNormalize) on the data.
[2023-12-10 14:11:10.927606] Start DEtest
Workers: 10
Find all markers(wilcox) among 5 groups...
  |====================================================================================| 100%

I got some confused about RunDEtest and FeatureHeatmap I can well perform them on test data , but when I turn to my own seurat object it can't work.. T_T hopes for ur helps :D sincerely

> sessionInfo()
R version 4.3.1 (2023-06-16 ucrt)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 11 x64 (build 22621)

Matrix products: default

locale:
[1] LC_COLLATE=Chinese (Simplified)_China.utf8  LC_CTYPE=Chinese (Simplified)_China.utf8   
[3] LC_MONETARY=Chinese (Simplified)_China.utf8 LC_NUMERIC=C                               
[5] LC_TIME=Chinese (Simplified)_China.utf8    

time zone: Asia/Hong_Kong
tzcode source: internal

attached base packages:
[1] stats     graphics  grDevices datasets  utils     methods   base     

other attached packages:
[1] openxlsx_4.2.5.2   HGNChelper_0.8.1   SeuratObject_5.0.0 Seurat_4.4.0       dplyr_1.1.4        SCP_0.5.6         

loaded via a namespace (and not attached):
  [1] fs_1.6.3                      matrixStats_1.1.0             spatstat.sparse_3.0-3         bitops_1.0-7                 
  [5] enrichplot_1.22.0             HDO.db_0.99.1                 httr_1.4.7                    RColorBrewer_1.1-3           
  [9] doParallel_1.0.17             tools_4.3.1                   sctransform_0.4.1             utf8_1.2.4                   
 [13] R6_2.5.1                      HDF5Array_1.30.0              lazyeval_0.2.2                uwot_0.1.16                  
 [17] rhdf5filters_1.14.1           GetoptLong_1.0.5              withr_2.5.2                   sp_2.1-2                     
 [21] prettyunits_1.2.0             gridExtra_2.3                 progressr_0.14.0              cli_3.6.1                    
 [25] Biobase_2.62.0                Cairo_1.6-2                   spatstat.explore_3.2-5        scatterpie_0.2.1             
 [29] labeling_0.4.3                spatstat.data_3.0-3           ggridges_0.5.4                pbapply_1.7-2                
 [33] slingshot_2.10.0              Rsamtools_2.18.0              yulab.utils_0.1.0             gson_0.1.0                   
 [37] DOSE_3.28.1                   R.utils_2.12.3                parallelly_1.36.0             rstudioapi_0.15.0            
 [41] RSQLite_2.3.3                 generics_0.1.3                gridGraphics_0.5-1            shape_1.4.6                  
 [45] ica_1.0-3                     spatstat.random_3.2-2         zip_2.3.0                     GO.db_3.18.0                 
 [49] Matrix_1.6-4                  fansi_1.0.5                   S4Vectors_0.40.2              abind_1.4-5                  
 [53] R.methodsS3_1.8.2             lifecycle_1.0.4               yaml_2.3.7                    SummarizedExperiment_1.32.0  
 [57] rhdf5_2.46.1                  qvalue_2.34.0                 SparseArray_1.2.2             BiocFileCache_2.10.1         
 [61] Rtsne_0.16                    grid_4.3.1                    blob_1.2.4                    promises_1.2.1               
 [65] crayon_1.5.2                  miniUI_0.1.1.1                lattice_0.22-5                cowplot_1.1.1                
 [69] KEGGREST_1.42.0               magick_2.8.1                  pillar_1.9.0                  ComplexHeatmap_2.18.0        
 [73] fgsea_1.28.0                  GenomicRanges_1.54.1          rjson_0.2.21                  future.apply_1.11.0          
 [77] codetools_0.2-19              fastmatch_1.1-4               leiden_0.4.3.1                glue_1.6.2                   
 [81] ggfun_0.1.3                   data.table_1.14.8             remotes_2.4.2.1               treeio_1.26.0                
 [85] vctrs_0.6.5                   png_0.1-8                     spam_2.10-0                   gtable_0.3.4                 
 [89] cachem_1.0.8                  princurve_2.1.6               Signac_1.12.0                 S4Arrays_1.2.0               
 [93] mime_0.12                     tidygraph_1.2.3               survival_3.5-7                SingleCellExperiment_1.24.0  
 [97] RcppRoll_0.3.0                iterators_1.0.14              interactiveDisplayBase_1.40.0 ellipsis_0.3.2               
[101] fitdistrplus_1.1-11           ROCR_1.0-11                   nlme_3.1-164                  ggtree_3.10.0                
[105] bit64_4.0.5                   progress_1.2.2                filelock_1.0.2                RcppAnnoy_0.0.21             
[109] GenomeInfoDb_1.38.1           R.cache_0.16.0                irlba_2.3.5.1                 KernSmooth_2.23-22           
[113] colorspace_2.1-0              BiocGenerics_0.48.1           DBI_1.1.3                     tidyselect_1.2.0             
[117] processx_3.8.2                proxyC_0.3.4                  bit_4.0.5                     compiler_4.3.1               
[121] curl_5.1.0                    xml2_1.3.6                    DelayedArray_0.28.0           plotly_4.10.3                
[125] shadowtext_0.1.2              scales_1.3.0                  lmtest_0.9-40                 callr_3.7.3                  
[129] rappdirs_0.3.3                stringr_1.5.1                 digest_0.6.33                 goftest_1.2-3                
[133] spatstat.utils_3.0-4          XVector_0.42.0                htmltools_0.5.7               pkgconfig_2.0.3              
[137] MatrixGenerics_1.14.0         dbplyr_2.4.0                  fastmap_1.1.1                 rlang_1.1.2                  
[141] GlobalOptions_0.1.2           htmlwidgets_1.6.3             shiny_1.8.0                   farver_2.1.1                 
[145] zoo_1.8-12                    jsonlite_1.8.8                BiocParallel_1.36.0           GOSemSim_2.28.0              
[149] R.oo_1.25.0                   RCurl_1.98-1.13               magrittr_2.0.3                GenomeInfoDbData_1.2.11      
[153] ggplotify_0.1.2               dotCall64_1.1-1               patchwork_1.1.3               Rhdf5lib_1.24.0              
[157] munsell_0.5.0                 Rcpp_1.0.11                   TrajectoryUtils_1.10.0        ggnewscale_0.4.9             
[161] ape_5.7-1                     viridis_0.6.4                 reticulate_1.34.0             stringi_1.8.2                
[165] ggraph_2.1.0                  zlibbioc_1.48.0               MASS_7.3-60                   AnnotationHub_3.10.0         
[169] plyr_1.8.9                    pkgbuild_1.4.2                parallel_4.3.1                listenv_0.9.0                
[173] ggrepel_0.9.4                 deldir_2.0-2                  Biostrings_2.70.1             graphlayouts_1.0.2           
[177] splines_4.3.1                 tensor_1.5                    hms_1.1.3                     circlize_0.4.15              
[181] ps_1.7.5                      igraph_1.5.1                  spatstat.geom_3.2-7           reshape2_1.4.4               
[185] biomaRt_2.58.0                stats4_4.3.1                  BiocVersion_3.18.1            XML_3.99-0.16                
[189] RcppParallel_5.1.7            BiocManager_1.30.22           renv_1.0.3                    foreach_1.5.2                
[193] tweenr_2.0.2                  httpuv_1.6.12                 RANN_2.6.1                    tidyr_1.3.0                  
[197] purrr_1.0.2                   polyclip_1.10-6               future_1.33.0                 clue_0.3-65                  
[201] scattermore_1.2               ggplot2_3.4.4                 ggforce_0.4.1                 xtable_1.8-4                 
[205] tidytree_0.4.5                later_1.3.1                   viridisLite_0.4.2             snow_0.4-4                   
[209] tibble_3.2.1                  clusterProfiler_4.10.0        aplot_0.2.2                   memoise_2.0.1                
[213] AnnotationDbi_1.64.1          IRanges_2.36.0                cluster_2.1.6                 globals_0.16.2
browningChen commented 9 months ago

Thoes are due to BiocParallel's conflict,I finaly ran by changing BiocParallel to future in RunDEtest&RunEnrichment

RunDEtest2 <- function (srt, group_by = NULL, group1 = NULL, group2 = NULL, 
    cells1 = NULL, cells2 = NULL, features = NULL, markers_type = c("all", 
        "paired", "conserved", "disturbed"), grouping.var = NULL, 
    meta.method = c("maximump", "minimump", "wilkinsonp", "meanp", 
        "sump", "votep"), test.use = "wilcox", only.pos = TRUE, 
    fc.threshold = 1.5, base = 2, pseudocount.use = 1, mean.fxn = NULL, 
    min.pct = 0.1, min.diff.pct = -Inf, max.cells.per.ident = Inf, 
    latent.vars = NULL, min.cells.feature = 3, min.cells.group = 3, 
    norm.method = "LogNormalize", p.adjust.method = "bonferroni", 
    slot = "data", assay = NULL, #BPPARAM = BiocParallel::bpparam(), 
    seed = 11, verbose = TRUE, ...) 
{
    set.seed(seed)
    markers_type <- match.arg(markers_type)
    meta.method <- match.arg(meta.method)
    if (markers_type %in% c("conserved", "disturbed")) {
        if (is.null(grouping.var)) {
            stop("'grouping.var' must be provided when finding conserved or disturbed markers")
        }
    }
    assay <- assay %||% DefaultAssay(srt)
    status <- check_DataType(srt, slot = slot, assay = assay)
    if (slot == "counts" && status != "raw_counts") {
        stop("Data in the 'counts' slot is not raw counts.")
    }
    if (slot == "data" && status != "log_normalized_counts") {
        if (status == "raw_counts") {
            warning("Data in the 'data' slot is raw counts. Perform NormalizeData(LogNormalize) on the data.", 
                immediate. = TRUE)
            srt <- NormalizeData(object = srt, assay = assay, 
                normalization.method = "LogNormalize", verbose = FALSE)
        }
        if (status == "raw_normalized_counts") {
            warning("Data in the 'data' slot is raw_normalized_counts. Perform NormalizeData(LogNormalize) on the data.", 
                immediate. = TRUE)
            srt <- NormalizeData(object = srt, assay = assay, 
                normalization.method = "LogNormalize", verbose = FALSE)
        }
        if (status == "unknown") {
            warning("Data in the 'data' slot is unknown. Please check the data type.")
        }
    }
    #bpprogressbar(BPPARAM) <- TRUE
    #bpRNGseed(BPPARAM) <- seed
    time_start <- Sys.time()
    if (verbose) {
        message(paste0("[", time_start, "] ", "Start DEtest"))
        #message("Workers: ", bpworkers(BPPARAM))
    }
    if (fc.threshold < 1) {
        stop("fc.threshold must be greater than or equal to 1")
    }
    if (!is.null(cells1) || !is.null(group1)) {
        if (is.null(cells1)) {
            if (is.null(group_by)) {
                stop("'group_by' must be provided when 'group1' specified")
            }
            cells1 <- colnames(srt)[srt[[group_by, drop = TRUE]] %in% 
                group1]
        }
        if (is.null(cells2) && !is.null(group2)) {
            cells2 <- colnames(srt)[srt[[group_by, drop = TRUE]] %in% 
                group2]
        }
        if (!all(cells1 %in% colnames(srt))) {
            stop("cells1 has some cells not in the Seurat object.")
        }
        if (is.null(cells2)) {
            cells2 <- colnames(srt)[!colnames(srt) %in% cells1]
            group2 <- "others"
        }
        if (!all(cells2 %in% colnames(srt))) {
            stop("cells2 has some cells not in the Seurat object.")
        }
        if (length(cells1) < 3 || length(cells2) < 3) {
            stop("Cell groups must have more than 3 cells")
        }
        if (verbose) {
            message("Find ", markers_type, " markers(", test.use, 
                ") for custom cell groups...")
        }
        if (markers_type == "all") {
            markers <- FindMarkers(object = Assays(srt, assay), 
                slot = slot, cells.1 = cells1, cells.2 = cells2, 
                features = features, test.use = test.use, logfc.threshold = log(fc.threshold, 
                  base = base), base = base, min.pct = min.pct, 
                min.diff.pct = min.diff.pct, max.cells.per.ident = max.cells.per.ident, 
                min.cells.feature = min.cells.feature, min.cells.group = min.cells.group, 
                latent.vars = latent.vars, only.pos = only.pos, 
                norm.method = norm.method, pseudocount.use = pseudocount.use, 
                mean.fxn = mean.fxn, verbose = FALSE, ...)
            if (!is.null(markers) && nrow(markers) > 0) {
                markers[, "gene"] <- rownames(markers)
                markers[, "group1"] <- group1 %||% "group1"
                markers[, "group2"] <- group2 %||% "group2"
                rownames(markers) <- NULL
                markers[, "group1"] <- factor(markers[, "group1"], 
                  levels = unique(markers[, "group1"]))
                if ("p_val" %in% colnames(markers)) {
                  markers[, "p_val_adj"] <- p.adjust(markers[, 
                    "p_val"], method = p.adjust.method)
                }
                markers[, "test_group_number"] <- as.integer(table(markers[["gene"]])[markers[, 
                  "gene"]])
                MarkersMatrix <- as.data.frame.matrix(table(markers[, 
                  c("gene", "group1")]))
                markers[, "test_group"] <- apply(MarkersMatrix, 
                  1, function(x) {
                    paste0(colnames(MarkersMatrix)[x > 0], collapse = ";")
                  })[markers[, "gene"]]
                srt@tools[["DEtest_custom"]][[paste0("AllMarkers_", 
                  test.use)]] <- markers
                srt@tools[["DEtest_custom"]][["cells1"]] <- cells1
                srt@tools[["DEtest_custom"]][["cells2"]] <- cells2
            }
            else {
                warning("No markers found.", immediate. = TRUE)
            }
        }
        if (markers_type == "conserved") {
            markers <- FindConservedMarkers2(object = srt, assay = assay, 
                slot = slot, cells.1 = cells1, cells.2 = cells2, 
                features = features, grouping.var = grouping.var, 
                test.use = test.use, logfc.threshold = log(fc.threshold, 
                  base = base), base = base, min.pct = min.pct, 
                min.diff.pct = min.diff.pct, max.cells.per.ident = max.cells.per.ident, 
                min.cells.feature = min.cells.feature, min.cells.group = min.cells.group, 
                latent.vars = latent.vars, only.pos = only.pos, 
                norm.method = norm.method, meta.method = meta.method, 
                pseudocount.use = pseudocount.use, mean.fxn = mean.fxn, 
                verbose = FALSE, ...)
            if (!is.null(markers) && nrow(markers) > 0) {
                markers[, "gene"] <- rownames(markers)
                markers[, "group1"] <- group1 %||% "group1"
                markers[, "group2"] <- group2 %||% "group2"
                rownames(markers) <- NULL
                markers[, "group1"] <- factor(markers[, "group1"], 
                  levels = unique(markers[, "group1"]))
                if ("p_val" %in% colnames(markers)) {
                  markers[, "p_val_adj"] <- p.adjust(markers[, 
                    "p_val"], method = p.adjust.method)
                }
                markers[, "test_group_number"] <- as.integer(table(markers[["gene"]])[markers[, 
                  "gene"]])
                MarkersMatrix <- as.data.frame.matrix(table(markers[, 
                  c("gene", "group1")]))
                markers[, "test_group"] <- apply(MarkersMatrix, 
                  1, function(x) {
                    paste0(colnames(MarkersMatrix)[x > 0], collapse = ";")
                  })[markers[, "gene"]]
                srt@tools[["DEtest_custom"]][[paste0("ConservedMarkers_", 
                  test.use)]] <- markers
                srt@tools[["DEtest_custom"]][["cells1"]] <- cells1
                srt@tools[["DEtest_custom"]][["cells2"]] <- cells2
            }
            else {
                warning("No markers found.", immediate. = TRUE)
            }
        }
        if (markers_type == "disturbed") {
            srt_tmp <- srt
            srt_tmp[[grouping.var, drop = TRUE]][setdiff(colnames(srt_tmp), 
                cells1)] <- NA
            #bpprogressbar(BPPARAM) <- FALSE
            srt_tmp <- RunDEtest(srt = srt_tmp, assay = assay, 
                slot = slot, group_by = grouping.var, markers_type = "all", 
                features = features, test.use = test.use, fc.threshold = fc.threshold, 
                base = base, min.pct = min.pct, min.diff.pct = min.diff.pct, 
                max.cells.per.ident = max.cells.per.ident, min.cells.feature = min.cells.feature, 
                min.cells.group = min.cells.group, latent.vars = latent.vars, 
                only.pos = only.pos, norm.method = norm.method, 
                p.adjust.method = p.adjust.method, pseudocount.use = pseudocount.use, 
                mean.fxn = mean.fxn, #BPPARAM = BPPARAM, 
                seed = seed, 
                verbose = FALSE, ...)
            markers <- srt_tmp@tools[[paste0("DEtest_", grouping.var)]][[paste0("AllMarkers_", 
                test.use)]]
            if (!is.null(markers) && nrow(markers) > 0) {
                colnames(markers) <- gsub("group", "var", colnames(markers))
                markers[["group1"]] <- group1 %||% "group1"
                srt@tools[["DEtest_custom"]][[paste0("DisturbedMarkers_", 
                  test.use)]] <- markers
                srt@tools[["DEtest_custom"]][["cells1"]] <- cells1
            }
            else {
                warning("No markers found.", immediate. = TRUE)
            }
        }
    }
    else {
        if (is.null(group_by)) {
            cell_group <- Idents(srt)
            group_by <- "active.ident"
        }
        else {
            cell_group <- srt[[group_by, drop = TRUE]]
        }
        if (!is.factor(cell_group)) {
            cell_group <- factor(cell_group, levels = unique(cell_group))
        }
        cell_group <- lapply(levels(cell_group), function(x) {
            cell <- cell_group[cell_group == x]
            out <- sample(cell, size = min(max.cells.per.ident, 
                length(cell)), replace = FALSE)
            return(out)
        })
        cell_group <- setNames(unlist(lapply(cell_group, function(x) x), 
            use.names = FALSE), unlist(lapply(cell_group, names)))
        args1 <- list(object = Assays(srt, assay), slot = slot, 
            features = features, test.use = test.use, logfc.threshold = log(fc.threshold, 
                base = base), base = base, min.pct = min.pct, 
            min.diff.pct = min.diff.pct, max.cells.per.ident = max.cells.per.ident, 
            min.cells.feature = min.cells.feature, min.cells.group = min.cells.group, 
            latent.vars = latent.vars, only.pos = only.pos, norm.method = norm.method, 
            pseudocount.use = pseudocount.use, mean.fxn = mean.fxn, 
            verbose = FALSE, ...)
        if (verbose) {
            message("Find ", markers_type, " markers(", test.use, 
                ") among ", nlevels(cell_group), " groups...")
        }
        if (markers_type == "all") {
            AllMarkers <- future.apply::future_lapply(levels(cell_group), FUN = function(group) {
                cells.1 <- names(cell_group)[which(cell_group == 
                  group)]
                cells.2 <- names(cell_group)[which(cell_group != 
                  group)]
                if (length(cells.1) < 3 || length(cells.2) < 
                  3) {
                  return(NULL)
                }
                else {
                  args1[["cells.1"]] <- cells.1
                  args1[["cells.2"]] <- cells.2
                  markers <- do.call(FindMarkers, args1)
                  if (!is.null(markers) && nrow(markers) > 0) {
                    markers[, "gene"] <- rownames(markers)
                    markers[, "group1"] <- as.character(group)
                    markers[, "group2"] <- "others"
                    if ("p_val" %in% colnames(markers)) {
                      markers[, "p_val_adj"] <- p.adjust(markers[, 
                        "p_val"], method = p.adjust.method)
                    }
                    return(markers)
                  }
                  else {
                    return(NULL)
                  }
                }
            })
            AllMarkers <- do.call(rbind.data.frame, AllMarkers)
            if (!is.null(AllMarkers) && nrow(AllMarkers) > 0) {
                rownames(AllMarkers) <- NULL
                AllMarkers[, "group1"] <- factor(AllMarkers[, 
                  "group1"], levels = levels(cell_group))
                AllMarkers[, "test_group_number"] <- as.integer(table(AllMarkers[["gene"]])[AllMarkers[, 
                  "gene"]])
                AllMarkersMatrix <- as.data.frame.matrix(table(AllMarkers[, 
                  c("gene", "group1")]))
                AllMarkers[, "test_group"] <- apply(AllMarkersMatrix, 
                  1, function(x) {
                    paste0(colnames(AllMarkersMatrix)[x > 0], 
                      collapse = ";")
                  })[AllMarkers[, "gene"]]
                srt@tools[[paste0("DEtest_", group_by)]][[paste0("AllMarkers_", 
                  test.use)]] <- AllMarkers
            }
            else {
                srt@tools[[paste0("DEtest_", group_by)]][[paste0("AllMarkers_", 
                  test.use)]] <- data.frame()
            }
        }
        if (markers_type == "paired") {
            pair <- expand.grid(x = levels(cell_group), y = levels(cell_group))
            pair <- pair[pair[, 1] != pair[, 2], , drop = FALSE]
            PairedMarkers <- future.apply::future_lapply(seq_len(nrow(pair)), function(i) {
                cells.1 <- names(cell_group)[which(cell_group == 
                  pair[i, 1])]
                cells.2 <- names(cell_group)[which(cell_group == 
                  pair[i, 2])]
                if (length(cells.1) < 3 || length(cells.2) < 
                  3) {
                  return(NULL)
                }
                else {
                  args1[["cells.1"]] <- cells.1
                  args1[["cells.2"]] <- cells.2
                  markers <- do.call(FindMarkers, args1)
                  if (!is.null(markers) && nrow(markers) > 0) {
                    markers[, "gene"] <- rownames(markers)
                    markers[, "group1"] <- as.character(pair[i, 
                      1])
                    markers[, "group2"] <- as.character(pair[i, 
                      2])
                    if ("p_val" %in% colnames(markers)) {
                      markers[, "p_val_adj"] <- p.adjust(markers[, 
                        "p_val"], method = p.adjust.method)
                    }
                    return(markers)
                  }
                  else {
                    return(NULL)
                  }
                }
            } )
            PairedMarkers <- do.call(rbind.data.frame, PairedMarkers)
            if (!is.null(PairedMarkers) && nrow(PairedMarkers) > 
                0) {
                rownames(PairedMarkers) <- NULL
                PairedMarkers[, "group1"] <- factor(PairedMarkers[, 
                  "group1"], levels = levels(cell_group))
                PairedMarkers[, "test_group_number"] <- as.integer(table(PairedMarkers[["gene"]])[PairedMarkers[, 
                  "gene"]])
                PairedMarkersMatrix <- as.data.frame.matrix(table(PairedMarkers[, 
                  c("gene", "group1")]))
                PairedMarkers[, "test_group"] <- apply(PairedMarkersMatrix, 
                  1, function(x) {
                    paste0(colnames(PairedMarkersMatrix)[x > 
                      0], collapse = ";")
                  })[PairedMarkers[, "gene"]]
                srt@tools[[paste0("DEtest_", group_by)]][[paste0("PairedMarkers_", 
                  test.use)]] <- PairedMarkers
                srt@tools[[paste0("DEtest_", group_by)]][[paste0("PairedMarkersMatrix_", 
                  test.use)]] <- PairedMarkersMatrix
            }
            else {
                warning("No markers found.", immediate. = TRUE)
                srt@tools[[paste0("DEtest_", group_by)]][[paste0("PairedMarkers_", 
                  test.use)]] <- data.frame()
                srt@tools[[paste0("DEtest_", group_by)]][[paste0("PairedMarkersMatrix_", 
                  test.use)]] <- NULL
            }
        }
        if (markers_type == "conserved") {
            ConservedMarkers <- future.apply::future_lapply(levels(cell_group), 
                FUN = function(group) {
                  cells.1 <- names(cell_group)[which(cell_group == 
                    group)]
                  cells.2 <- names(cell_group)[which(cell_group != 
                    group)]
                  if (length(cells.1) < 3 || length(cells.2) < 
                    3) {
                    return(NULL)
                  }
                  else {
                    args1[["cells.1"]] <- cells.1
                    args1[["cells.2"]] <- cells.2
                    args1[["object"]] <- srt
                    args1[["assay"]] <- assay
                    args1[["grouping.var"]] <- grouping.var
                    args1[["meta.method"]] <- meta.method
                    markers <- do.call(FindConservedMarkers2, 
                      args1)
                    if (!is.null(markers) && nrow(markers) > 
                      0) {
                      markers[, "gene"] <- rownames(markers)
                      markers[, "group1"] <- as.character(group)
                      markers[, "group2"] <- "others"
                      if ("p_val" %in% colnames(markers)) {
                        markers[, "p_val_adj"] <- p.adjust(markers[, 
                          "p_val"], method = p.adjust.method)
                      }
                      return(markers)
                    }
                    else {
                      return(NULL)
                    }
                  }
                } )
            ConservedMarkers <- do.call(rbind.data.frame, lapply(ConservedMarkers, 
                function(x) x[, c("avg_log2FC", "pct.1", "pct.2", 
                  "max_pval", "p_val", "p_val_adj", "gene", "group1", 
                  "group2")]))
            if (!is.null(ConservedMarkers) && nrow(ConservedMarkers) > 
                0) {
                rownames(ConservedMarkers) <- NULL
                ConservedMarkers[, "group1"] <- factor(ConservedMarkers[, 
                  "group1"], levels = levels(cell_group))
                ConservedMarkers[, "test_group_number"] <- as.integer(table(ConservedMarkers[["gene"]])[ConservedMarkers[, 
                  "gene"]])
                ConservedMarkersMatrix <- as.data.frame.matrix(table(ConservedMarkers[, 
                  c("gene", "group1")]))
                ConservedMarkers[, "test_group"] <- apply(ConservedMarkersMatrix, 
                  1, function(x) {
                    paste0(colnames(ConservedMarkersMatrix)[x > 
                      0], collapse = ";")
                  })[ConservedMarkers[, "gene"]]
                ConservedMarkers <- ConservedMarkers[, c("avg_log2FC", 
                  "pct.1", "pct.2", "max_pval", "p_val", "p_val_adj", 
                  "gene", "group1", "group2", "test_group_number", 
                  "test_group")]
                srt@tools[[paste0("DEtest_", group_by)]][[paste0("ConservedMarkers_", 
                  test.use)]] <- ConservedMarkers
            }
            else {
                warning("No markers found.", immediate. = TRUE)
                srt@tools[[paste0("DEtest_", group_by)]][[paste0("ConservedMarkers_", 
                  test.use)]] <- data.frame()
            }
        }
        if (markers_type == "disturbed") {
            #sub_BPPARAM <- SerialParam()
            #bpprogressbar(sub_BPPARAM) <- FALSE
            DisturbedMarkers <- future.apply::future_lapply(levels(cell_group), 
                FUN = function(group) {
                  cells.1 <- names(cell_group)[which(cell_group == 
                    group)]
                  srt_tmp <- srt
                  srt_tmp[[grouping.var, drop = TRUE]][setdiff(colnames(srt_tmp), 
                    cells.1)] <- NA
                  if (length(na.omit(unique(srt_tmp[[grouping.var, 
                    drop = TRUE]]))) < 2) {
                    return(NULL)
                  }
                  else {
                    srt_tmp <- RunDEtest(srt = srt_tmp, assay = assay, 
                      slot = slot, group_by = grouping.var, markers_type = "all", 
                      features = features, test.use = test.use, 
                      fc.threshold = fc.threshold, base = base, 
                      min.pct = min.pct, min.diff.pct = min.diff.pct, 
                      max.cells.per.ident = max.cells.per.ident, 
                      min.cells.feature = min.cells.feature, 
                      min.cells.group = min.cells.group, latent.vars = latent.vars, 
                      only.pos = only.pos, norm.method = norm.method, 
                      p.adjust.method = p.adjust.method, pseudocount.use = pseudocount.use, 
                      mean.fxn = mean.fxn, #BPPARAM = sub_BPPARAM, 
                      seed = seed, verbose = FALSE, ...)
                    markers <- srt_tmp@tools[[paste0("DEtest_", 
                      grouping.var)]][[paste0("AllMarkers_", 
                      test.use)]]
                    if (!is.null(markers) && nrow(markers) > 
                      0) {
                      colnames(markers) <- gsub("group", "var", 
                        colnames(markers))
                      markers[["group1"]] <- as.character(group)
                      return(markers)
                    }
                    else {
                      return(NULL)
                    }
                  }
                } )
            DisturbedMarkers <- do.call(rbind.data.frame, DisturbedMarkers)
            if (!is.null(DisturbedMarkers) && nrow(DisturbedMarkers) > 
                0) {
                rownames(DisturbedMarkers) <- NULL
                DisturbedMarkers[, "group1"] <- factor(DisturbedMarkers[, 
                  "group1"], levels = levels(cell_group))
                DisturbedMarkers[, "test_group_number"] <- as.integer(table(unique(DisturbedMarkers[, 
                  c("gene", "group1")])[["gene"]])[DisturbedMarkers[, 
                  "gene"]])
                DisturbedMarkersMatrix <- as.data.frame.matrix(table(DisturbedMarkers[, 
                  c("gene", "group1")]))
                DisturbedMarkers[, "test_group"] <- apply(DisturbedMarkersMatrix, 
                  1, function(x) {
                    paste0(colnames(DisturbedMarkersMatrix)[x > 
                      0], collapse = ";")
                  })[DisturbedMarkers[, "gene"]]
                srt@tools[[paste0("DEtest_", group_by)]][[paste0("DisturbedMarkers_", 
                  test.use)]] <- DisturbedMarkers
            }
            else {
                warning("No markers found.", immediate. = TRUE)
                srt@tools[[paste0("DEtest_", group_by)]][[paste0("DisturbedMarkers_", 
                  test.use)]] <- data.frame()
            }
        }
    }
    time_end <- Sys.time()
    if (verbose) {
        message(paste0("[", time_end, "] ", "DEtest done"))
        message("Elapsed time:", format(round(difftime(time_end, 
            time_start), 2), format = "%Y-%m-%d %H:%M:%S"))
    }
    return(srt)
}
RunEnrichment2 <- function (srt = NULL, group_by = NULL, test.use = "wilcox", DE_threshold = "avg_log2FC > 0 & p_val_adj < 0.05", 
                            geneID = NULL, geneID_groups = NULL, geneID_exclude = NULL, 
                            IDtype = "symbol", result_IDtype = "symbol", species = "Homo_sapiens", 
                            db = "GO_BP", db_update = FALSE, db_version = "latest", db_combine = FALSE, 
                            convert_species = TRUE, Ensembl_version = 103, mirror = NULL, 
                            TERM2GENE = NULL, TERM2NAME = NULL, minGSSize = 10, maxGSSize = 500, 
                            unlimited_db = c("Chromosome", "GeneType", "TF", "Enzyme", 
                                             "CSPA"), GO_simplify = FALSE, GO_simplify_cutoff = "p.adjust < 0.05", 
                            simplify_method = "Wang", simplify_similarityCutoff = 0.7, 
                            seed = 11) 
{
  #bpprogressbar(BPPARAM) <- TRUE
  #bpRNGseed(BPPARAM) <- seed
  time_start <- Sys.time()
  message(paste0("[", time_start, "] ", "Start Enrichment"))
  #message("Workers: ", bpworkers(BPPARAM))
  use_srt <- FALSE
  if (is.null(geneID)) {
    if (is.null(group_by)) {
      group_by <- "custom"
    }
    slot <- paste0("DEtest_", group_by)
    if (!slot %in% names(srt@tools) || length(grep(pattern = "AllMarkers", 
                                                   names(srt@tools[[slot]]))) == 0) {
      stop("Cannot find the DEtest result for the group '", 
           group_by, "'. You may perform RunDEtest first.")
    }
    index <- grep(pattern = paste0("AllMarkers_", test.use), 
                  names(srt@tools[[slot]]))[1]
    if (is.na(index)) {
      stop("Cannot find the 'AllMarkers_", test.use, "' in the DEtest result.")
    }
    de <- names(srt@tools[[slot]])[index]
    de_df <- srt@tools[[slot]][[de]]
    de_df <- de_df[with(de_df, eval(rlang::parse_expr(DE_threshold))), 
                   , drop = FALSE]
    rownames(de_df) <- seq_len(nrow(de_df))
    geneID <- de_df[["gene"]]
    geneID_groups <- de_df[["group1"]]
    use_srt <- TRUE
  }
  if (is.null(geneID_groups)) {
    geneID_groups <- rep(" ", length(geneID))
  }
  if (!is.factor(geneID_groups)) {
    geneID_groups <- factor(geneID_groups, levels = unique(geneID_groups))
  }
  geneID_groups <- factor(geneID_groups, levels = levels(geneID_groups)[levels(geneID_groups) %in% 
                                                                          geneID_groups])
  if (length(geneID_groups) != length(geneID)) {
    stop("length(geneID_groups)!=length(geneID)")
  }
  names(geneID_groups) <- geneID
  input <- data.frame(geneID = geneID, geneID_groups = geneID_groups)
  input <- input[!geneID %in% geneID_exclude, , drop = FALSE]
  if (is.null(TERM2GENE)) {
    db_list <- PrepareDB(species = species, db = db, db_update = db_update, 
                         db_version = db_version, db_IDtypes = IDtype, convert_species = convert_species, 
                         Ensembl_version = Ensembl_version, mirror = mirror)
  }
  else {
    colnames(TERM2GENE) <- c("Term", IDtype)
    db <- "custom"
    db_list <- list()
    db_list[[species]][[db]][["TERM2GENE"]] <- unique(TERM2GENE)
    if (is.null(TERM2NAME)) {
      TERM2NAME <- unique(TERM2GENE)[, c(1, 1)]
      colnames(TERM2NAME) <- c("Term", "Name")
    }
    db_list[[species]][[db]][["TERM2NAME"]] <- unique(TERM2NAME)
    db_list[[species]][[db]][["version"]] <- "custom"
  }
  if (isTRUE(db_combine)) {
    message("Create 'Combined' database ...")
    TERM2GENE <- do.call(rbind, lapply(db_list[[species]], 
                                       function(x) x[["TERM2GENE"]][, c("Term", IDtype)]))
    TERM2NAME <- do.call(rbind, lapply(names(db_list[[species]]), 
                                       function(x) {
                                         db_list[[species]][[x]][["TERM2NAME"]][["Name"]] <- paste0(db_list[[species]][[x]][["TERM2NAME"]][["Name"]], 
                                                                                                    " [", x, "]")
                                         db_list[[species]][[x]][["TERM2NAME"]][, c("Term", 
                                                                                    "Name")]
                                       }))
    version <- unlist(lapply(db_list[[species]], function(x) as.character(x[["version"]])))
    version <- paste0(names(version), ":", version, collapse = ";")
    db <- "Combined"
    db_list[[species]][[db]][["TERM2GENE"]] <- unique(TERM2GENE)
    db_list[[species]][[db]][["TERM2NAME"]] <- unique(TERM2NAME)
    db_list[[species]][[db]][["version"]] <- unique(version)
  }
  if (length(unique(c(IDtype, result_IDtype))) != 1) {
    res <- GeneConvert(geneID = unique(geneID), geneID_from_IDtype = IDtype, 
                       geneID_to_IDtype = result_IDtype, species_from = species, 
                       species_to = species, Ensembl_version = Ensembl_version, 
                       mirror = mirror)
    geneMap <- res$geneID_collapse
    colnames(geneMap)[colnames(geneMap) == "from_geneID"] <- IDtype
  }
  else {
    geneMap <- data.frame(IDtype = unique(geneID), row.names = unique(geneID))
    colnames(geneMap)[1] <- IDtype
  }
  input[[IDtype]] <- geneMap[as.character(input$geneID), IDtype]
  input[[result_IDtype]] <- geneMap[as.character(input$geneID), 
                                    result_IDtype]
  input <- unnest(input, cols = c(IDtype, result_IDtype))
  input <- input[!is.na(input[[IDtype]]), , drop = FALSE]
  message("Permform enrichment...")
  comb <- expand.grid(group = levels(geneID_groups), term = db, 
                      stringsAsFactors = FALSE)
  res_list <- future.apply::future_lapply(seq_len(nrow(comb)), function(i, id) {
    group <- comb[i, "group"]
    term <- comb[i, "term"]
    gene <- input[input$geneID_groups == group, IDtype]
    gene_mapid <- input[input$geneID_groups == group, result_IDtype]
    TERM2GENE_tmp <- db_list[[species]][[term]][["TERM2GENE"]][, 
                                                               c("Term", IDtype)]
    TERM2NAME_tmp <- db_list[[species]][[term]][["TERM2NAME"]]
    dup <- duplicated(TERM2GENE_tmp)
    na <- rowSums(is.na(TERM2GENE_tmp)) > 0
    TERM2GENE_tmp <- TERM2GENE_tmp[!(dup | na), , drop = FALSE]
    TERM2NAME_tmp <- TERM2NAME_tmp[TERM2NAME_tmp[["Term"]] %in% 
                                     TERM2GENE_tmp[["Term"]], , drop = FALSE]
    enrich_res <- enricher(gene = gene, minGSSize = ifelse(term %in% 
                                                             unlimited_db, 1, minGSSize), maxGSSize = ifelse(term %in% 
                                                                                                               unlimited_db, Inf, maxGSSize), pAdjustMethod = "BH", 
                           pvalueCutoff = Inf, qvalueCutoff = Inf, universe = NULL, 
                           TERM2GENE = TERM2GENE_tmp, TERM2NAME = TERM2NAME_tmp)
    if (!is.null(enrich_res) && nrow(enrich_res@result) > 
        0) {
      result <- enrich_res@result
      result[["Groups"]] <- group
      result[["Database"]] <- term
      result[["Version"]] <- as.character(db_list[[species]][[term]][["version"]])
      IDlist <- strsplit(result$geneID, split = "/")
      result$geneID <- unlist(lapply(IDlist, function(x) {
        x_result <- NULL
        for (i in x) {
          if (i %in% geneMap[[IDtype]]) {
            x_result <- c(x_result, unique(geneMap[geneMap[[IDtype]] == 
                                                     i, result_IDtype]))
          }
          else {
            x_result <- c(x_result, i)
          }
        }
        return(paste0(x_result, collapse = "/"))
      }))
      enrich_res@result <- result
      enrich_res@gene2Symbol <- as.character(gene_mapid)
      if (isTRUE(GO_simplify) && term %in% c("GO", "GO_BP", 
                                             "GO_CC", "GO_MF")) {
        sim_res <- enrich_res
        if (term == "GO") {
          sim_res@result[["ONTOLOGY"]] <- setNames(TERM2NAME_tmp[["ONTOLOGY"]], 
                                                   TERM2NAME_tmp[["Term"]])[sim_res@result[["ID"]]]
          sim_res@ontology <- "GOALL"
        }
        else {
          sim_res@ontology <- gsub(pattern = "GO_", replacement = "", 
                                   x = term)
        }
        nterm_simplify <- sum(with(sim_res@result, eval(rlang::parse_expr(GO_simplify_cutoff))))
        if (nterm_simplify <= 1) {
          warning(group, "|", term, " has no term to simplify.", 
                  immediate. = TRUE)
        }
        else {
          sim_res@result <- sim_res@result[with(sim_res@result, 
                                                eval(rlang::parse_expr(GO_simplify_cutoff))), 
                                           , drop = FALSE]
          semData <- db_list[[species]][[term]][["semData"]]
          ipclock(id)
          sim_res <- simplify(sim_res, measure = simplify_method, 
                              cutoff = simplify_similarityCutoff, semData = semData)
          ipcunlock(id)
          result_sim <- sim_res@result
          result_sim[["Groups"]] <- group
          result_sim[["Database"]] <- paste0(term, "_sim")
          result_sim[["Version"]] <- as.character(db_list[[species]][[term]][["version"]])
          result_sim[["ONTOLOGY"]] <- NULL
          sim_res@result <- result_sim
          enrich_res <- list(enrich_res, sim_res)
          names(enrich_res) <- paste(group, c(term, paste0(term, 
                                                           "_sim")), sep = "-")
        }
      }
    }
    else {
      enrich_res <- NULL
    }
    return(enrich_res)
  })
  nm <- paste(comb$group, comb$term, sep = "-")
  sim_index <- sapply(res_list, function(x) length(x) == 2)
  sim_list <- unlist(res_list[sim_index], recursive = FALSE)
  raw_list <- res_list[!sim_index]
  names(raw_list) <- nm[!sim_index]
  results <- c(raw_list, sim_list)
  results <- results[!sapply(results, is.null)]
  results <- results[intersect(c(nm, paste0(nm, "_sim")), names(results))]
  enrichment <- do.call(rbind, lapply(results, function(x) x@result))
  rownames(enrichment) <- NULL
  time_end <- Sys.time()
  message(paste0("[", time_end, "] ", "Enrichment done"))
  message("Elapsed time:", format(round(difftime(time_end, 
                                                 time_start), 2), format = "%Y-%m-%d %H:%M:%S"))
  res <- list(enrichment = enrichment, results = results, geneMap = geneMap, 
              input = input)
  if (isTRUE(use_srt)) {
    res[["DE_threshold"]] <- DE_threshold
    srt@tools[[paste("Enrichment", group_by, test.use, sep = "_")]] <- res
    return(srt)
  }
  else {
    return(res)
  }
}
browningChen commented 9 months ago

Just change bplapply to future.apply::future_lapply can help