Open zzgw opened 5 years ago
Thanks for the report. Could you add the code that you used to run it?
after several times of running, the same exact error repeats. I also run the pbmc.loom data, it failed at GetVariableGenes step, but astonishingly same exact error. seg default when try to write back to loom with the same address 0x30. my thinking right now is probably one of the parameters feed to your code line 279: res <- .Call("R_H5Dread", self$id, mem_type_id, mem_space_id, file_space_id, dataset_xfer_pl$id, 280 buffer, FALSE, PACKAGE="hdf5r") is not there, and your code does not catch the error. the runPCA in fact finished calculation as the progress bar shows 100%. but when it tries to write it failed.
since I am doing testing, the code is a little bit messy, but it is this: `Sys.getenv("R_HOME")
library(loomR)
library(Seurat) library(rbenchmark) library(profvis)
wd = "/data/loom" setwd(wd)
save_bm = function(bm) { save(bm, file = paste0("benchmark-", deparse(substitute(bm)), ".RData")) }
bm0 = benchmark(mca.matrix <- readRDS(file = "MCA_merged_mat.rds"), replications = 1) save_bm(bm0) mca.metadata <- read.csv(file = "MCA_All-batch-removed-assignments.csv", row.names = 1) #assume cell name is unique head(mca.metadata) dim(mca.metadata) #266277 cells length(unique(mca.metadata$ClusterID)) #818 length(unique(mca.metadata$Batch)) #73
object.size(mca.matrix) #3136204120 bytes object.size(mca.metadata) #51302704 bytes
class(mca.matrix) class(mca.metadata)
tmp = benchmark(cells.use <- which(colnames(mca.matrix) %in% rownames(mca.metadata)))
benchmark(cells.use <- which(x = colnames(x = mca.matrix) %in% rownames(x = mca.metadata))) mca.matrix <- mca.matrix[, cells.use] object.size(mca.matrix) #1863239736 bytes mca.metadata <- mca.metadata[colnames(x = mca.matrix), ]
bm1 = benchmark(mca <- create(filename = "mca.loom", data = mca.matrix, display.progress = FALSE, calc.numi = TRUE), replications = 1) save_bm(bm1)
bm2 = benchmark(mca <- connect(filename = "mca.loom", mode = "r+"), replications = 1)
mca mca$row.attrs #only 1 dataset gene_names mca$col.attrs #three datasets cell_names, nGene, nUMI mca$layers #nothing mca$row_graphs mca$col_graphs mca$version mca$chunk.points() #7073 cells per chunk mca$chunk.indices() #cell chunk to list
cell.names <- mca[["col_attrs/cell_names"]][] gene.names <- mca[["row_attrs/gene_names"]][]
attrs <- c("nUMI", "nGene", "cell_names") attr.df <- mca$get.attribute.df(MARGIN = 2, attribute.names = attrs)
methods(class="loom")
bm3 = benchmark(NormalizeData(object = mca, chunk.size = 1000, scale.factor = 10000, display.progress = FALSE), replications = 1) save_bm(bm3)
bm4 = benchmark(FindVariableGenes(object = mca, overwrite = T), replications = 0)
save_bm(bm4) bm5 = benchmark(GetVariableGenes(object = mca), replications = 0)
save_bm(bm5) hv.genes <- head(x = GetVariableGenes(object = mca)$index, n = 1000)
mito.genes <- grep(pattern = "^mt-", x = mca[["row_attrs/gene_names"]][], value = FALSE) mca$apply(name = "col_attrs/percent_mito", FUN = function(mat) { return(rowSums(x = mat[, mito.genes])/rowSums(x = mat)) }, MARGIN = 2, dataset.use = "matrix", chunk.size = 1000, overwrite = T)
ScaleData(object = mca, genes.use = hv.genes, chunk.size = 20, vars.to.regress = "percent_mito", overwrite = T)
nGene = mca[["col_attrs/nGene"]][] nUMI = mca[["col_attrs/nUMI"]][] percent.mito = mca[["col_attrs/percent_mito"]][]
VlnPlot(object = mca, features.plot = c(nGene, nUMI, percent.mito), nCol = 3)
RunPCA(object = mca, pc.genes = hv.genes, online.pca = FALSE, pcs.compute = 100, do.print = TRUE, pcs.print = 1:5, genes.print = 5, chunk.size = 100)
PCElbowPlot(object = mca, num.pc = 100)
sessionInfo() `
I am running Seurat loomR mca dataset tutorial, everything goes well till runpca step. the nohup output file shows following: the 100% progress bar means runpca itself finished. the traceback seems showing that when write the loom file using hdf5r somehow failed.
| ^M |======================================================================| 100%
caught segfault address 0x30, cause 'memory not mapped'
Traceback: 1: .Call("R_H5Dread", self$id, mem_type_id, mem_space_id, file_space_id, dataset_xfer_pl$id, buffer, FALSE, PACKAGE = "hdf5r") 2: self$read_low_level(file_space = self_space_id, mem_space = mem_space_id, dataset_xfer_pl = dataset_xfer_pl, set_dim = TRUE, dim_to_set = dim_to_set, drop = drop) 3: x$read(args = args, dataset_xfer_pl = dataset_xfer_pl, flags = flags, drop = drop, envir = envir) 4:
[.H5D
(object[[row.names]], rows.use) 5: object[[row.names]][rows.use] 6: BlockCov.loom(object = object, mat = scale.data, display.progress = display.progress, rows.use = pc.genes, row.names = gene.names) 7: BlockCov(object = object, mat = scale.data, display.progress = display.progress, rows.use = pc.genes, row.names = gene.names) 8: RunPCA.loom(object = mca, pc.genes = hv.genes, online.pca = FALSE, pcs.compute = 100, chunk.size = 100, overwrite = T) 9: RunPCA(object = mca, pc.genes = hv.genes, online.pca = FALSE, pcs.compute = 100, chunk.size = 100, overwrite = T) An irrecoverable exception occurred. R is aborting now ...my r environment
Matrix products: default BLAS: /usr/lib/libblas/libblas.so.3.6.0 LAPACK: /usr/lib/lapack/liblapack.so.3.6.0
locale: [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
[3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
[5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
[7] LC_PAPER=en_US.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
attached base packages: [1] stats graphics grDevices utils datasets methods base
other attached packages: [1] rbenchmark_1.0.0 Seurat_2.3.1 Matrix_1.2-15 cowplot_0.9.4
[5] ggplot2_3.1.0 loomR_0.2.0 itertools_0.1-3 iterators_1.0.10 [9] hdf5r_1.0.1 R6_2.3.0
loaded via a namespace (and not attached): [1] diffusionMap_1.1-0.1 Rtsne_0.15 VGAM_1.0-6
[4] colorspace_1.4-0 ggridges_0.5.1 class_7.3-15
[7] modeltools_0.2-22 mclust_5.4.2 htmlTable_1.13.1
[10] base64enc_0.1-3 proxy_0.4-22 rstudioapi_0.9.0
[13] npsurv_0.4-0 flexmix_2.3-14 bit64_0.9-7
[16] RSpectra_0.13-1 onlinePCA_1.3.1 lubridate_1.7.4
[19] prodlim_2018.04.18 mvtnorm_1.0-8 ranger_0.11.0
[22] codetools_0.2-16 splines_3.5.2 R.methodsS3_1.7.1
[25] lsei_1.2-0 robustbase_0.93-3 knitr_1.21
[28] tclust_1.4-1 jsonlite_1.6 Formula_1.2-3
[31] caret_6.0-81 ica_1.0-2 cluster_2.0.7-1
[34] kernlab_0.9-27 png_0.1-7 R.oo_1.22.0
[37] compiler_3.5.2 backports_1.1.3 assertthat_0.2.0
[40] lazyeval_0.2.1 lars_1.2 acepack_1.4.1
[43] htmltools_0.3.6 tools_3.5.2 bindrcpp_0.2.2
[46] igraph_1.2.2 gtable_0.2.0 glue_1.3.0
[49] RANN_2.6.1 reshape2_1.4.3 dplyr_0.7.8
[52] Rcpp_1.0.0 trimcluster_0.1-2.1 gdata_2.18.0
[55] ape_5.2 nlme_3.1-137 fpc_2.1-11.1
[58] lmtest_0.9-36 gbRd_0.4-11 timeDate_3043.102
[61] xfun_0.4 gower_0.1.2 stringr_1.3.1
[64] irlba_2.3.3 gtools_3.8.1 DEoptimR_1.0-8
[67] zoo_1.8-4 MASS_7.3-51.1 scales_1.0.0
[70] ipred_0.9-8 doSNOW_1.0.16 parallel_3.5.2
[73] RColorBrewer_1.1-2 yaml_2.2.0 reticulate_1.10
[76] pbapply_1.4-0 gridExtra_2.3 segmented_0.5-3.0
[79] rpart_4.1-13 latticeExtra_0.6-28 stringi_1.2.4
[82] foreach_1.4.4 checkmate_1.9.1 caTools_1.17.1.1
[85] bibtex_0.4.2 lava_1.6.4 dtw_1.20-1
[88] Rdpack_0.10-1 SDMTools_1.1-221 rlang_0.3.1
[91] pkgconfig_2.0.2 prabclus_2.2-7 bitops_1.0-6
[94] lattice_0.20-38 ROCR_1.0-7 purrr_0.3.0
[97] bindr_0.1.1 recipes_0.1.4 htmlwidgets_1.3
[100] bit_1.1-14 tidyselect_0.2.5 plyr_1.8.4
[103] magrittr_1.5 snow_0.4-3 gplots_3.0.1.1
[106] generics_0.0.2 Hmisc_4.2-0 pillar_1.3.1
[109] foreign_0.8-71 withr_2.1.2 mixtools_1.1.0
[112] fitdistrplus_1.0-14 survival_2.43-3 scatterplot3d_0.3-41 [115] nnet_7.3-12 tsne_0.1-3 tibble_2.0.1
[118] crayon_1.3.4 KernSmooth_2.23-15 grid_3.5.2
[121] data.table_1.12.0 FNN_1.1.2.2 ModelMetrics_1.2.2
[124] metap_1.1 digest_0.6.18 diptest_0.75-7
[127] tidyr_0.8.2 R.utils_2.7.0 stats4_3.5.2
[130] munsell_0.5.0
my machine is an aws es2 instance with 64 cores and 256G memory Welcome to Ubuntu 16.04.2 LTS (GNU/Linux 4.4.0-1026-aws x86_64)
Support: https://ubuntu.com/advantage
Get cloud support with Ubuntu Advantage Cloud Guest: http://www.ubuntu.com/business/services/cloud
177 packages can be updated. 0 updates are security updates.
New release '18.04.1 LTS' available. Run 'do-release-upgrade' to upgrade to it.