fanglab / nanodisco

nanodisco: a toolbox for discovering and exploiting multiple types of DNA methylation from individual bacteria and microbiomes using nanopore sequencing.
Other
68 stars 7 forks source link

caught segfault in nanodisco preprocess on provided data(TP) #80

Open Stride99 opened 1 year ago

Stride99 commented 1 year ago

Dear devs,

When I run `nanodisco preprocess` on the TP WGA data provided in your thesis, I get the following error: caught segfault address 0x269, cause 'memory not mapped'. Have you encountered this before? Thank you!

The command I ran: nanodisco preprocess -p 12 -f dataset/unzip_data/MinION_TP_WGA -s TP_WGA -o analysis/TP/preprocessed_subset -r reference/TP/TP_sequence.fasta

The full output is:
[2023-09-18 23:01:45] Localize all fast5 files.
[2023-09-18 23:01:58] Found 415203 fast5 files.
[2023-09-18 23:01:58] Extract sequences from fast5.
Processed fast5 [===================>-----] 81% eta: 13m (elapsed: 00:55:59)
caught segfault address 0x269, cause 'memory not mapped'

Traceback: 1: H5Dread(h5dataset = h5dataset, h5spaceFile = h5spaceFile, h5spaceMem = h5spaceMem, compoundAsDataFrame = compoundAsDataFrame, drop = drop, ...) 2: doTryCatch(return(expr), name, parentenv, handler) 3: tryCatchOne(expr, names, parentenv, handlers[[1L]]) 4: tryCatchList(expr, classes, parentenv, handlers) 5: tryCatch(expr, error = function(e) { call <- conditionCall(e) if (!is.null(call)) { if (identical(call[[1L]], quote(doTryCatch))) call <- sys.call(-4L) dcall <- deparse(call)[1L] prefix <- paste("Error in", dcall, ": ") LONG <- 75L sm <- strsplit(conditionMessage(e), "\n")[[1L]] w <- 14L + nchar(dcall, type = "w") + nchar(sm[1L], type = "w") if (is.na(w)) w <- 14L + nchar(dcall, type = "b") + nchar(sm[1L], type = "b") if (w > LONG) prefix <- paste0(prefix, "\n ") } else prefix <- "Error : " msg <- paste0(prefix, conditionMessage(e), "\n") .Internal(seterrmessage(msg[1L])) if (!silent && isTRUE(getOption("show.error.messages"))) { cat(msg, file = outFile) .Internal(printDeferredWarnings()) } invisible(structure(msg, class = "try-error", condition = e))}) 6: try({ obj <- H5Dread(h5dataset = h5dataset, h5spaceFile = h5spaceFile, h5spaceMem = h5spaceMem, compoundAsDataFrame = compoundAsDataFrame, drop = drop, ...)}) 7: h5readDataset(h5dataset, index = index, start = start, stride = stride, block = block, count = count, compoundAsDataFrame = compoundAsDataFrame, drop = drop, ...) 8: h5read(h5loc, L[[i]]$name, ..., native = native) 9: h5loadData(loc$H5Identifier, L, all = all, ..., native = native) 10: h5dump(gid, start = start, stride = stride, block = block, count = count, compoundAsDataFrame = compoundAsDataFrame, callGeneric = callGeneric, ...) 
11: h5read(f5_file_path, paste0("/Analyses/", basecall_group, "/BaseCalled_template")) 12: eval(xpr, envir = envir) 13: eval(xpr, envir = envir) 14: doTryCatch(return(expr), name, parentenv, handler) 15: tryCatchOne(expr, names, parentenv, handlers[[1L]]) 16: tryCatchList(expr, classes, parentenv, handlers) 17: tryCatch(eval(xpr, envir = envir), error = function(e) e) 18: doTryCatch(return(expr), name, parentenv, handler) 19: tryCatchOne(expr, names, parentenv, handlers[[1L]]) 20: tryCatchList(expr, classes, parentenv, handlers) 21: tryCatch({ repeat { args <- nextElem(it) if (obj$verbose) { cat(sprintf("evaluation # %d:\n", i)) print(args) } for (a in names(args)) assign(a, args[[a]], pos = envir, inherits = FALSE) r <- tryCatch(eval(xpr, envir = envir), error = function(e) e) if (obj$verbose) { cat("result of evaluating expression:\n") print(r) } tryCatch(accumulator(list(r), i), error = function(e) { cat("error calling combine function:\n") print(e) NULL }) i <- i + 1 }}, error = function(e) { if (!identical(conditionMessage(e), "StopIteration")) stop(simpleError(conditionMessage(e), expr))}) 22: e$fun(obj, substitute(ex), parent.frame(), e$data) 23: foreach(f5_file = chunk_list_fast5_files[[chunk_idx]], .final = function(x) { do.call(rbind, x)}) %do% { f5_file_path <- normalizePath(paste0(path_fast5, f5_file)) f5_content <- h5ls(f5_file_path, recursive = FALSE) f5_content <- paste0(f5_content$group, f5_content$name) fast5_type <- find.fast5.type(f5_content) if (fast5_type == "multi") { linear_sequence <- foreach(f5_read_data = f5_content, .final = function(x) { trimws(do.call(paste0, x)) }) %do% { f5_data <- h5read(f5_file_path, paste0(f5_read_data, "/Analyses/", basecall_group, "/BaseCalled_template")) subset_linear_sequence <- paste0(extact.fasta.from.fast5(f5_data, f5_file_path, row_to_keep), "\n") return(subset_linear_sequence) } } else if (fast5_type == "single") { f5_data <- h5read(f5_file_path, paste0("/Analyses/", basecall_group, 
"/BaseCalled_template")) linear_sequence <- extact.fasta.from.fast5(f5_data, f5_file_path, row_to_keep) } else { linear_sequence <- "uncalled" } return(linear_sequence)} 24: eval(c.expr, envir = args, enclos = envir) 25: eval(c.expr, envir = args, enclos = envir) 26: doTryCatch(return(expr), name, parentenv, handler) 27: tryCatchOne(expr, names, parentenv, handlers[[1L]]) 28: tryCatchList(expr, classes, parentenv, handlers) 29: tryCatch(eval(c.expr, envir = args, enclos = envir), error = function(e) e) 30: FUN(X[[i]], ...) 31: lapply(X = S, FUN = FUN, ...) 32: doTryCatch(return(expr), name, parentenv, handler) 33: tryCatchOne(expr, names, parentenv, handlers[[1L]]) 34: tryCatchList(expr, classes, parentenv, handlers) 35: tryCatch(expr, error = function(e) { call <- conditionCall(e) if (!is.null(call)) { if (identical(call[[1L]], quote(doTryCatch))) call <- sys.call(-4L) dcall <- deparse(call)[1L] prefix <- paste("Error in", dcall, ": ") LONG <- 75L sm <- strsplit(conditionMessage(e), "\n")[[1L]] w <- 14L + nchar(dcall, type = "w") + nchar(sm[1L], type = "w") if (is.na(w)) w <- 14L + nchar(dcall, type = "b") + nchar(sm[1L], type = "b") if (w > LONG) prefix <- paste0(prefix, "\n ") } else prefix <- "Error : " msg <- paste0(prefix, conditionMessage(e), "\n") .Internal(seterrmessage(msg[1L])) if (!silent && isTRUE(getOption("show.error.messages"))) { cat(msg, file = outFile) .Internal(printDeferredWarnings()) } invisible(structure(msg, class = "try-error", condition = e))}) 36: try(lapply(X = S, FUN = FUN, ...), silent = TRUE) 37: sendMaster(try(lapply(X = S, FUN = FUN, ...), silent = TRUE)) 38: FUN(X[[i]], ...) 
39: lapply(seq_len(cores), inner.do) 40: mclapply(argsList, FUN, mc.preschedule = preschedule, mc.set.seed = set.seed, mc.silent = silent, mc.cores = cores) 41: e$fun(obj, substitute(ex), parent.frame(), e$data) 42: foreach(chunk_idx = seq(1, length(chunk_list_fast5_files)), .final = function(x) { do.call(rbind, x)}) %dopar% { subset_linear_sequences <- foreach(f5_file = chunk_list_fast5_files[[chunk_idx]], .final = function(x) { do.call(rbind, x) }) %do% { f5_file_path <- normalizePath(paste0(path_fast5, f5_file)) f5_content <- h5ls(f5_file_path, recursive = FALSE) f5_content <- paste0(f5_content$group, f5_content$name) fast5_type <- find.fast5.type(f5_content) if (fast5_type == "multi") { linear_sequence <- foreach(f5_read_data = f5_content, .final = function(x) { trimws(do.call(paste0, x)) }) %do% { f5_data <- h5read(f5_file_path, paste0(f5_read_data, "/Analyses/", basecall_group, "/BaseCalled_template")) subset_linear_sequence <- paste0(extact.fasta.from.fast5(f5_data, f5_file_path, row_to_keep), "\n") return(subset_linear_sequence) } } else if (fast5_type == "single") { f5_data <- h5read(f5_file_path, paste0("/Analyses/", basecall_group, "/BaseCalled_template")) linear_sequence <- extact.fasta.from.fast5(f5_data, f5_file_path, row_to_keep) } else { linear_sequence <- "uncalled" } return(linear_sequence) } if (use_doMC) { progress.tracker(pb, chunk_idx, nb_threads) } else { p() } return(subset_linear_sequences)} 43: extract.sequence(path_input, base_name, path_output, nb_threads, nb_chunks, seq_type, basecall_group) An irrecoverable exception occurred. R is aborting now ... [2023-09-19 00:08:42] All fast5 files processed.
[2023-09-19 00:08:44] Map reads.