HenrikBengtsson / future.BatchJobs

:rocket: R package: future.BatchJobs: A Future API for Parallel and Distributed Processing using BatchJobs [Intentionally archived on CRAN on 2021-01-08]
https://cran.r-project.org/package=future.BatchJobs
8 stars 0 forks source link

TROUBLESHOOTING: More informative error message than "Fatal error occured: 101. qsub produced exit code 27; output qsub: submit error ..." #52

Open HenrikBengtsson opened 8 years ago

HenrikBengtsson commented 8 years ago

I've recently started getting a few of these errors when submitting jobs, and I have no idea why:

Error in submitJobs(reg, ids = id, resources = resources) :
  Fatal error occured: 101. qsub produced exit code 27; output qsub: submit error (Command syntax invalid MSG=Invalid Syntax)
In addition: Warning message:
In setMethodS3.default("readTotalCNsAndBAFs", "SeqzFile", function(this,  :
  Method already existed and was overwritten: readTotalCNsAndBAFs.SeqzFile
> traceback()
15: stop(e)
14: value[[3L]](cond)
13: tryCatchOne(expr, names, parentenv, handlers[[1L]])
12: tryCatchList(expr, classes, parentenv, handlers)
11: tryCatch({
        for (i in seq_along(ids)) {
            id = ids[[i]]
            id1 = id[1L]
            retries = 0L
            repeat {
                if (limit.concurrent.jobs && length(cf$listJobs(conf,
                    reg)) >= conf$max.concurrent.jobs) {
                    batch.result = makeSubmitJobResult(status = 10L,
                      batch.job.id = NA_character_, "Max concurrent jobs exhausted")
                }
                else {
                    interrupted = TRUE
                    submit.time = now()
                    batch.result = cf$submitJob(conf = conf, reg = reg,
                      job.name = sprintf("%s-%i", reg$id, id1), rscript = rscripts[i],
                      log.file = getLogFilePath(reg, id1), job.dir = getJobDirs(reg,
                        id1), resources = resources, arrayjobs = if (chunks.as.arrayjobs)
                        length(id)
                      else 1L)
                }
                if (batch.result$status == 0L) {
                    submit.msgs$push(dbMakeMessageSubmitted(reg,
                      id, time = submit.time, batch.job.id = batch.result$batch.job.id,
                      first.job.in.chunk.id = if (is.chunked)
                        id1
                      else NULL, resources.timestamp = resources.timestamp))
                    interrupted = FALSE
                    bar$inc(1L)
                    break
                }
                interrupted = FALSE
                if (batch.result$status > 0L && batch.result$status <=
                    100L) {
                    if (is.finite(max.retries) && retries > max.retries)
                      stopf("Retried already %i times to submit. Aborting.",
                        max.retries)
                    Sys.sleep(wait(retries))
                    logger$log(batch.result$msg)
                    retries = retries + 1L
                }
                else if (batch.result$status > 100L && batch.result$status <=
                    200L) {
                    stopf("Fatal error occured: %i. %s", batch.result$status,
                      batch.result$msg)
                }
                else {
                    stopf("Illegal status code %s returned from cluster functions!",
                      batch.result$status)
                }
            }
        }
    }, error = bar$error)
10: submitJobs(reg, ids = id, resources = resources)
9: run.BatchJobsFuture(future)
8: run(future)
7: plan(oplans)
6: mpileup.BamDataSet(bams[1:3], fa = fa, chromosomes = chrLabels,
       verbose = -10) at <text>#1
5: mpileup(bams[1:3], fa = fa, chromosomes = chrLabels, verbose = -10) at 1.sequenza,mpileup.R#90
4: eval(expr, envir, enclos)
3: eval(ei, envir)
2: withVisible(eval(ei, envir))
1: source("1.sequenza,mpileup.R", echo = TRUE)