ssadedin / ximmer

Ximmer is a system for CNV calling on exome and targeted genomic sequencing
http://ssadedin.github.io/ximmer/
GNU Lesser General Public License v2.1
19 stars 10 forks source link

Analysis failed #13

Open natashapinto opened 6 years ago

natashapinto commented 6 years ago

Hi I've got a new error; the program spits out multiple errors like the one below:

------------------------------------- run_exome_depth ( 8 ) --------------------------------------

Command in stage run_exome_depth failed with exit status = 1 :

unset TMP; unset TEMP; TEMPDIR="/tmp/701468094965155-0" setsid Rscript - <<'!'

    source("/data/natasha/ximmer/eval/pipeline/tools/r-utils/cnv_utils.R")

    library(ExomeDepth)

    # Reference sequence
    hg19.fasta = "/data/humanGenome/human_g1k_v37.fasta"

    # Read the target / covered region
    print(sprintf("Reading target regions for 8 from Exome_Mito_formated_new.analysable.bed"))
    dsd.covered = read.bed(pipe("grep '^8[^0-9]' Exome_Mito_formated_new.analysable.bed"))

    # ExomeDepth wants the columns named differently
    dsd.covered = data.frame(
        chromosome=dsd.covered$chr,
        start=dsd.covered$start,
        end=dsd.covered$end,
        name=paste(dsd.covered$chr,dsd.covered$start,dsd.covered$end,sep="-")
    )

    # Now we need all the bam files. Generate them from sample names
    dsd.samples = c("O1708596","O1801976","O1803283")

    print(sprintf("Read %d samples",length(dsd.samples)))

    # Here we rely on ASSUMPTIONs:  - Single BAM file per sample
    dsd.bam.files = c('O1708596'='/data/natasha/HSQ18-1809968/WTCHG_547564_HA02.bam','O1801976'='/data/natasha/HSQ18-1809968/WTCHG_547564_HB02.bam','O1803283'='/data/natasha/HSQ18-1809968/WTCHG_547564_HC02.bam')

    print(sprintf("Found %d bam files",length(dsd.bam.files)))

    # Finally we can call ExomeDepth
    dsd.counts <- getBamCounts(bed.frame = dsd.covered,
                              bam.files = dsd.bam.files,
                              include.chr = F,
                              referenceFasta = hg19.fasta)

    # Note: at this point dsd.counts has column names reflecting the file names => convert to actual sample names
    print(sprintf("Successfully counted reads in BAM files"))

    colnames(dsd.counts) = c("GC", dsd.samples)

    # Problem: sample names starting with numbers get mangled. So convert them back, but ignore the first column
    # which is actually the GC percentage
    dsd.samples = colnames(dsd.counts)[-1]

    write(paste("start.p","end.p","type","nexons","start","end","chromosome","id","BF","reads.expected","reads.observed","reads.ratio","sample",sep="\t"), "analysis/ed/analysis.8.exome_depth.tsv")

    for(dsd.test.sample in dsd.samples) {

        print(sprintf("Processing sample %s", dsd.test.sample))

        dsd.reference.samples = dsd.samples[-match(dsd.test.sample, dsd.samples)]

        dsd.counts.df = as.data.frame(dsd.counts[,dsd.reference.samples])[,-1:-6]

        dsd.test.sample.counts = dsd.counts[,dsd.test.sample][[1]]

        print(sprintf("Selecting reference set for %s ...", dsd.test.sample ))
        dsd.reference = select.reference.set(
                                 test.counts = dsd.counts[,dsd.test.sample][[1]],
                                 reference.counts = as.matrix(as.data.frame(dsd.counts[,dsd.reference.samples])[,-1:-6]),
                                 bin.length = dsd.covered$end - dsd.covered$start
                                )

        # Get counts just for the reference set
        dsd.reference.counts = apply(dsd.counts.df[,dsd.reference$reference.choice,drop=F],1,sum)

        print(sprintf("Creating ExomeDepth object ..."))
        dsd.ed = new("ExomeDepth",
                      test = dsd.test.sample.counts,
                      reference = dsd.reference.counts,
                      formula = "cbind(test, reference) ~ 1")

        print(sprintf("Calling CNVs ..."))
        dsd.cnvs = CallCNVs(x = dsd.ed,
                                transition.probability = 0.0001,
                                chromosome = dsd.covered$chromosome,
                                start = dsd.covered$start,
                                end = dsd.covered$end,
                                name = dsd.covered$name,
                                expected.CNV.length=50000)

        dsd.results = dsd.cnvs@CNV.calls
        dsd.results$sample = rep(dsd.test.sample, nrow(dsd.results))

        print(sprintf("Writing results ..."))
        if(nrow(dsd.results)>0) {
            write.table(file="analysis/ed/analysis.8.exome_depth.tsv",
                        x=dsd.results,
                        row.names=F,
                        col.names=F,
                        sep="\t",
                        append=T)
        }
    }

    print(sprintf("Finished"))

!

Use 'bpipe errors' to see output from failed commands.

Caught: java.lang.RuntimeException: Analysis failed for results/run1: java.lang.RuntimeException: Analysis failed for results/run1: at Ximmer.runAnalysisForRun(Ximmer.groovy:462) at Ximmer$runAnalysisForRun$2.callCurrent(Unknown Source) at Ximmer.runAnalysis(Ximmer.groovy:337) at Ximmer$runAnalysis$1.callCurrent(Unknown Source) at Ximmer.run(Ximmer.groovy:199) at Ximmer$run.call(Unknown Source) at Ximmer.main(Ximmer.groovy:1279)

ssadedin commented 6 years ago

Hmm, unfortunately the output of the actual error isn't captured above so I can't tell if it's an installation type error (ExomeDepth not installed, working, etc) or a problem with the data.

You could try finding the analysis run directory and doing:

/data/natasha/ximmer/eval/tools/bpipe/0.9.9.6/bin/bpipe errors

The run directory will probably be inside the output directory that you specified on the command line and called 'run1' but that can change depending on your configuration.

You could check that ExomeDepth is installed just by running R and then entering

library(ExomeDepth)

I notice that you're running with only 3 input files which could be too few and causing ExomeDepth to fail to identify any usable controls. That's a bit harder to deal with - normally you would try to give it at least 8 - 10 samples to work with.

your-highness commented 5 years ago

Dear @ssadedin ,

I can confirm that increasing the number of bam files keeps ExomeDepth from failing.

Still having some errors with conifer and CODEX on the same data set though.

Best,