Mykrobe-tools / mykrobe

Antibiotic resistance prediction in minutes
MIT License
102 stars 26 forks source link

Mykrobe reads multiple input files #93

Closed davidjstudholme closed 3 years ago

davidjstudholme commented 4 years ago

I am not 100% sure whether this is a bug or just unexpected behaviour ...

I am using Mykrobe 0.8.1 with Mac OS X Catalina.

I have a folder containing three BAM files:

Taras-iMac:BAM and index files (Nigeria) djs217$ ls
ERR1679585.versus.GCA_000195955.2_ASM19595v2_genomic.aln.sorted.rmdup.bam
ERR1679585.versus.GCA_000195955.2_ASM19595v2_genomic.aln.sorted.rmdup.bam.bai
ERR1679586.versus.GCA_000195955.2_ASM19595v2_genomic.aln.sorted.rmdup.bam
ERR1679586.versus.GCA_000195955.2_ASM19595v2_genomic.aln.sorted.rmdup.bam.bai
ERR1679587.versus.GCA_000195955.2_ASM19595v2_genomic.aln.sorted.rmdup.bam
ERR1679587.versus.GCA_000195955.2_ASM19595v2_genomic.aln.sorted.rmdup.bam.bai

I drag and drop one of these .bam files into the Mykrobe 'Analyse sample' window. Yet the results that I obtain seem to be derived from two of the .bam files:

Taras-iMac:Desktop djs217$ grep ERR test.json 
  "ERR1679587.versus.GCA_000195955.2_ASM19595v2_genomic.aln.sorted.rmdup": {
      "/Users/djs217/Downloads/Mycobacterium/BAM and index files (Nigeria)/ERR1679587.versus.GCA_000195955.2_ASM19595v2_genomic.aln.sorted.rmdup.bam",
      "/Users/djs217/Downloads/Mycobacterium/BAM and index files (Nigeria)/ERR1679586.versus.GCA_000195955.2_ASM19595v2_genomic.aln.sorted.rmdup.bam"

Here is the complete .json:

{
  "ERR1679587.versus.GCA_000195955.2_ASM19595v2_genomic.aln.sorted.rmdup": {
    "susceptibility": {
      "Ofloxacin": {
        "predict": "S"
      },
      "Moxifloxacin": {
        "predict": "S"
      },
      "Isoniazid": {
        "predict": "r",
        "called_by": {
          "katG_S315X-GCT2155167GGT": {
            "variant": null,
            "genotype": [
              0,
              1
            ],
            "genotype_likelihoods": [
              -1749.7583360393392,
              -41.149410021281255,
              -2905.8272685376987
            ],
            "info": {
              "coverage": {
                "reference": {
                  "percent_coverage": 100,
                  "median_depth": 48,
                  "min_non_zero_depth": 46,
                  "kmer_count": 952,
                  "klen": 21
                },
                "alternate": {
                  "percent_coverage": 100,
                  "median_depth": 29,
                  "min_non_zero_depth": 27,
                  "kmer_count": 673,
                  "klen": 20
                }
              },
              "expected_depths": [
                89
              ],
              "contamination_depths": [],
              "filter": [],
              "conf": 1709
            },
            "_cls": "Call.VariantCall"
          },
          "inhA_I194T-ATC1674781ACC": {
            "variant": null,
            "genotype": [
              0,
              1
            ],
            "genotype_likelihoods": [
              -1995.580154806023,
              -135.76348633429006,
              -1722.3820333367769
            ],
            "info": {
              "coverage": {
                "reference": {
                  "percent_coverage": 100,
                  "median_depth": 29,
                  "min_non_zero_depth": 27,
                  "kmer_count": 577,
                  "klen": 21
                },
                "alternate": {
                  "percent_coverage": 100,
                  "median_depth": 33,
                  "min_non_zero_depth": 30,
                  "kmer_count": 643,
                  "klen": 20
                }
              },
              "expected_depths": [
                89
              ],
              "contamination_depths": [],
              "filter": [],
              "conf": 1860
            },
            "_cls": "Call.VariantCall"
          },
          "fabG1_C-15X-C1673425T": {
            "variant": null,
            "genotype": [
              0,
              1
            ],
            "genotype_likelihoods": [
              -3618.938281430198,
              -52.836386824656074,
              -1948.5744751776533
            ],
            "info": {
              "coverage": {
                "reference": {
                  "percent_coverage": 100,
                  "median_depth": 41,
                  "min_non_zero_depth": 34,
                  "kmer_count": 789,
                  "klen": 21
                },
                "alternate": {
                  "percent_coverage": 100,
                  "median_depth": 60,
                  "min_non_zero_depth": 54,
                  "kmer_count": 1197,
                  "klen": 21
                }
              },
              "expected_depths": [
                89
              ],
              "contamination_depths": [],
              "filter": [],
              "conf": 3566
            },
            "_cls": "Call.VariantCall"
          }
        }
      },
      "Kanamycin": {
        "predict": "S"
      },
      "Ethambutol": {
        "predict": "r",
        "called_by": {
          "embB_D328Y-GAT4247495TAT": {
            "variant": null,
            "genotype": [
              0,
              1
            ],
            "genotype_likelihoods": [
              -3044.266878988706,
              -17.53275308244065,
              -2299.1536124740906
            ],
            "info": {
              "coverage": {
                "reference": {
                  "percent_coverage": 100,
                  "median_depth": 44,
                  "min_non_zero_depth": 40,
                  "kmer_count": 874,
                  "klen": 21
                },
                "alternate": {
                  "percent_coverage": 100,
                  "median_depth": 53,
                  "min_non_zero_depth": 50,
                  "kmer_count": 1056,
                  "klen": 21
                }
              },
              "expected_depths": [
                89
              ],
              "contamination_depths": [],
              "filter": [],
              "conf": 3027
            },
            "_cls": "Call.VariantCall"
          }
        }
      },
      "Streptomycin": {
        "predict": "r",
        "called_by": {
          "gid_S149R-GCT4407756GCG": {
            "variant": null,
            "genotype": [
              0,
              1
            ],
            "genotype_likelihoods": [
              -1163.1567416519706,
              -131.99783326518354,
              -4271.422755329824
            ],
            "info": {
              "coverage": {
                "reference": {
                  "percent_coverage": 100,
                  "median_depth": 64,
                  "min_non_zero_depth": 57,
                  "kmer_count": 1265,
                  "klen": 21
                },
                "alternate": {
                  "percent_coverage": 100,
                  "median_depth": 29,
                  "min_non_zero_depth": 27,
                  "kmer_count": 524,
                  "klen": 19
                }
              },
              "expected_depths": [
                89
              ],
              "contamination_depths": [],
              "filter": [],
              "conf": 1031
            },
            "_cls": "Call.VariantCall"
          }
        }
      },
      "Ciprofloxacin": {
        "predict": "S"
      },
      "Pyrazinamide": {
        "predict": "S"
      },
      "Rifampicin": {
        "predict": "r",
        "called_by": {
          "rpoB_D435X-GAC761109GTC": {
            "variant": null,
            "genotype": [
              0,
              1
            ],
            "genotype_likelihoods": [
              -2127.9607562541355,
              -21.398304887363338,
              -3213.6020368178592
            ],
            "info": {
              "coverage": {
                "reference": {
                  "percent_coverage": 100,
                  "median_depth": 53,
                  "min_non_zero_depth": 49,
                  "kmer_count": 1076,
                  "klen": 21
                },
                "alternate": {
                  "percent_coverage": 100,
                  "median_depth": 43,
                  "min_non_zero_depth": 39,
                  "kmer_count": 814,
                  "klen": 20
                }
              },
              "expected_depths": [
                89
              ],
              "contamination_depths": [],
              "filter": [],
              "conf": 2107
            },
            "_cls": "Call.VariantCall"
          },
          "rpoB_H445X-CAC761139TAC": {
            "variant": null,
            "genotype": [
              0,
              1
            ],
            "genotype_likelihoods": [
              -2826.1226974560204,
              -10.684381854753155,
              -2613.233192737559
            ],
            "info": {
              "coverage": {
                "reference": {
                  "percent_coverage": 100,
                  "median_depth": 48,
                  "min_non_zero_depth": 46,
                  "kmer_count": 959,
                  "klen": 21
                },
                "alternate": {
                  "percent_coverage": 100,
                  "median_depth": 50,
                  "min_non_zero_depth": 48,
                  "kmer_count": 1011,
                  "klen": 21
                }
              },
              "expected_depths": [
                89
              ],
              "contamination_depths": [],
              "filter": [],
              "conf": 2815
            },
            "_cls": "Call.VariantCall"
          }
        }
      },
      "Amikacin": {
        "predict": "S"
      },
      "Capreomycin": {
        "predict": "S"
      }
    },
    "phylogenetics": {
      "phylo_group": {
        "Mycobacterium_tuberculosis_complex": {
          "percent_coverage": 98.902,
          "median_depth": 89
        }
      },
      "sub_complex": {
        "Unknown": {
          "percent_coverage": -1,
          "median_depth": -1
        }
      },
      "species": {
        "Mycobacterium_tuberculosis": {
          "percent_coverage": 98.241,
          "median_depth": 83
        }
      },
      "lineage": {
        "European_American": {
          "percent_coverage": 100,
          "median_depth": 101
        }
      }
    },
    "kmer": 21,
    "probe_sets": [
      "/Applications/Mykrobe.app/Contents/Resources/bin/mykrobe/data/panels/tb-species-170421.fasta.gz",
      "/Applications/Mykrobe.app/Contents/Resources/bin/mykrobe/data/panels/tb-hunt-probe-set-jan-03-2019.fasta.gz"
    ],
    "files": [
      "/Users/djs217/Downloads/Mycobacterium/BAM and index files (Nigeria)/ERR1679587.versus.GCA_000195955.2_ASM19595v2_genomic.aln.sorted.rmdup.bam",
      "/Users/djs217/Downloads/Mycobacterium/BAM and index files (Nigeria)/ERR1679586.versus.GCA_000195955.2_ASM19595v2_genomic.aln.sorted.rmdup.bam"
    ],
    "version": {
      "mykrobe-predictor": "v0.8.1",
      "mykrobe-atlas": "v0.8.1"
    },
    "genotype_model": "kmer_count"
  }
}
iqbal-lab commented 4 years ago

That is not the intended behaviour. Thank you for raising this.

martinghunt commented 3 years ago

This is fixed in the new release, v0.9.0: https://github.com/Mykrobe-tools/mykrobe/releases/tag/v0.9.0