broadinstitute / seqr-loading-pipelines

hail-based pipelines for annotating variant callsets and exporting them to elasticsearch
MIT License
22 stars 20 forks source link

Create an updated pipeline-runner docker image that includes new VEP dependencies and v03_pipeline code. #820

Closed bpblanken closed 1 month ago

bpblanken commented 1 month ago
Mini QA results for VEP 110 on GRCh37:
Variants with at least one defined transcript consequence in VEP 85: 25752605
Variants with at least one defined transcript consequence in VEP 110: 25753151
Variants whose first defined transcript consequence is equal between VEP 85 and VEP 110: 25743763
Variants whose first defined transcript consequence differs between VEP 85 and VEP 110: 8842

A few examples:

[Struct(locus=Locus(contig=1, position=14513, reference_genome=GRCh37), alleles=['G', 'A'], vep85=Struct(amino_acids=None, canonical=1, codons=None, gene_id='ENSG00000227232', hgvsc='ENST00000438504.2:n.1633C>T', hgvsp=None, transcript_id='ENST00000438504', biotype_id=59, consequence_term_ids=[23, 26], is_lof_nagnag=None, lof_filter_ids=None), vep110=Struct(amino_acids=None, canonical=None, codons=None, gene_id='ENSG00000227232', hgvsc='ENST00000488147.1:n.1254-12C>T', hgvsp=None, transcript_id='ENST00000488147', biotype_id=59, consequence_term_ids=[14, 24, 26], is_lof_nagnag=None, lof_filter_ids=None)),
 Struct(locus=Locus(contig=1, position=15956, reference_genome=GRCh37), alleles=['G', 'A'], vep85=Struct(amino_acids=None, canonical=1, codons=None, gene_id='ENSG00000227232', hgvsc='ENST00000438504.2:n.1098-9C>T', hgvsp=None, transcript_id='ENST00000438504', biotype_id=59, consequence_term_ids=[24, 26], is_lof_nagnag=None, lof_filter_ids=None), vep110=Struct(amino_acids=None, canonical=None, codons=None, gene_id='ENSG00000227232', hgvsc='ENST00000423562.1:n.982-9C>T', hgvsp=None, transcript_id='ENST00000423562', biotype_id=59, consequence_term_ids=[14, 24, 26], is_lof_nagnag=None, lof_filter_ids=None)),
 Struct(locus=Locus(contig=1, position=16850, reference_genome=GRCh37), alleles=['T', 'C'], vep85=Struct(amino_acids=None, canonical=1, codons=None, gene_id='ENSG00000227232', hgvsc='ENST00000438504.2:n.938+4A>G', hgvsp=None, transcript_id='ENST00000438504', biotype_id=59, consequence_term_ids=[12, 24, 26], is_lof_nagnag=None, lof_filter_ids=None), vep110=Struct(amino_acids=None, canonical=None, codons=None, gene_id='ENSG00000227232', hgvsc='ENST00000423562.1:n.822+8A>G', hgvsp=None, transcript_id='ENST00000423562', biotype_id=59, consequence_term_ids=[12, 24, 26], is_lof_nagnag=None, lof_filter_ids=None)),
 Struct(locus=Locus(contig=1, position=17066, reference_genome=GRCh37), alleles=['A', 'C'], vep85=Struct(amino_acids=None, canonical=1, codons=None, gene_id='ENSG00000227232', hgvsc='ENST00000438504.2:n.737-11T>G', hgvsp=None, transcript_id='ENST00000438504', biotype_id=59, consequence_term_ids=[24, 26], is_lof_nagnag=None, lof_filter_ids=None), vep110=Struct(amino_acids=None, canonical=None, codons=None, gene_id='ENSG00000227232', hgvsc='ENST00000423562.1:n.625-11T>G', hgvsp=None, transcript_id='ENST00000423562', biotype_id=59, consequence_term_ids=[14, 24, 26], is_lof_nagnag=None, lof_filter_ids=None)),
 Struct(locus=Locus(contig=1, position=17385, reference_genome=GRCh37), alleles=['G', 'A'], vep85=Struct(amino_acids=None, canonical=1, codons=None, gene_id='ENSG00000227232', hgvsc='ENST00000438504.2:n.605-21C>T', hgvsp=None, transcript_id='ENST00000438504', biotype_id=59, consequence_term_ids=[24, 26], is_lof_nagnag=None, lof_filter_ids=None), vep110=Struct(amino_acids=None, canonical=None, codons=None, gene_id='ENSG00000227232', hgvsc='ENST00000423562.1:n.489-17C>T', hgvsp=None, transcript_id='ENST00000423562', biotype_id=59, consequence_term_ids=[14, 24, 26], is_lof_nagnag=None, lof_filter_ids=None)),
 Struct(locus=Locus(contig=1, position=17516, reference_genome=GRCh37), alleles=['T', 'C'], vep85=Struct(amino_acids=None, canonical=1, codons=None, gene_id='ENSG00000227232', hgvsc='ENST00000438504.2:n.604+86A>G', hgvsp=None, transcript_id='ENST00000438504', biotype_id=59, consequence_term_ids=[24, 26], is_lof_nagnag=None, lof_filter_ids=None), vep110=Struct(amino_acids=None, canonical=None, codons=None, gene_id='ENSG00000227232', hgvsc='ENST00000541675.1:n.540-12A>G', hgvsp=None, transcript_id='ENST00000541675', biotype_id=59, consequence_term_ids=[14, 24, 26], is_lof_nagnag=None, lof_filter_ids=None)),
 Struct(locus=Locus(contig=1, position=17519, reference_genome=GRCh37), alleles=['G', 'T'], vep85=Struct(amino_acids=None, canonical=1, codons=None, gene_id='ENSG00000227232', hgvsc='ENST00000438504.2:n.604+83C>A', hgvsp=None, transcript_id='ENST00000438504', biotype_id=59, consequence_term_ids=[24, 26], is_lof_nagnag=None, lof_filter_ids=None), vep110=Struct(amino_acids=None, canonical=None, codons=None, gene_id='ENSG00000227232', hgvsc='ENST00000541675.1:n.540-15C>A', hgvsp=None, transcript_id='ENST00000541675', biotype_id=59, consequence_term_ids=[14, 24, 26], is_lof_nagnag=None, lof_filter_ids=None)),
 Struct(locus=Locus(contig=1, position=17753, reference_genome=GRCh37), alleles=['C', 'T'], vep85=Struct(amino_acids=None, canonical=1, codons=None, gene_id='ENSG00000227232', hgvsc='ENST00000438504.2:n.464-11G>A', hgvsp=None, transcript_id='ENST00000438504', biotype_id=59, consequence_term_ids=[24, 26], is_lof_nagnag=None, lof_filter_ids=None), vep110=Struct(amino_acids=None, canonical=None, codons=None, gene_id='ENSG00000227232', hgvsc='ENST00000423562.1:n.352-11G>A', hgvsp=None, transcript_id='ENST00000423562', biotype_id=59, consequence_term_ids=[14, 24, 26], is_lof_nagnag=None, lof_filter_ids=None)),
 Struct(locus=Locus(contig=1, position=30548, reference_genome=GRCh37), alleles=['T', 'G'], vep85=Struct(amino_acids=None, canonical=None, codons=None, gene_id='ENSG00000243485', hgvsc='ENST00000469289.1:n.282T>G', hgvsp=None, transcript_id='ENST00000469289', biotype_id=61, consequence_term_ids=[23, 26], is_lof_nagnag=None, lof_filter_ids=None), vep110=Struct(amino_acids=None, canonical=1, codons=None, gene_id='ENSG00000243485', hgvsc='ENST00000473358.1:n.487-16T>G', hgvsp=None, transcript_id='ENST00000473358', biotype_id=61, consequence_term_ids=[14, 24, 26], is_lof_nagnag=None, lof_filter_ids=None)),
 Struct(locus=Locus(contig=1, position=30551, reference_genome=GRCh37), alleles=['C', 'T'], vep85=Struct(amino_acids=None, canonical=None, codons=None, gene_id='ENSG00000243485', hgvsc='ENST00000469289.1:n.285C>T', hgvsp=None, transcript_id='ENST00000469289', biotype_id=61, consequence_term_ids=[23, 26], is_lof_nagnag=None, lof_filter_ids=None), vep110=Struct(amino_acids=None, canonical=1, codons=None, gene_id='ENSG00000243485', hgvsc='ENST00000473358.1:n.487-13C>T', hgvsp=None, transcript_id='ENST00000473358', biotype_id=61, consequence_term_ids=[14, 24, 26], is_lof_nagnag=None, lof_filter_ids=None)),
 Struct(locus=Locus(contig=1, position=111366, reference_genome=GRCh37), alleles=['G', 'GA'], vep85=Struct(amino_acids=None, canonical=1, codons=None, gene_id='ENSG00000238009', hgvsc='ENST00000466430.1:n.263+1333dupT', hgvsp=None, transcript_id='ENST00000466430', biotype_id=61, consequence_term_ids=[24, 26], is_lof_nagnag=None, lof_filter_ids=None), vep110=Struct(amino_acids=None, canonical=None, codons=None, gene_id='ENSG00000238009', hgvsc='ENST00000471248.1:n.225-10_225-9insT', hgvsp=None, transcript_id='ENST00000471248', biotype_id=61, consequence_term_ids=[14, 24, 26], is_lof_nagnag=None, lof_filter_ids=None)),
 Struct(locus=Locus(contig=1, position=111371, reference_genome=GRCh37), alleles=['A', 'G'], vep85=Struct(amino_acids=None, canonical=1, codons=None, gene_id='ENSG00000238009', hgvsc='ENST00000466430.1:n.263+1329T>C', hgvsp=None, transcript_id='ENST00000466430', biotype_id=61, consequence_term_ids=[24, 26], is_lof_nagnag=None, lof_filter_ids=None), vep110=Struct(amino_acids=None, canonical=None, codons=None, gene_id='ENSG00000238009', hgvsc='ENST00000471248.1:n.225-14T>C', hgvsp=None, transcript_id='ENST00000471248', biotype_id=61, consequence_term_ids=[14, 24, 26], is_lof_nagnag=None, lof_filter_ids=None)),
 Struct(locus=Locus(contig=1, position=257681, reference_genome=GRCh37), alleles=['G', 'GA'], vep85=Struct(amino_acids=None, canonical=1, codons=None, gene_id='ENSG00000228463', hgvsc='ENST00000424587.2:n.263+1335dupT', hgvsp=None, transcript_id='ENST00000424587', biotype_id=61, consequence_term_ids=[24, 26], is_lof_nagnag=None, lof_filter_ids=None), vep110=Struct(amino_acids=None, canonical=None, codons=None, gene_id='ENSG00000228463', hgvsc='ENST00000335577.4:n.106-10_106-9insT', hgvsp=None, transcript_id='ENST00000335577', biotype_id=61, consequence_term_ids=[14, 24, 26], is_lof_nagnag=None, lof_filter_ids=None)),
 Struct(locus=Locus(contig=1, position=257686, reference_genome=GRCh37), alleles=['A', 'G'], vep85=Struct(amino_acids=None, canonical=1, codons=None, gene_id='ENSG00000228463', hgvsc='ENST00000424587.2:n.263+1331T>C', hgvsp=None, transcript_id='ENST00000424587', biotype_id=61, consequence_term_ids=[24, 26], is_lof_nagnag=None, lof_filter_ids=None), vep110=Struct(amino_acids=None, canonical=None, codons=None, gene_id='ENSG00000228463', hgvsc='ENST00000335577.4:n.106-14T>C', hgvsp=None, transcript_id='ENST00000335577', biotype_id=61, consequence_term_ids=[14, 24, 26], is_lof_nagnag=None, lof_filter_ids=None)),
 Struct(locus=Locus(contig=1, position=721757, reference_genome=GRCh37), alleles=['T', 'A'], vep85=Struct(amino_acids=None, canonical=1, codons=None, gene_id='ENSG00000237491', hgvsc='ENST00000591702.1:n.578T>A', hgvsp=None, transcript_id='ENST00000591702', biotype_id=61, consequence_term_ids=[23, 26], is_lof_nagnag=None, lof_filter_ids=None), vep110=Struct(amino_acids=None, canonical=None, codons=None, gene_id='ENSG00000237491', hgvsc='ENST00000586928.1:n.125-16T>A', hgvsp=None, transcript_id='ENST00000586928', biotype_id=61, consequence_term_ids=[14, 24, 26], is_lof_nagnag=None, lof_filter_ids=None)),
 Struct(locus=Locus(contig=1, position=745021, reference_genome=GRCh37), alleles=['G', 'T'], vep85=Struct(amino_acids=None, canonical=None, codons=None, gene_id='ENSG00000237491', hgvsc='ENST00000412115.1:n.162G>T', hgvsp=None, transcript_id='ENST00000412115', biotype_id=61, consequence_term_ids=[23, 26], is_lof_nagnag=None, lof_filter_ids=None), vep110=Struct(amino_acids=None, canonical=None, codons=None, gene_id='ENSG00000237491', hgvsc='ENST00000588951.1:n.536-17G>T', hgvsp=None, transcript_id='ENST00000588951', biotype_id=61, consequence_term_ids=[14, 24, 26], is_lof_nagnag=None, lof_filter_ids=None)),
 Struct(locus=Locus(contig=1, position=852875, reference_genome=GRCh37), alleles=['C', 'T'], vep85=Struct(amino_acids=None, canonical=1, codons=None, gene_id='ENSG00000223764', hgvsc='ENST00000609207.1:n.3522G>A', hgvsp=None, transcript_id='ENST00000609207', biotype_id=38, consequence_term_ids=[23, 26], is_lof_nagnag=None, lof_filter_ids=None), vep110=Struct(amino_acids=None, canonical=None, codons=None, gene_id='ENSG00000223764', hgvsc='ENST00000432961.1:n.514-9G>A', hgvsp=None, transcript_id='ENST00000432961', biotype_id=38, consequence_term_ids=[14, 24, 26], is_lof_nagnag=None, lof_filter_ids=None)),
 Struct(locus=Locus(contig=1, position=854305, reference_genome=GRCh37), alleles=['AG', 'A'], vep85=Struct(amino_acids=None, canonical=1, codons=None, gene_id='ENSG00000223764', hgvsc='ENST00000609207.1:n.2091delC', hgvsp=None, transcript_id='ENST00000609207', biotype_id=38, consequence_term_ids=[23, 26], is_lof_nagnag=None, lof_filter_ids=None), vep110=Struct(amino_acids=None, canonical=None, codons=None, gene_id='ENSG00000223764', hgvsc='ENST00000417705.1:n.359-11del', hgvsp=None, transcript_id='ENST00000417705', biotype_id=61, consequence_term_ids=[14, 24, 26], is_lof_nagnag=None, lof_filter_ids=None)),
 Struct(locus=Locus(contig=1, position=906445, reference_genome=GRCh37), alleles=['T', 'A'], vep85=Struct(amino_acids=None, canonical=1, codons=None, gene_id='ENSG00000187583', hgvsc='ENST00000379410.3:c.613-48T>A', hgvsp=None, transcript_id='ENST00000379410', biotype_id=39, consequence_term_ids=[24], is_lof_nagnag=None, lof_filter_ids=None), vep110=Struct(amino_acids=None, canonical=None, codons=None, gene_id='ENSG00000187583', hgvsc='ENST00000379407.3:c.613-12T>A', hgvsp=None, transcript_id='ENST00000379407', biotype_id=39, consequence_term_ids=[14, 24], is_lof_nagnag=None, lof_filter_ids=None)),
 Struct(locus=Locus(contig=1, position=986395, reference_genome=GRCh37), alleles=['C', 'T'], vep85=Struct(amino_acids=None, canonical=1, codons=None, gene_id='ENSG00000188157', hgvsc='ENST00000379370.2:c.5253+178C>T', hgvsp=None, transcript_id='ENST00000379370', biotype_id=39, consequence_term_ids=[24], is_lof_nagnag=None, lof_filter_ids=None), vep110=Struct(amino_acids=None, canonical=None, codons=None, gene_id='ENSG00000188157', hgvsc='ENST00000419249.1:c.149-17C>T', hgvsp=None, transcript_id='ENST00000419249', biotype_id=39, consequence_term_ids=[14, 24], is_lof_nagnag=None, lof_filter_ids=None))]
bpblanken commented 1 month ago

The new consequence_term_id (14, present only in the vep110 consequence_term_ids in the examples above) is splice_polypyrimidine_tract_variant, which makes sense!