Closed Peter-J-Freeman closed 3 years ago
@ifokkema you may like what's coming
The default behaviour (select_transcripts = "all", or a list of transcripts) remains unchanged.
Added the following options (example sessions below):
>>> import json
>>> import VariantValidator
>>> vval = VariantValidator.Validator()
>>> variant = 'NC_000017.10:g.48275363C>A'
>>> genome_build = 'GRCh37'
>>> select_transcripts = 'select'
>>> validate = vval.validate(variant, genome_build, select_transcripts)
>>> validation = validate.format_as_dict(with_meta=True)
>>> print(validation)
{'flag': 'gene_variant',
'NM_000088.4:c.589G>T': {'selected_assembly': 'GRCh37', 'submitted_variant': 'NC_000017.10:g.48275363C>A', 'gene_symbol': 'COL1A1', 'gene_ids': {'hgnc_id': 'HGNC:2197', 'entrez_gene_id': '1277', 'ensembl_gene_id': 'ENSG00000108821', 'ucsc_id': 'uc002iqm.4', 'omim_id': ['120150'], 'ccds_ids': ['CCDS11561']}, 'annotations': {'db_xref': {'CCDS': 'CCDS11561.1', 'select': 'MANE', 'ncbigene': '1277', 'ensemblgene': None, 'hgnc': 'HGNC:2197'}, 'chromosome': '17', 'map': '17q21.33', 'note': 'collagen type I alpha 1 chain', 'variant': '0', 'refseq_select': True, 'mane_select': True, 'ensembl_select': False, 'mane_plus_clinical': False}, 'transcript_description': 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA', 'hgvs_transcript_variant': 'NM_000088.4:c.589G>T', 'genome_context_intronic_sequence': '', 'refseqgene_context_intronic_sequence': '', 'hgvs_refseqgene_variant': '', 'hgvs_predicted_protein_consequence': {'tlr': 'NP_000079.2:p.(Gly197Cys)', 'slr': 'NP_000079.2:p.(G197C)', 'lrg_tlr': 'LRG_1p1:p.(Gly197Cys)', 'lrg_slr': 'LRG_1p1:p.(G197C)'}, 'validation_warnings': ['RefSeqGene record not available'], 'hgvs_lrg_transcript_variant': '', 'hgvs_lrg_variant': '', 'alt_genomic_loci': [], 'primary_assembly_loci': {'grch37': {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': '17', 'pos': '48275363', 'ref': 'C', 'alt': 'A'}}, 'hg19': {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': 'chr17', 'pos': '48275363', 'ref': 'C', 'alt': 'A'}}, 'grch38': {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': '17', 'pos': '50198002', 'ref': 'C', 'alt': 'A'}}, 'hg38': {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': 'chr17', 'pos': '50198002', 'ref': 'C', 'alt': 'A'}}}, 'reference_sequence_records': {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2'}},
'NM_000088.3:c.589G>T': {'selected_assembly': 'GRCh37', 'submitted_variant': 'NC_000017.10:g.48275363C>A', 'gene_symbol': 'COL1A1', 'gene_ids': {'hgnc_id': 'HGNC:2197', 'entrez_gene_id': '1277', 'ensembl_gene_id': 'ENSG00000108821', 'ucsc_id': 'uc002iqm.4', 'omim_id': ['120150'], 'ccds_ids': ['CCDS11561']}, 'annotations': {'db_xref': {'CCDS': 'CCDS11561.1', 'select': 'RefSeq', 'ncbigene': '1277', 'ensemblgene': None, 'hgnc': 'HGNC:2197'}, 'chromosome': '17', 'map': '17q21.33', 'note': 'collagen type I alpha 1 chain', 'variant': '0', 'refseq_select': True, 'mane_select': False, 'ensembl_select': False, 'mane_plus_clinical': False}, 'transcript_description': 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA', 'hgvs_transcript_variant': 'NM_000088.3:c.589G>T', 'genome_context_intronic_sequence': '', 'refseqgene_context_intronic_sequence': '', 'hgvs_refseqgene_variant': 'NG_007400.1:g.8638G>T', 'hgvs_predicted_protein_consequence': {'tlr': 'NP_000079.2:p.(Gly197Cys)', 'slr': 'NP_000079.2:p.(G197C)', 'lrg_tlr': 'LRG_1p1:p.(Gly197Cys)', 'lrg_slr': 'LRG_1p1:p.(G197C)'}, 'validation_warnings': ['A more recent version of the selected reference sequence NM_000088.3 is available (NM_000088.4): NM_000088.4:c.589G>T MUST be fully validated prior to use in reports: select_variants=NM_000088.4:c.589G>T', 'RefSeqGene record not available'], 'hgvs_lrg_transcript_variant': 'LRG_1t1:c.589G>T', 'hgvs_lrg_variant': 'LRG_1:g.8638G>T', 'alt_genomic_loci': [], 'primary_assembly_loci': {'grch37': {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': '17', 'pos': '48275363', 'ref': 'C', 'alt': 'A'}}, 'hg19': {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': 'chr17', 'pos': '48275363', 'ref': 'C', 'alt': 'A'}}, 'grch38': {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': '17', 'pos': '50198002', 'ref': 'C', 'alt': 'A'}}, 'hg38': {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': 'chr17', 'pos': 
'50198002', 'ref': 'C', 'alt': 'A'}}}, 'reference_sequence_records': {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'}},
'metadata': {'variantvalidator_version': '1.0.4.dev214+g9cf795f.d20210407', 'variantvalidator_hgvs_version': '2.0.1.dev1+gb3a18e0', 'vvta_version': 'vvta_2021_2', 'vvseqrepo_db': 'VV_SR_2021_2/master', 'vvdb_version': 'vvdb_2021_4'}}
>>> import json
>>> import VariantValidator
>>> vval = VariantValidator.Validator()
>>> variant = 'NC_000017.10:g.48275363C>A'
>>> genome_build = 'GRCh37'
>>> select_transcripts = 'mane_select'
>>> validate = vval.validate(variant, genome_build, select_transcripts)
>>> validation = validate.format_as_dict(with_meta=True)
>>> print(validation)
{'flag': 'gene_variant',
'NM_000088.4:c.589G>T': {'selected_assembly': 'GRCh37', 'submitted_variant': 'NC_000017.10:g.48275363C>A', 'gene_symbol': 'COL1A1', 'gene_ids': {'hgnc_id': 'HGNC:2197', 'entrez_gene_id': '1277', 'ensembl_gene_id': 'ENSG00000108821', 'ucsc_id': 'uc002iqm.4', 'omim_id': ['120150'], 'ccds_ids': ['CCDS11561']}, 'annotations': {'db_xref': {'CCDS': 'CCDS11561.1', 'select': 'MANE', 'ncbigene': '1277', 'ensemblgene': None, 'hgnc': 'HGNC:2197'}, 'chromosome': '17', 'map': '17q21.33', 'note': 'collagen type I alpha 1 chain', 'variant': '0', 'refseq_select': True, 'mane_select': True, 'ensembl_select': False, 'mane_plus_clinical': False}, 'transcript_description': 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA', 'hgvs_transcript_variant': 'NM_000088.4:c.589G>T', 'genome_context_intronic_sequence': '', 'refseqgene_context_intronic_sequence': '', 'hgvs_refseqgene_variant': '', 'hgvs_predicted_protein_consequence': {'tlr': 'NP_000079.2:p.(Gly197Cys)', 'slr': 'NP_000079.2:p.(G197C)', 'lrg_tlr': 'LRG_1p1:p.(Gly197Cys)', 'lrg_slr': 'LRG_1p1:p.(G197C)'}, 'validation_warnings': ['RefSeqGene record not available'], 'hgvs_lrg_transcript_variant': '', 'hgvs_lrg_variant': '', 'alt_genomic_loci': [], 'primary_assembly_loci': {'grch37': {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': '17', 'pos': '48275363', 'ref': 'C', 'alt': 'A'}}, 'hg19': {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': 'chr17', 'pos': '48275363', 'ref': 'C', 'alt': 'A'}}, 'grch38': {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': '17', 'pos': '50198002', 'ref': 'C', 'alt': 'A'}}, 'hg38': {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': 'chr17', 'pos': '50198002', 'ref': 'C', 'alt': 'A'}}}, 'reference_sequence_records': {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2'}},
'metadata': {'variantvalidator_version': '1.0.4.dev214+g9cf795f.d20210407', 'variantvalidator_hgvs_version': '2.0.1.dev1+gb3a18e0', 'vvta_version': 'vvta_2021_2', 'vvseqrepo_db': 'VV_SR_2021_2/master', 'vvdb_version': 'vvdb_2021_4'}}
>>>
>>> import json
>>> import VariantValidator
>>> vval = VariantValidator.Validator()
>>> variant = 'NC_000017.10:g.48275363C>A'
>>> genome_build = 'GRCh37'
>>> select_transcripts = 'refseq_select'
>>> validate = vval.validate(variant, genome_build, select_transcripts)
>>> validation = validate.format_as_dict(with_meta=True)
>>> print(validation)
{'flag': 'gene_variant',
'NM_000088.4:c.589G>T': {'selected_assembly': 'GRCh37', 'submitted_variant': 'NC_000017.10:g.48275363C>A', 'gene_symbol': 'COL1A1', 'gene_ids': {'hgnc_id': 'HGNC:2197', 'entrez_gene_id': '1277', 'ensembl_gene_id': 'ENSG00000108821', 'ucsc_id': 'uc002iqm.4', 'omim_id': ['120150'], 'ccds_ids': ['CCDS11561']}, 'annotations': {'db_xref': {'CCDS': 'CCDS11561.1', 'select': 'MANE', 'ncbigene': '1277', 'ensemblgene': None, 'hgnc': 'HGNC:2197'}, 'chromosome': '17', 'map': '17q21.33', 'note': 'collagen type I alpha 1 chain', 'variant': '0', 'refseq_select': True, 'mane_select': True, 'ensembl_select': False, 'mane_plus_clinical': False}, 'transcript_description': 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA', 'hgvs_transcript_variant': 'NM_000088.4:c.589G>T', 'genome_context_intronic_sequence': '', 'refseqgene_context_intronic_sequence': '', 'hgvs_refseqgene_variant': '', 'hgvs_predicted_protein_consequence': {'tlr': 'NP_000079.2:p.(Gly197Cys)', 'slr': 'NP_000079.2:p.(G197C)', 'lrg_tlr': 'LRG_1p1:p.(Gly197Cys)', 'lrg_slr': 'LRG_1p1:p.(G197C)'}, 'validation_warnings': ['RefSeqGene record not available'], 'hgvs_lrg_transcript_variant': '', 'hgvs_lrg_variant': '', 'alt_genomic_loci': [], 'primary_assembly_loci': {'grch37': {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': '17', 'pos': '48275363', 'ref': 'C', 'alt': 'A'}}, 'hg19': {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': 'chr17', 'pos': '48275363', 'ref': 'C', 'alt': 'A'}}, 'grch38': {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': '17', 'pos': '50198002', 'ref': 'C', 'alt': 'A'}}, 'hg38': {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': 'chr17', 'pos': '50198002', 'ref': 'C', 'alt': 'A'}}}, 'reference_sequence_records': {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2'}},
'NM_000088.3:c.589G>T': {'selected_assembly': 'GRCh37', 'submitted_variant': 'NC_000017.10:g.48275363C>A', 'gene_symbol': 'COL1A1', 'gene_ids': {'hgnc_id': 'HGNC:2197', 'entrez_gene_id': '1277', 'ensembl_gene_id': 'ENSG00000108821', 'ucsc_id': 'uc002iqm.4', 'omim_id': ['120150'], 'ccds_ids': ['CCDS11561']}, 'annotations': {'db_xref': {'CCDS': 'CCDS11561.1', 'select': 'RefSeq', 'ncbigene': '1277', 'ensemblgene': None, 'hgnc': 'HGNC:2197'}, 'chromosome': '17', 'map': '17q21.33', 'note': 'collagen type I alpha 1 chain', 'variant': '0', 'refseq_select': True, 'mane_select': False, 'ensembl_select': False, 'mane_plus_clinical': False}, 'transcript_description': 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA', 'hgvs_transcript_variant': 'NM_000088.3:c.589G>T', 'genome_context_intronic_sequence': '', 'refseqgene_context_intronic_sequence': '', 'hgvs_refseqgene_variant': 'NG_007400.1:g.8638G>T', 'hgvs_predicted_protein_consequence': {'tlr': 'NP_000079.2:p.(Gly197Cys)', 'slr': 'NP_000079.2:p.(G197C)', 'lrg_tlr': 'LRG_1p1:p.(Gly197Cys)', 'lrg_slr': 'LRG_1p1:p.(G197C)'}, 'validation_warnings': ['A more recent version of the selected reference sequence NM_000088.3 is available (NM_000088.4): NM_000088.4:c.589G>T MUST be fully validated prior to use in reports: select_variants=NM_000088.4:c.589G>T', 'RefSeqGene record not available'], 'hgvs_lrg_transcript_variant': 'LRG_1t1:c.589G>T', 'hgvs_lrg_variant': 'LRG_1:g.8638G>T', 'alt_genomic_loci': [], 'primary_assembly_loci': {'grch37': {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': '17', 'pos': '48275363', 'ref': 'C', 'alt': 'A'}}, 'hg19': {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': 'chr17', 'pos': '48275363', 'ref': 'C', 'alt': 'A'}}, 'grch38': {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': '17', 'pos': '50198002', 'ref': 'C', 'alt': 'A'}}, 'hg38': {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': 'chr17', 'pos': 
'50198002', 'ref': 'C', 'alt': 'A'}}}, 'reference_sequence_records': {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'}},
'metadata': {'variantvalidator_version': '1.0.4.dev214+g9cf795f.d20210407', 'variantvalidator_hgvs_version': '2.0.1.dev1+gb3a18e0', 'vvta_version': 'vvta_2021_2', 'vvseqrepo_db': 'VV_SR_2021_2/master', 'vvdb_version': 'vvdb_2021_4'}}
>>>
This needs testing, but it should be available to the API immediately (it will need documenting, and will be ready when the servers are built) and should also work in the batch tool. Again, this needs documenting.
Thanks for the tag! Since we might not know if there are any MANE select transcripts available, it would likely be more efficient to make just one call instead of two (i.e., select all and check for MANE transcripts rather than requesting for MANE transcripts, no results, select again for all). Actually, I think that's also best in the LOVD interface - show all, but highlight the MANE select transcript.
Since we might not know if there are any MANE select transcripts available, it would likely be more efficient to make just one call instead of two (i.e., select all and check for MANE transcripts rather than requesting for MANE transcripts, no results, select again for all). Actually, I think that's also best in the LOVD interface - show all, but highlight the MANE select transcript.
Yep, this is why I used the tag "select". We can safely assume, I would hope at least, that every gene will have a RefSeq Select transcript (and an Ensembl Select transcript when relevant).
So for your purposes, use "select" rather than "all".
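Options are (as shown in this thread; the list may be incomplete): "all", "select", "mane_select", "refseq_select", or a list of transcripts.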
Closes this issue
Is your feature request related to a problem? Please describe. It is not related to a problem, but will be an upgrade of the select_transcripts feature.
Describe the solution you'd like