openvar / variantValidator

Public repository for VariantValidator project
GNU Affero General Public License v3.0
70 stars 21 forks source link

How do we handle Ensembl transcripts with gapped alignments #416

Open Peter-J-Freeman opened 2 years ago

Peter-J-Freeman commented 2 years ago

In theory, Ensembl transcripts should not have gaps because they must align directly to a genome build. However, we know that there are examples of Ensembl transcripts, even in GRCh38, where the alignment has to be tweaked before the transcript will align to the genome.

We need to ask Ensembl how this is handled, look at some examples of known gaps, and see how VV handles them.

Here are examples that can be found in

- 1 base gap in the genome (Yenn et al): 19-41123094-G-GG (LTBP4)
- 1 base gap in the transcripts (Yenn et al): 15-72105928-AC-A (NR2E3)
- 3 base gap in the transcripts: NC_000002.11:g.95847041_95847043GCG= (ZNF2)
- GRCh37/38 gap: NC_000004.12:g.139889957_139889968del (MAML3)

We have other examples but these will do for now. MAML3 might be interesting because it looks like an oncogene.

@sbenny1230 Try running these and adding them to the Ensembl tests. Remember, they are GRCh37. The last one can be updated and run as GRCh38 though, please, the variant being NC_000004.12:g.139889957_139889968del. Although the MANE Select was easy to generate, because it just requires the deletion of an in-frame number of bases from the original transcript (NM_018717.4 to NM_018717.5), I want to know the justification for removing 3 or 4 amino acids just so a MANE Select can be created!!!

sbenny1230 commented 2 years ago

Just going to print the current results for each

sbenny1230 commented 2 years ago

Input:

variant = '15-72105928-AC-A'
genome_build = 'GRCh37'

Output:

{
    "ENST00000326995.5:n.1175del": {
        "alt_genomic_loci": [],
        "annotations": {
            "chromosome": "15",
            "db_xref": {
                "CCDS": null,
                "ensemblgene": "ENSG00000031544",
                "hgnc": "7974",
                "ncbigene": null,
                "select": false
            },
            "ensembl_select": false,
            "mane_plus_clinical": false,
            "mane_select": false,
            "map": "15q23",
            "note": "nuclear receptor subfamily 2 group E member 3",
            "refseq_select": false,
            "variant": "201"
        },
        "gene_ids": {
            "ccds_ids": [
                "CCDS73750",
                "CCDS73751"
            ],
            "ensembl_gene_id": "ENSG00000278570",
            "entrez_gene_id": "10002",
            "hgnc_id": "HGNC:7974",
            "omim_id": [
                "604485"
            ],
            "ucsc_id": "uc032cil.2"
        },
        "gene_symbol": "NR2E3",
        "genome_context_intronic_sequence": "",
        "hgvs_lrg_transcript_variant": "",
        "hgvs_lrg_variant": "",
        "hgvs_predicted_protein_consequence": {
            "lrg_slr": "",
            "lrg_tlr": "",
            "slr": "",
            "tlr": ""
        },
        "hgvs_refseqgene_variant": "",
        "hgvs_transcript_variant": "ENST00000326995.5:n.1175del",
        "primary_assembly_loci": {
            "grch37": {
                "hgvs_genomic_description": "NC_000015.9:g.72105933del",
                "vcf": {
                    "alt": "A",
                    "chr": "15",
                    "pos": "72105928",
                    "ref": "AC"
                }
            },
            "hg19": {
                "hgvs_genomic_description": "NC_000015.9:g.72105933del",
                "vcf": {
                    "alt": "A",
                    "chr": "chr15",
                    "pos": "72105928",
                    "ref": "AC"
                }
            }
        },
        "reference_sequence_records": {
            "transcript": "https://grch37.ensembl.org/Homo_sapiens/Transcript/Summary?db=core;t=ENST00000326995.5"
        },
        "refseqgene_context_intronic_sequence": "",
        "selected_assembly": "GRCh37",
        "submitted_variant": "15-72105928-AC-A",
        "transcript_description": "NR2E3-201",
        "validation_warnings": [
            "NC_000015.9:g.72105928AC>A automapped to NC_000015.9:g.72105933delC",
            "Removing redundant reference bases from variant description"
        ],
        "variant_exonic_positions": null
    },
    "ENST00000398840.2:n.1141del": {
        "alt_genomic_loci": [],
        "annotations": {
            "chromosome": "15",
            "db_xref": {
                "CCDS": null,
                "ensemblgene": "ENSG00000031544",
                "hgnc": "7974",
                "ncbigene": null,
                "select": "Ensembl"
            },
            "ensembl_select": true,
            "mane_plus_clinical": false,
            "mane_select": false,
            "map": "15q23",
            "note": "nuclear receptor subfamily 2 group E member 3",
            "refseq_select": false,
            "variant": "202"
        },
        "gene_ids": {
            "ccds_ids": [
                "CCDS73750",
                "CCDS73751"
            ],
            "ensembl_gene_id": "ENSG00000278570",
            "entrez_gene_id": "10002",
            "hgnc_id": "HGNC:7974",
            "omim_id": [
                "604485"
            ],
            "ucsc_id": "uc032cil.2"
        },
        "gene_symbol": "NR2E3",
        "genome_context_intronic_sequence": "",
        "hgvs_lrg_transcript_variant": "",
        "hgvs_lrg_variant": "",
        "hgvs_predicted_protein_consequence": {
            "lrg_slr": "",
            "lrg_tlr": "",
            "slr": "",
            "tlr": ""
        },
        "hgvs_refseqgene_variant": "",
        "hgvs_transcript_variant": "ENST00000398840.2:n.1141del",
        "primary_assembly_loci": {
            "grch37": {
                "hgvs_genomic_description": "NC_000015.9:g.72105933del",
                "vcf": {
                    "alt": "A",
                    "chr": "15",
                    "pos": "72105928",
                    "ref": "AC"
                }
            },
            "hg19": {
                "hgvs_genomic_description": "NC_000015.9:g.72105933del",
                "vcf": {
                    "alt": "A",
                    "chr": "chr15",
                    "pos": "72105928",
                    "ref": "AC"
                }
            }
        },
        "reference_sequence_records": {
            "transcript": "https://grch37.ensembl.org/Homo_sapiens/Transcript/Summary?db=core;t=ENST00000398840.2"
        },
        "refseqgene_context_intronic_sequence": "",
        "selected_assembly": "GRCh37",
        "submitted_variant": "15-72105928-AC-A",
        "transcript_description": "NR2E3-202",
        "validation_warnings": [
            "NC_000015.9:g.72105928AC>A automapped to NC_000015.9:g.72105933delC",
            "Removing redundant reference bases from variant description"
        ],
        "variant_exonic_positions": null
    },
    "ENST00000561604.1:n.1176del": {
        "alt_genomic_loci": [],
        "annotations": {
            "chromosome": "15",
            "db_xref": {
                "CCDS": null,
                "ensemblgene": "ENSG00000031544",
                "hgnc": "7974",
                "ncbigene": null,
                "select": false
            },
            "ensembl_select": false,
            "mane_plus_clinical": false,
            "mane_select": false,
            "map": "15q23",
            "note": "nuclear receptor subfamily 2 group E member 3",
            "refseq_select": false,
            "variant": "004"
        },
        "gene_ids": {
            "ccds_ids": [
                "CCDS73750",
                "CCDS73751"
            ],
            "ensembl_gene_id": "ENSG00000278570",
            "entrez_gene_id": "10002",
            "hgnc_id": "HGNC:7974",
            "omim_id": [
                "604485"
            ],
            "ucsc_id": "uc032cil.2"
        },
        "gene_symbol": "NR2E3",
        "genome_context_intronic_sequence": "",
        "hgvs_lrg_transcript_variant": "",
        "hgvs_lrg_variant": "",
        "hgvs_predicted_protein_consequence": {
            "lrg_slr": "",
            "lrg_tlr": "",
            "slr": "",
            "tlr": ""
        },
        "hgvs_refseqgene_variant": "",
        "hgvs_transcript_variant": "ENST00000561604.1:n.1176del",
        "primary_assembly_loci": {
            "grch37": {
                "hgvs_genomic_description": "NC_000015.9:g.72105933del",
                "vcf": {
                    "alt": "A",
                    "chr": "15",
                    "pos": "72105928",
                    "ref": "AC"
                }
            },
            "hg19": {
                "hgvs_genomic_description": "NC_000015.9:g.72105933del",
                "vcf": {
                    "alt": "A",
                    "chr": "chr15",
                    "pos": "72105928",
                    "ref": "AC"
                }
            }
        },
        "reference_sequence_records": {
            "transcript": "https://grch37.ensembl.org/Homo_sapiens/Transcript/Summary?db=core;t=ENST00000561604.1"
        },
        "refseqgene_context_intronic_sequence": "",
        "selected_assembly": "GRCh37",
        "submitted_variant": "15-72105928-AC-A",
        "transcript_description": "NR2E3-004",
        "validation_warnings": [
            "NC_000015.9:g.72105928AC>A automapped to NC_000015.9:g.72105933delC",
            "Removing redundant reference bases from variant description"
        ],
        "variant_exonic_positions": null
    },
    "ENST00000562839.1:n.1104del": {
        "alt_genomic_loci": [],
        "annotations": {
            "chromosome": "15",
            "db_xref": {
                "CCDS": null,
                "ensemblgene": "ENSG00000031544",
                "hgnc": "7974",
                "ncbigene": null,
                "select": false
            },
            "ensembl_select": false,
            "mane_plus_clinical": false,
            "mane_select": false,
            "map": "15q23",
            "note": "nuclear receptor subfamily 2 group E member 3",
            "refseq_select": false,
            "variant": "002"
        },
        "gene_ids": {
            "ccds_ids": [
                "CCDS73750",
                "CCDS73751"
            ],
            "ensembl_gene_id": "ENSG00000278570",
            "entrez_gene_id": "10002",
            "hgnc_id": "HGNC:7974",
            "omim_id": [
                "604485"
            ],
            "ucsc_id": "uc032cil.2"
        },
        "gene_symbol": "NR2E3",
        "genome_context_intronic_sequence": "",
        "hgvs_lrg_transcript_variant": "",
        "hgvs_lrg_variant": "",
        "hgvs_predicted_protein_consequence": {
            "lrg_slr": "",
            "lrg_tlr": "",
            "slr": "",
            "tlr": ""
        },
        "hgvs_refseqgene_variant": "",
        "hgvs_transcript_variant": "ENST00000562839.1:n.1104del",
        "primary_assembly_loci": {
            "grch37": {
                "hgvs_genomic_description": "NC_000015.9:g.72105933del",
                "vcf": {
                    "alt": "A",
                    "chr": "15",
                    "pos": "72105928",
                    "ref": "AC"
                }
            },
            "hg19": {
                "hgvs_genomic_description": "NC_000015.9:g.72105933del",
                "vcf": {
                    "alt": "A",
                    "chr": "chr15",
                    "pos": "72105928",
                    "ref": "AC"
                }
            }
        },
        "reference_sequence_records": {
            "transcript": "https://grch37.ensembl.org/Homo_sapiens/Transcript/Summary?db=core;t=ENST00000562839.1"
        },
        "refseqgene_context_intronic_sequence": "",
        "selected_assembly": "GRCh37",
        "submitted_variant": "15-72105928-AC-A",
        "transcript_description": "NR2E3-002",
        "validation_warnings": [
            "NC_000015.9:g.72105928AC>A automapped to NC_000015.9:g.72105933delC",
            "Removing redundant reference bases from variant description"
        ],
        "variant_exonic_positions": null
    },
    "ENST00000562925.1:n.1184del": {
        "alt_genomic_loci": [],
        "annotations": {
            "chromosome": "15",
            "db_xref": {
                "CCDS": null,
                "ensemblgene": "ENSG00000031544",
                "hgnc": "7974",
                "ncbigene": null,
                "select": false
            },
            "ensembl_select": false,
            "mane_plus_clinical": false,
            "mane_select": false,
            "map": "15q23",
            "note": "nuclear receptor subfamily 2 group E member 3",
            "refseq_select": false,
            "variant": "003"
        },
        "gene_ids": {
            "ccds_ids": [
                "CCDS73750",
                "CCDS73751"
            ],
            "ensembl_gene_id": "ENSG00000278570",
            "entrez_gene_id": "10002",
            "hgnc_id": "HGNC:7974",
            "omim_id": [
                "604485"
            ],
            "ucsc_id": "uc032cil.2"
        },
        "gene_symbol": "NR2E3",
        "genome_context_intronic_sequence": "",
        "hgvs_lrg_transcript_variant": "",
        "hgvs_lrg_variant": "",
        "hgvs_predicted_protein_consequence": {
            "lrg_slr": "",
            "lrg_tlr": "",
            "slr": "",
            "tlr": ""
        },
        "hgvs_refseqgene_variant": "",
        "hgvs_transcript_variant": "ENST00000562925.1:n.1184del",
        "primary_assembly_loci": {
            "grch37": {
                "hgvs_genomic_description": "NC_000015.9:g.72105933del",
                "vcf": {
                    "alt": "A",
                    "chr": "15",
                    "pos": "72105928",
                    "ref": "AC"
                }
            },
            "hg19": {
                "hgvs_genomic_description": "NC_000015.9:g.72105933del",
                "vcf": {
                    "alt": "A",
                    "chr": "chr15",
                    "pos": "72105928",
                    "ref": "AC"
                }
            }
        },
        "reference_sequence_records": {
            "transcript": "https://grch37.ensembl.org/Homo_sapiens/Transcript/Summary?db=core;t=ENST00000562925.1"
        },
        "refseqgene_context_intronic_sequence": "",
        "selected_assembly": "GRCh37",
        "submitted_variant": "15-72105928-AC-A",
        "transcript_description": "NR2E3-003",
        "validation_warnings": [
            "NC_000015.9:g.72105928AC>A automapped to NC_000015.9:g.72105933delC",
            "Removing redundant reference bases from variant description"
        ],
        "variant_exonic_positions": null
    },
    "ENST00000567496.1:n.1142del": {
        "alt_genomic_loci": [],
        "annotations": {
            "chromosome": "15",
            "db_xref": {
                "CCDS": null,
                "ensemblgene": "ENSG00000031544",
                "hgnc": "7974",
                "ncbigene": null,
                "select": false
            },
            "ensembl_select": false,
            "mane_plus_clinical": false,
            "mane_select": false,
            "map": "15q23",
            "note": "nuclear receptor subfamily 2 group E member 3",
            "refseq_select": false,
            "variant": "001"
        },
        "gene_ids": {
            "ccds_ids": [
                "CCDS73750",
                "CCDS73751"
            ],
            "ensembl_gene_id": "ENSG00000278570",
            "entrez_gene_id": "10002",
            "hgnc_id": "HGNC:7974",
            "omim_id": [
                "604485"
            ],
            "ucsc_id": "uc032cil.2"
        },
        "gene_symbol": "NR2E3",
        "genome_context_intronic_sequence": "",
        "hgvs_lrg_transcript_variant": "",
        "hgvs_lrg_variant": "",
        "hgvs_predicted_protein_consequence": {
            "lrg_slr": "",
            "lrg_tlr": "",
            "slr": "",
            "tlr": ""
        },
        "hgvs_refseqgene_variant": "",
        "hgvs_transcript_variant": "ENST00000567496.1:n.1142del",
        "primary_assembly_loci": {
            "grch37": {
                "hgvs_genomic_description": "NC_000015.9:g.72105933del",
                "vcf": {
                    "alt": "A",
                    "chr": "15",
                    "pos": "72105928",
                    "ref": "AC"
                }
            },
            "hg19": {
                "hgvs_genomic_description": "NC_000015.9:g.72105933del",
                "vcf": {
                    "alt": "A",
                    "chr": "chr15",
                    "pos": "72105928",
                    "ref": "AC"
                }
            }
        },
        "reference_sequence_records": {
            "transcript": "https://grch37.ensembl.org/Homo_sapiens/Transcript/Summary?db=core;t=ENST00000567496.1"
        },
        "refseqgene_context_intronic_sequence": "",
        "selected_assembly": "GRCh37",
        "submitted_variant": "15-72105928-AC-A",
        "transcript_description": "NR2E3-001",
        "validation_warnings": [
            "NC_000015.9:g.72105928AC>A automapped to NC_000015.9:g.72105933delC",
            "Removing redundant reference bases from variant description"
        ],
        "variant_exonic_positions": null
    },
    "flag": "gene_variant",
    "metadata": {
        "variantvalidator_hgvs_version": "2.0.2.dev1+g6ecbf8e",
        "variantvalidator_version": "1.0.5.dev324+g714b8d1.d20220718",
        "vvdb_version": "vvdb_2022_04",
        "vvseqrepo_db": "VV_SR_2022_02/master",
        "vvta_version": "vvta_2022_02"
    }
}
sbenny1230 commented 2 years ago

Input:

variant = 'NC_000002.11:g.95847041_95847043GCG='
genome_build = 'GRCh37'

Output:

{
    "flag": "warning",
    "metadata": {
        "variantvalidator_hgvs_version": "2.0.2.dev1+g6ecbf8e",
        "variantvalidator_version": "1.0.5.dev324+g714b8d1.d20220718",
        "vvdb_version": "vvdb_2022_04",
        "vvseqrepo_db": "VV_SR_2022_02/master",
        "vvta_version": "vvta_2022_02"
    },
    "validation_warning_1": {
        "alt_genomic_loci": [],
        "annotations": {},
        "gene_ids": {},
        "gene_symbol": "",
        "genome_context_intronic_sequence": "",
        "hgvs_lrg_transcript_variant": "",
        "hgvs_lrg_variant": "",
        "hgvs_predicted_protein_consequence": {
            "lrg_slr": "",
            "lrg_tlr": "",
            "slr": "",
            "tlr": ""
        },
        "hgvs_refseqgene_variant": "",
        "hgvs_transcript_variant": "",
        "primary_assembly_loci": {},
        "reference_sequence_records": "",
        "refseqgene_context_intronic_sequence": "",
        "selected_assembly": "GRCh37",
        "submitted_variant": "NC_000002.11:g.95847041_95847043GCG=",
        "transcript_description": "ZNF2-005",
        "validation_warnings": [
            "Required information for ENST00000295210.6 is missing from the Universal Transcript Archive",
            "Query gene2transcripts with search term ENST00000295210 for available transcripts",
            "Required information for ENST00000340539.5 is missing from the Universal Transcript Archive",
            "Query gene2transcripts with search term ENST00000340539 for available transcripts",
            "Required information for ENST00000398107.2 is missing from the Universal Transcript Archive",
            "Query gene2transcripts with search term ENST00000398107 for available transcripts",
            "Required information for ENST00000425369.1 is missing from the Universal Transcript Archive",
            "Query gene2transcripts with search term ENST00000425369 for available transcripts",
            "Required information for ENST00000453539.2 is missing from the Universal Transcript Archive",
            "Query gene2transcripts with search term ENST00000453539 for available transcripts"
        ],
        "variant_exonic_positions": null
    },
    "validation_warning_2": {
        "alt_genomic_loci": [],
        "annotations": {},
        "gene_ids": {},
        "gene_symbol": "",
        "genome_context_intronic_sequence": "",
        "hgvs_lrg_transcript_variant": "",
        "hgvs_lrg_variant": "",
        "hgvs_predicted_protein_consequence": {
            "lrg_slr": "",
            "lrg_tlr": "",
            "slr": "",
            "tlr": ""
        },
        "hgvs_refseqgene_variant": "",
        "hgvs_transcript_variant": "",
        "primary_assembly_loci": {},
        "reference_sequence_records": "",
        "refseqgene_context_intronic_sequence": "",
        "selected_assembly": "GRCh37",
        "submitted_variant": "NC_000002.11:g.95847041_95847043GCG=",
        "transcript_description": "ZNF2-001",
        "validation_warnings": [
            "Required information for ENST00000295210.6 is missing from the Universal Transcript Archive",
            "Query gene2transcripts with search term ENST00000295210 for available transcripts",
            "Required information for ENST00000340539.5 is missing from the Universal Transcript Archive",
            "Query gene2transcripts with search term ENST00000340539 for available transcripts",
            "Required information for ENST00000398107.2 is missing from the Universal Transcript Archive",
            "Query gene2transcripts with search term ENST00000398107 for available transcripts",
            "Required information for ENST00000425369.1 is missing from the Universal Transcript Archive",
            "Query gene2transcripts with search term ENST00000425369 for available transcripts",
            "Required information for ENST00000453539.2 is missing from the Universal Transcript Archive",
            "Query gene2transcripts with search term ENST00000453539 for available transcripts"
        ],
        "variant_exonic_positions": null
    },
    "validation_warning_3": {
        "alt_genomic_loci": [],
        "annotations": {},
        "gene_ids": {},
        "gene_symbol": "",
        "genome_context_intronic_sequence": "",
        "hgvs_lrg_transcript_variant": "",
        "hgvs_lrg_variant": "",
        "hgvs_predicted_protein_consequence": {
            "lrg_slr": "",
            "lrg_tlr": "",
            "slr": "",
            "tlr": ""
        },
        "hgvs_refseqgene_variant": "",
        "hgvs_transcript_variant": "",
        "primary_assembly_loci": {},
        "reference_sequence_records": "",
        "refseqgene_context_intronic_sequence": "",
        "selected_assembly": "GRCh37",
        "submitted_variant": "NC_000002.11:g.95847041_95847043GCG=",
        "transcript_description": "ZNF2-003",
        "validation_warnings": [
            "Required information for ENST00000295210.6 is missing from the Universal Transcript Archive",
            "Query gene2transcripts with search term ENST00000295210 for available transcripts",
            "Required information for ENST00000340539.5 is missing from the Universal Transcript Archive",
            "Query gene2transcripts with search term ENST00000340539 for available transcripts",
            "Required information for ENST00000398107.2 is missing from the Universal Transcript Archive",
            "Query gene2transcripts with search term ENST00000398107 for available transcripts",
            "Required information for ENST00000425369.1 is missing from the Universal Transcript Archive",
            "Query gene2transcripts with search term ENST00000425369 for available transcripts",
            "Required information for ENST00000453539.2 is missing from the Universal Transcript Archive",
            "Query gene2transcripts with search term ENST00000453539 for available transcripts"
        ],
        "variant_exonic_positions": null
    },
    "validation_warning_4": {
        "alt_genomic_loci": [],
        "annotations": {},
        "gene_ids": {},
        "gene_symbol": "",
        "genome_context_intronic_sequence": "",
        "hgvs_lrg_transcript_variant": "",
        "hgvs_lrg_variant": "",
        "hgvs_predicted_protein_consequence": {
            "lrg_slr": "",
            "lrg_tlr": "",
            "slr": "",
            "tlr": ""
        },
        "hgvs_refseqgene_variant": "",
        "hgvs_transcript_variant": "",
        "primary_assembly_loci": {},
        "reference_sequence_records": "",
        "refseqgene_context_intronic_sequence": "",
        "selected_assembly": "GRCh37",
        "submitted_variant": "NC_000002.11:g.95847041_95847043GCG=",
        "transcript_description": "ZNF2-004",
        "validation_warnings": [
            "Required information for ENST00000295210.6 is missing from the Universal Transcript Archive",
            "Query gene2transcripts with search term ENST00000295210 for available transcripts",
            "Required information for ENST00000340539.5 is missing from the Universal Transcript Archive",
            "Query gene2transcripts with search term ENST00000340539 for available transcripts",
            "Required information for ENST00000398107.2 is missing from the Universal Transcript Archive",
            "Query gene2transcripts with search term ENST00000398107 for available transcripts",
            "Required information for ENST00000425369.1 is missing from the Universal Transcript Archive",
            "Query gene2transcripts with search term ENST00000425369 for available transcripts",
            "Required information for ENST00000453539.2 is missing from the Universal Transcript Archive",
            "Query gene2transcripts with search term ENST00000453539 for available transcripts"
        ],
        "variant_exonic_positions": null
    },
    "validation_warning_5": {
        "alt_genomic_loci": [],
        "annotations": {},
        "gene_ids": {},
        "gene_symbol": "",
        "genome_context_intronic_sequence": "",
        "hgvs_lrg_transcript_variant": "",
        "hgvs_lrg_variant": "",
        "hgvs_predicted_protein_consequence": {
            "lrg_slr": "",
            "lrg_tlr": "",
            "slr": "",
            "tlr": ""
        },
        "hgvs_refseqgene_variant": "",
        "hgvs_transcript_variant": "",
        "primary_assembly_loci": {},
        "reference_sequence_records": "",
        "refseqgene_context_intronic_sequence": "",
        "selected_assembly": "GRCh37",
        "submitted_variant": "NC_000002.11:g.95847041_95847043GCG=",
        "transcript_description": "ZNF2-002",
        "validation_warnings": [
            "Required information for ENST00000295210.6 is missing from the Universal Transcript Archive",
            "Query gene2transcripts with search term ENST00000295210 for available transcripts",
            "Required information for ENST00000340539.5 is missing from the Universal Transcript Archive",
            "Query gene2transcripts with search term ENST00000340539 for available transcripts",
            "Required information for ENST00000398107.2 is missing from the Universal Transcript Archive",
            "Query gene2transcripts with search term ENST00000398107 for available transcripts",
            "Required information for ENST00000425369.1 is missing from the Universal Transcript Archive",
            "Query gene2transcripts with search term ENST00000425369 for available transcripts",
            "Required information for ENST00000453539.2 is missing from the Universal Transcript Archive",
            "Query gene2transcripts with search term ENST00000453539 for available transcripts"
        ],
        "variant_exonic_positions": null
    }
}
sbenny1230 commented 2 years ago

Input:

variant = 'NC_000004.12:g.139889957_139889968del'
genome_build = 'GRCh37'

Output:

{
    "ENST00000398940.1:c.109-10_110del": {
        "alt_genomic_loci": [],
        "annotations": {
            "chromosome": "4",
            "db_xref": {
                "CCDS": null,
                "ensemblgene": "ENSG00000196782",
                "hgnc": "16272",
                "ncbigene": null,
                "select": false
            },
            "ensembl_select": false,
            "mane_plus_clinical": false,
            "mane_select": false,
            "map": "4q31.1",
            "note": "mastermind like transcriptional coactivator 3",
            "refseq_select": false,
            "variant": "202"
        },
        "gene_ids": {
            "ccds_ids": [
                "CCDS54805"
            ],
            "ensembl_gene_id": "ENSG00000196782",
            "entrez_gene_id": "55534",
            "hgnc_id": "HGNC:16272",
            "omim_id": [
                "608991"
            ],
            "ucsc_id": "uc062zte.1"
        },
        "gene_symbol": "MAML3",
        "genome_context_intronic_sequence": "NC_000004.11(ENST00000398940.1):c.109-10_110del",
        "hgvs_lrg_transcript_variant": "",
        "hgvs_lrg_variant": "",
        "hgvs_predicted_protein_consequence": {
            "lrg_slr": "",
            "lrg_tlr": "",
            "slr": "ENSP00000381913.1:p.?",
            "tlr": "ENSP00000381913.1:p.?"
        },
        "hgvs_refseqgene_variant": "",
        "hgvs_transcript_variant": "ENST00000398940.1:c.109-10_110del",
        "primary_assembly_loci": {
            "grch37": {
                "hgvs_genomic_description": "NC_000004.11:g.140811111_140811122del",
                "vcf": {
                    "alt": "T",
                    "chr": "4",
                    "pos": "140811063",
                    "ref": "TTGCTGCTGCTGC"
                }
            },
            "grch38": {
                "hgvs_genomic_description": "NC_000004.12:g.139889957_139889968del",
                "vcf": {
                    "alt": "T",
                    "chr": "4",
                    "pos": "139889909",
                    "ref": "TTGCTGCTGCTGC"
                }
            },
            "hg19": {
                "hgvs_genomic_description": "NC_000004.11:g.140811111_140811122del",
                "vcf": {
                    "alt": "T",
                    "chr": "chr4",
                    "pos": "140811063",
                    "ref": "TTGCTGCTGCTGC"
                }
            },
            "hg38": {
                "hgvs_genomic_description": "NC_000004.12:g.139889957_139889968del",
                "vcf": {
                    "alt": "T",
                    "chr": "chr4",
                    "pos": "139889909",
                    "ref": "TTGCTGCTGCTGC"
                }
            }
        },
        "reference_sequence_records": {
            "protein": "https://grch37.ensembl.org/Homo_sapiens/Transcript/ProteinSummary?db=core;p=ENSP00000381913.1",
            "transcript": "https://grch37.ensembl.org/Homo_sapiens/Transcript/Summary?db=core;t=ENST00000398940.1"
        },
        "refseqgene_context_intronic_sequence": "",
        "selected_assembly": "GRCh37",
        "submitted_variant": "NC_000004.12:g.139889957_139889968del",
        "transcript_description": "MAML3-202",
        "validation_warnings": [
            "Removing redundant reference bases from variant description",
            "ENST00000398940.1:c.97_108del normalized to ENST00000398940.1:c.109-10_110del",
            "ENST00000509479.6:c.1514_1527delinsCA: Variant reference (CGCAGCAGCAGCAA) does not agree with reference sequence (AGCAGCAGCAGCAA)/ENST00000509479.6/NC_000004.12~ENST00000509479.6:c.1514_1527delinsCA: Variant reference (CGCAGCAGCAGCAA) does not agree with reference sequence (AGCAGCAGCAGCAA)/ENST00000509479.6/NC_000004.12~"
        ],
        "variant_exonic_positions": null
    },
    "flag": "gene_variant",
    "metadata": {
        "variantvalidator_hgvs_version": "2.0.2.dev1+g6ecbf8e",
        "variantvalidator_version": "1.0.5.dev324+g714b8d1.d20220718",
        "vvdb_version": "vvdb_2022_04",
        "vvseqrepo_db": "VV_SR_2022_02/master",
        "vvta_version": "vvta_2022_02"
    },
    "validation_warning_1": {
        "alt_genomic_loci": [],
        "annotations": {},
        "gene_ids": {},
        "gene_symbol": "",
        "genome_context_intronic_sequence": "",
        "hgvs_lrg_transcript_variant": "",
        "hgvs_lrg_variant": "",
        "hgvs_predicted_protein_consequence": {
            "lrg_slr": "",
            "lrg_tlr": "",
            "slr": "",
            "tlr": ""
        },
        "hgvs_refseqgene_variant": "",
        "hgvs_transcript_variant": "",
        "primary_assembly_loci": {},
        "reference_sequence_records": "",
        "refseqgene_context_intronic_sequence": "",
        "selected_assembly": "GRCh37",
        "submitted_variant": "NC_000004.12:g.139889957_139889968del",
        "transcript_description": "MAML3-203",
        "validation_warnings": [
            "Removing redundant reference bases from variant description",
            "ENST00000398940.1:c.97_108del normalized to ENST00000398940.1:c.109-10_110del",
            "ENST00000509479.6:c.1514_1527delinsCA: Variant reference (CGCAGCAGCAGCAA) does not agree with reference sequence (AGCAGCAGCAGCAA)/ENST00000509479.6/NC_000004.12~ENST00000509479.6:c.1514_1527delinsCA: Variant reference (CGCAGCAGCAGCAA) does not agree with reference sequence (AGCAGCAGCAGCAA)/ENST00000509479.6/NC_000004.12~"
        ],
        "variant_exonic_positions": null
    }
}
sbenny1230 commented 2 years ago

Input:

variant = 'NC_000004.12:g.139889957_139889968del'
genome_build = 'GRCh38'

Output:

{
    "ENST00000398940.1:c.109-10_110del": {
        "alt_genomic_loci": [],
        "annotations": {
            "chromosome": "4",
            "db_xref": {
                "CCDS": null,
                "ensemblgene": "ENSG00000196782",
                "hgnc": "16272",
                "ncbigene": null,
                "select": false
            },
            "ensembl_select": false,
            "mane_plus_clinical": false,
            "mane_select": false,
            "map": "4q31.1",
            "note": "mastermind like transcriptional coactivator 3",
            "refseq_select": false,
            "variant": "202"
        },
        "gene_ids": {
            "ccds_ids": [
                "CCDS54805"
            ],
            "ensembl_gene_id": "ENSG00000196782",
            "entrez_gene_id": "55534",
            "hgnc_id": "HGNC:16272",
            "omim_id": [
                "608991"
            ],
            "ucsc_id": "uc062zte.1"
        },
        "gene_symbol": "MAML3",
        "genome_context_intronic_sequence": "NC_000004.12(ENST00000398940.1):c.109-10_110del",
        "hgvs_lrg_transcript_variant": "",
        "hgvs_lrg_variant": "",
        "hgvs_predicted_protein_consequence": {
            "lrg_slr": "",
            "lrg_tlr": "",
            "slr": "ENSP00000381913.1:p.?",
            "tlr": "ENSP00000381913.1:p.?"
        },
        "hgvs_refseqgene_variant": "",
        "hgvs_transcript_variant": "ENST00000398940.1:c.109-10_110del",
        "primary_assembly_loci": {
            "grch37": {
                "hgvs_genomic_description": "NC_000004.11:g.140811111_140811122del",
                "vcf": {
                    "alt": "T",
                    "chr": "4",
                    "pos": "140811063",
                    "ref": "TTGCTGCTGCTGC"
                }
            },
            "grch38": {
                "hgvs_genomic_description": "NC_000004.12:g.139889957_139889968del",
                "vcf": {
                    "alt": "T",
                    "chr": "4",
                    "pos": "139889909",
                    "ref": "TTGCTGCTGCTGC"
                }
            },
            "hg19": {
                "hgvs_genomic_description": "NC_000004.11:g.140811111_140811122del",
                "vcf": {
                    "alt": "T",
                    "chr": "chr4",
                    "pos": "140811063",
                    "ref": "TTGCTGCTGCTGC"
                }
            },
            "hg38": {
                "hgvs_genomic_description": "NC_000004.12:g.139889957_139889968del",
                "vcf": {
                    "alt": "T",
                    "chr": "chr4",
                    "pos": "139889909",
                    "ref": "TTGCTGCTGCTGC"
                }
            }
        },
        "reference_sequence_records": {
            "protein": "https://www.ensembl.org/Homo_sapiens/Transcript/ProteinSummary?db=core;p=ENSP00000381913.1",
            "transcript": "https://www.ensembl.org/Homo_sapiens/Transcript/Summary?db=core;t=ENST00000398940.1"
        },
        "refseqgene_context_intronic_sequence": "",
        "selected_assembly": "GRCh38",
        "submitted_variant": "NC_000004.12:g.139889957_139889968del",
        "transcript_description": "MAML3-202",
        "validation_warnings": [
            "Removing redundant reference bases from variant description",
            "ENST00000398940.1:c.97_108del normalized to ENST00000398940.1:c.109-10_110del",
            "Required information for ENST00000509479.6 is missing from the Universal Transcript Archive",
            "Query gene2transcripts with search term ENST00000509479 for available transcripts"
        ],
        "variant_exonic_positions": null
    },
    "flag": "gene_variant",
    "metadata": {
        "variantvalidator_hgvs_version": "2.0.2.dev1+g6ecbf8e",
        "variantvalidator_version": "1.0.5.dev324+g714b8d1.d20220718",
        "vvdb_version": "vvdb_2022_04",
        "vvseqrepo_db": "VV_SR_2022_02/master",
        "vvta_version": "vvta_2022_02"
    },
    "validation_warning_1": {
        "alt_genomic_loci": [],
        "annotations": {},
        "gene_ids": {},
        "gene_symbol": "",
        "genome_context_intronic_sequence": "",
        "hgvs_lrg_transcript_variant": "",
        "hgvs_lrg_variant": "",
        "hgvs_predicted_protein_consequence": {
            "lrg_slr": "",
            "lrg_tlr": "",
            "slr": "",
            "tlr": ""
        },
        "hgvs_refseqgene_variant": "",
        "hgvs_transcript_variant": "",
        "primary_assembly_loci": {},
        "reference_sequence_records": "",
        "refseqgene_context_intronic_sequence": "",
        "selected_assembly": "GRCh38",
        "submitted_variant": "NC_000004.12:g.139889957_139889968del",
        "transcript_description": "MAML3-202",
        "validation_warnings": [
            "Removing redundant reference bases from variant description",
            "ENST00000398940.1:c.97_108del normalized to ENST00000398940.1:c.109-10_110del",
            "Required information for ENST00000509479.6 is missing from the Universal Transcript Archive",
            "Query gene2transcripts with search term ENST00000509479 for available transcripts"
        ],
        "variant_exonic_positions": null
    }
}