Open Peter-J-Freeman opened 2 years ago
Just going to print the current results for each
Input:
variant = '15-72105928-AC-A'
genome_build = 'GRCh37'
Output:
{
"ENST00000326995.5:n.1175del": {
"alt_genomic_loci": [],
"annotations": {
"chromosome": "15",
"db_xref": {
"CCDS": null,
"ensemblgene": "ENSG00000031544",
"hgnc": "7974",
"ncbigene": null,
"select": false
},
"ensembl_select": false,
"mane_plus_clinical": false,
"mane_select": false,
"map": "15q23",
"note": "nuclear receptor subfamily 2 group E member 3",
"refseq_select": false,
"variant": "201"
},
"gene_ids": {
"ccds_ids": [
"CCDS73750",
"CCDS73751"
],
"ensembl_gene_id": "ENSG00000278570",
"entrez_gene_id": "10002",
"hgnc_id": "HGNC:7974",
"omim_id": [
"604485"
],
"ucsc_id": "uc032cil.2"
},
"gene_symbol": "NR2E3",
"genome_context_intronic_sequence": "",
"hgvs_lrg_transcript_variant": "",
"hgvs_lrg_variant": "",
"hgvs_predicted_protein_consequence": {
"lrg_slr": "",
"lrg_tlr": "",
"slr": "",
"tlr": ""
},
"hgvs_refseqgene_variant": "",
"hgvs_transcript_variant": "ENST00000326995.5:n.1175del",
"primary_assembly_loci": {
"grch37": {
"hgvs_genomic_description": "NC_000015.9:g.72105933del",
"vcf": {
"alt": "A",
"chr": "15",
"pos": "72105928",
"ref": "AC"
}
},
"hg19": {
"hgvs_genomic_description": "NC_000015.9:g.72105933del",
"vcf": {
"alt": "A",
"chr": "chr15",
"pos": "72105928",
"ref": "AC"
}
}
},
"reference_sequence_records": {
"transcript": "https://grch37.ensembl.org/Homo_sapiens/Transcript/Summary?db=core;t=ENST00000326995.5"
},
"refseqgene_context_intronic_sequence": "",
"selected_assembly": "GRCh37",
"submitted_variant": "15-72105928-AC-A",
"transcript_description": "NR2E3-201",
"validation_warnings": [
"NC_000015.9:g.72105928AC>A automapped to NC_000015.9:g.72105933delC",
"Removing redundant reference bases from variant description"
],
"variant_exonic_positions": null
},
"ENST00000398840.2:n.1141del": {
"alt_genomic_loci": [],
"annotations": {
"chromosome": "15",
"db_xref": {
"CCDS": null,
"ensemblgene": "ENSG00000031544",
"hgnc": "7974",
"ncbigene": null,
"select": "Ensembl"
},
"ensembl_select": true,
"mane_plus_clinical": false,
"mane_select": false,
"map": "15q23",
"note": "nuclear receptor subfamily 2 group E member 3",
"refseq_select": false,
"variant": "202"
},
"gene_ids": {
"ccds_ids": [
"CCDS73750",
"CCDS73751"
],
"ensembl_gene_id": "ENSG00000278570",
"entrez_gene_id": "10002",
"hgnc_id": "HGNC:7974",
"omim_id": [
"604485"
],
"ucsc_id": "uc032cil.2"
},
"gene_symbol": "NR2E3",
"genome_context_intronic_sequence": "",
"hgvs_lrg_transcript_variant": "",
"hgvs_lrg_variant": "",
"hgvs_predicted_protein_consequence": {
"lrg_slr": "",
"lrg_tlr": "",
"slr": "",
"tlr": ""
},
"hgvs_refseqgene_variant": "",
"hgvs_transcript_variant": "ENST00000398840.2:n.1141del",
"primary_assembly_loci": {
"grch37": {
"hgvs_genomic_description": "NC_000015.9:g.72105933del",
"vcf": {
"alt": "A",
"chr": "15",
"pos": "72105928",
"ref": "AC"
}
},
"hg19": {
"hgvs_genomic_description": "NC_000015.9:g.72105933del",
"vcf": {
"alt": "A",
"chr": "chr15",
"pos": "72105928",
"ref": "AC"
}
}
},
"reference_sequence_records": {
"transcript": "https://grch37.ensembl.org/Homo_sapiens/Transcript/Summary?db=core;t=ENST00000398840.2"
},
"refseqgene_context_intronic_sequence": "",
"selected_assembly": "GRCh37",
"submitted_variant": "15-72105928-AC-A",
"transcript_description": "NR2E3-202",
"validation_warnings": [
"NC_000015.9:g.72105928AC>A automapped to NC_000015.9:g.72105933delC",
"Removing redundant reference bases from variant description"
],
"variant_exonic_positions": null
},
"ENST00000561604.1:n.1176del": {
"alt_genomic_loci": [],
"annotations": {
"chromosome": "15",
"db_xref": {
"CCDS": null,
"ensemblgene": "ENSG00000031544",
"hgnc": "7974",
"ncbigene": null,
"select": false
},
"ensembl_select": false,
"mane_plus_clinical": false,
"mane_select": false,
"map": "15q23",
"note": "nuclear receptor subfamily 2 group E member 3",
"refseq_select": false,
"variant": "004"
},
"gene_ids": {
"ccds_ids": [
"CCDS73750",
"CCDS73751"
],
"ensembl_gene_id": "ENSG00000278570",
"entrez_gene_id": "10002",
"hgnc_id": "HGNC:7974",
"omim_id": [
"604485"
],
"ucsc_id": "uc032cil.2"
},
"gene_symbol": "NR2E3",
"genome_context_intronic_sequence": "",
"hgvs_lrg_transcript_variant": "",
"hgvs_lrg_variant": "",
"hgvs_predicted_protein_consequence": {
"lrg_slr": "",
"lrg_tlr": "",
"slr": "",
"tlr": ""
},
"hgvs_refseqgene_variant": "",
"hgvs_transcript_variant": "ENST00000561604.1:n.1176del",
"primary_assembly_loci": {
"grch37": {
"hgvs_genomic_description": "NC_000015.9:g.72105933del",
"vcf": {
"alt": "A",
"chr": "15",
"pos": "72105928",
"ref": "AC"
}
},
"hg19": {
"hgvs_genomic_description": "NC_000015.9:g.72105933del",
"vcf": {
"alt": "A",
"chr": "chr15",
"pos": "72105928",
"ref": "AC"
}
}
},
"reference_sequence_records": {
"transcript": "https://grch37.ensembl.org/Homo_sapiens/Transcript/Summary?db=core;t=ENST00000561604.1"
},
"refseqgene_context_intronic_sequence": "",
"selected_assembly": "GRCh37",
"submitted_variant": "15-72105928-AC-A",
"transcript_description": "NR2E3-004",
"validation_warnings": [
"NC_000015.9:g.72105928AC>A automapped to NC_000015.9:g.72105933delC",
"Removing redundant reference bases from variant description"
],
"variant_exonic_positions": null
},
"ENST00000562839.1:n.1104del": {
"alt_genomic_loci": [],
"annotations": {
"chromosome": "15",
"db_xref": {
"CCDS": null,
"ensemblgene": "ENSG00000031544",
"hgnc": "7974",
"ncbigene": null,
"select": false
},
"ensembl_select": false,
"mane_plus_clinical": false,
"mane_select": false,
"map": "15q23",
"note": "nuclear receptor subfamily 2 group E member 3",
"refseq_select": false,
"variant": "002"
},
"gene_ids": {
"ccds_ids": [
"CCDS73750",
"CCDS73751"
],
"ensembl_gene_id": "ENSG00000278570",
"entrez_gene_id": "10002",
"hgnc_id": "HGNC:7974",
"omim_id": [
"604485"
],
"ucsc_id": "uc032cil.2"
},
"gene_symbol": "NR2E3",
"genome_context_intronic_sequence": "",
"hgvs_lrg_transcript_variant": "",
"hgvs_lrg_variant": "",
"hgvs_predicted_protein_consequence": {
"lrg_slr": "",
"lrg_tlr": "",
"slr": "",
"tlr": ""
},
"hgvs_refseqgene_variant": "",
"hgvs_transcript_variant": "ENST00000562839.1:n.1104del",
"primary_assembly_loci": {
"grch37": {
"hgvs_genomic_description": "NC_000015.9:g.72105933del",
"vcf": {
"alt": "A",
"chr": "15",
"pos": "72105928",
"ref": "AC"
}
},
"hg19": {
"hgvs_genomic_description": "NC_000015.9:g.72105933del",
"vcf": {
"alt": "A",
"chr": "chr15",
"pos": "72105928",
"ref": "AC"
}
}
},
"reference_sequence_records": {
"transcript": "https://grch37.ensembl.org/Homo_sapiens/Transcript/Summary?db=core;t=ENST00000562839.1"
},
"refseqgene_context_intronic_sequence": "",
"selected_assembly": "GRCh37",
"submitted_variant": "15-72105928-AC-A",
"transcript_description": "NR2E3-002",
"validation_warnings": [
"NC_000015.9:g.72105928AC>A automapped to NC_000015.9:g.72105933delC",
"Removing redundant reference bases from variant description"
],
"variant_exonic_positions": null
},
"ENST00000562925.1:n.1184del": {
"alt_genomic_loci": [],
"annotations": {
"chromosome": "15",
"db_xref": {
"CCDS": null,
"ensemblgene": "ENSG00000031544",
"hgnc": "7974",
"ncbigene": null,
"select": false
},
"ensembl_select": false,
"mane_plus_clinical": false,
"mane_select": false,
"map": "15q23",
"note": "nuclear receptor subfamily 2 group E member 3",
"refseq_select": false,
"variant": "003"
},
"gene_ids": {
"ccds_ids": [
"CCDS73750",
"CCDS73751"
],
"ensembl_gene_id": "ENSG00000278570",
"entrez_gene_id": "10002",
"hgnc_id": "HGNC:7974",
"omim_id": [
"604485"
],
"ucsc_id": "uc032cil.2"
},
"gene_symbol": "NR2E3",
"genome_context_intronic_sequence": "",
"hgvs_lrg_transcript_variant": "",
"hgvs_lrg_variant": "",
"hgvs_predicted_protein_consequence": {
"lrg_slr": "",
"lrg_tlr": "",
"slr": "",
"tlr": ""
},
"hgvs_refseqgene_variant": "",
"hgvs_transcript_variant": "ENST00000562925.1:n.1184del",
"primary_assembly_loci": {
"grch37": {
"hgvs_genomic_description": "NC_000015.9:g.72105933del",
"vcf": {
"alt": "A",
"chr": "15",
"pos": "72105928",
"ref": "AC"
}
},
"hg19": {
"hgvs_genomic_description": "NC_000015.9:g.72105933del",
"vcf": {
"alt": "A",
"chr": "chr15",
"pos": "72105928",
"ref": "AC"
}
}
},
"reference_sequence_records": {
"transcript": "https://grch37.ensembl.org/Homo_sapiens/Transcript/Summary?db=core;t=ENST00000562925.1"
},
"refseqgene_context_intronic_sequence": "",
"selected_assembly": "GRCh37",
"submitted_variant": "15-72105928-AC-A",
"transcript_description": "NR2E3-003",
"validation_warnings": [
"NC_000015.9:g.72105928AC>A automapped to NC_000015.9:g.72105933delC",
"Removing redundant reference bases from variant description"
],
"variant_exonic_positions": null
},
"ENST00000567496.1:n.1142del": {
"alt_genomic_loci": [],
"annotations": {
"chromosome": "15",
"db_xref": {
"CCDS": null,
"ensemblgene": "ENSG00000031544",
"hgnc": "7974",
"ncbigene": null,
"select": false
},
"ensembl_select": false,
"mane_plus_clinical": false,
"mane_select": false,
"map": "15q23",
"note": "nuclear receptor subfamily 2 group E member 3",
"refseq_select": false,
"variant": "001"
},
"gene_ids": {
"ccds_ids": [
"CCDS73750",
"CCDS73751"
],
"ensembl_gene_id": "ENSG00000278570",
"entrez_gene_id": "10002",
"hgnc_id": "HGNC:7974",
"omim_id": [
"604485"
],
"ucsc_id": "uc032cil.2"
},
"gene_symbol": "NR2E3",
"genome_context_intronic_sequence": "",
"hgvs_lrg_transcript_variant": "",
"hgvs_lrg_variant": "",
"hgvs_predicted_protein_consequence": {
"lrg_slr": "",
"lrg_tlr": "",
"slr": "",
"tlr": ""
},
"hgvs_refseqgene_variant": "",
"hgvs_transcript_variant": "ENST00000567496.1:n.1142del",
"primary_assembly_loci": {
"grch37": {
"hgvs_genomic_description": "NC_000015.9:g.72105933del",
"vcf": {
"alt": "A",
"chr": "15",
"pos": "72105928",
"ref": "AC"
}
},
"hg19": {
"hgvs_genomic_description": "NC_000015.9:g.72105933del",
"vcf": {
"alt": "A",
"chr": "chr15",
"pos": "72105928",
"ref": "AC"
}
}
},
"reference_sequence_records": {
"transcript": "https://grch37.ensembl.org/Homo_sapiens/Transcript/Summary?db=core;t=ENST00000567496.1"
},
"refseqgene_context_intronic_sequence": "",
"selected_assembly": "GRCh37",
"submitted_variant": "15-72105928-AC-A",
"transcript_description": "NR2E3-001",
"validation_warnings": [
"NC_000015.9:g.72105928AC>A automapped to NC_000015.9:g.72105933delC",
"Removing redundant reference bases from variant description"
],
"variant_exonic_positions": null
},
"flag": "gene_variant",
"metadata": {
"variantvalidator_hgvs_version": "2.0.2.dev1+g6ecbf8e",
"variantvalidator_version": "1.0.5.dev324+g714b8d1.d20220718",
"vvdb_version": "vvdb_2022_04",
"vvseqrepo_db": "VV_SR_2022_02/master",
"vvta_version": "vvta_2022_02"
}
}
Input:
variant = 'NC_000002.11:g.95847041_95847043GCG='
genome_build = 'GRCh37'
Output:
{
"flag": "warning",
"metadata": {
"variantvalidator_hgvs_version": "2.0.2.dev1+g6ecbf8e",
"variantvalidator_version": "1.0.5.dev324+g714b8d1.d20220718",
"vvdb_version": "vvdb_2022_04",
"vvseqrepo_db": "VV_SR_2022_02/master",
"vvta_version": "vvta_2022_02"
},
"validation_warning_1": {
"alt_genomic_loci": [],
"annotations": {},
"gene_ids": {},
"gene_symbol": "",
"genome_context_intronic_sequence": "",
"hgvs_lrg_transcript_variant": "",
"hgvs_lrg_variant": "",
"hgvs_predicted_protein_consequence": {
"lrg_slr": "",
"lrg_tlr": "",
"slr": "",
"tlr": ""
},
"hgvs_refseqgene_variant": "",
"hgvs_transcript_variant": "",
"primary_assembly_loci": {},
"reference_sequence_records": "",
"refseqgene_context_intronic_sequence": "",
"selected_assembly": "GRCh37",
"submitted_variant": "NC_000002.11:g.95847041_95847043GCG=",
"transcript_description": "ZNF2-005",
"validation_warnings": [
"Required information for ENST00000295210.6 is missing from the Universal Transcript Archive",
"Query gene2transcripts with search term ENST00000295210 for available transcripts",
"Required information for ENST00000340539.5 is missing from the Universal Transcript Archive",
"Query gene2transcripts with search term ENST00000340539 for available transcripts",
"Required information for ENST00000398107.2 is missing from the Universal Transcript Archive",
"Query gene2transcripts with search term ENST00000398107 for available transcripts",
"Required information for ENST00000425369.1 is missing from the Universal Transcript Archive",
"Query gene2transcripts with search term ENST00000425369 for available transcripts",
"Required information for ENST00000453539.2 is missing from the Universal Transcript Archive",
"Query gene2transcripts with search term ENST00000453539 for available transcripts"
],
"variant_exonic_positions": null
},
"validation_warning_2": {
"alt_genomic_loci": [],
"annotations": {},
"gene_ids": {},
"gene_symbol": "",
"genome_context_intronic_sequence": "",
"hgvs_lrg_transcript_variant": "",
"hgvs_lrg_variant": "",
"hgvs_predicted_protein_consequence": {
"lrg_slr": "",
"lrg_tlr": "",
"slr": "",
"tlr": ""
},
"hgvs_refseqgene_variant": "",
"hgvs_transcript_variant": "",
"primary_assembly_loci": {},
"reference_sequence_records": "",
"refseqgene_context_intronic_sequence": "",
"selected_assembly": "GRCh37",
"submitted_variant": "NC_000002.11:g.95847041_95847043GCG=",
"transcript_description": "ZNF2-001",
"validation_warnings": [
"Required information for ENST00000295210.6 is missing from the Universal Transcript Archive",
"Query gene2transcripts with search term ENST00000295210 for available transcripts",
"Required information for ENST00000340539.5 is missing from the Universal Transcript Archive",
"Query gene2transcripts with search term ENST00000340539 for available transcripts",
"Required information for ENST00000398107.2 is missing from the Universal Transcript Archive",
"Query gene2transcripts with search term ENST00000398107 for available transcripts",
"Required information for ENST00000425369.1 is missing from the Universal Transcript Archive",
"Query gene2transcripts with search term ENST00000425369 for available transcripts",
"Required information for ENST00000453539.2 is missing from the Universal Transcript Archive",
"Query gene2transcripts with search term ENST00000453539 for available transcripts"
],
"variant_exonic_positions": null
},
"validation_warning_3": {
"alt_genomic_loci": [],
"annotations": {},
"gene_ids": {},
"gene_symbol": "",
"genome_context_intronic_sequence": "",
"hgvs_lrg_transcript_variant": "",
"hgvs_lrg_variant": "",
"hgvs_predicted_protein_consequence": {
"lrg_slr": "",
"lrg_tlr": "",
"slr": "",
"tlr": ""
},
"hgvs_refseqgene_variant": "",
"hgvs_transcript_variant": "",
"primary_assembly_loci": {},
"reference_sequence_records": "",
"refseqgene_context_intronic_sequence": "",
"selected_assembly": "GRCh37",
"submitted_variant": "NC_000002.11:g.95847041_95847043GCG=",
"transcript_description": "ZNF2-003",
"validation_warnings": [
"Required information for ENST00000295210.6 is missing from the Universal Transcript Archive",
"Query gene2transcripts with search term ENST00000295210 for available transcripts",
"Required information for ENST00000340539.5 is missing from the Universal Transcript Archive",
"Query gene2transcripts with search term ENST00000340539 for available transcripts",
"Required information for ENST00000398107.2 is missing from the Universal Transcript Archive",
"Query gene2transcripts with search term ENST00000398107 for available transcripts",
"Required information for ENST00000425369.1 is missing from the Universal Transcript Archive",
"Query gene2transcripts with search term ENST00000425369 for available transcripts",
"Required information for ENST00000453539.2 is missing from the Universal Transcript Archive",
"Query gene2transcripts with search term ENST00000453539 for available transcripts"
],
"variant_exonic_positions": null
},
"validation_warning_4": {
"alt_genomic_loci": [],
"annotations": {},
"gene_ids": {},
"gene_symbol": "",
"genome_context_intronic_sequence": "",
"hgvs_lrg_transcript_variant": "",
"hgvs_lrg_variant": "",
"hgvs_predicted_protein_consequence": {
"lrg_slr": "",
"lrg_tlr": "",
"slr": "",
"tlr": ""
},
"hgvs_refseqgene_variant": "",
"hgvs_transcript_variant": "",
"primary_assembly_loci": {},
"reference_sequence_records": "",
"refseqgene_context_intronic_sequence": "",
"selected_assembly": "GRCh37",
"submitted_variant": "NC_000002.11:g.95847041_95847043GCG=",
"transcript_description": "ZNF2-004",
"validation_warnings": [
"Required information for ENST00000295210.6 is missing from the Universal Transcript Archive",
"Query gene2transcripts with search term ENST00000295210 for available transcripts",
"Required information for ENST00000340539.5 is missing from the Universal Transcript Archive",
"Query gene2transcripts with search term ENST00000340539 for available transcripts",
"Required information for ENST00000398107.2 is missing from the Universal Transcript Archive",
"Query gene2transcripts with search term ENST00000398107 for available transcripts",
"Required information for ENST00000425369.1 is missing from the Universal Transcript Archive",
"Query gene2transcripts with search term ENST00000425369 for available transcripts",
"Required information for ENST00000453539.2 is missing from the Universal Transcript Archive",
"Query gene2transcripts with search term ENST00000453539 for available transcripts"
],
"variant_exonic_positions": null
},
"validation_warning_5": {
"alt_genomic_loci": [],
"annotations": {},
"gene_ids": {},
"gene_symbol": "",
"genome_context_intronic_sequence": "",
"hgvs_lrg_transcript_variant": "",
"hgvs_lrg_variant": "",
"hgvs_predicted_protein_consequence": {
"lrg_slr": "",
"lrg_tlr": "",
"slr": "",
"tlr": ""
},
"hgvs_refseqgene_variant": "",
"hgvs_transcript_variant": "",
"primary_assembly_loci": {},
"reference_sequence_records": "",
"refseqgene_context_intronic_sequence": "",
"selected_assembly": "GRCh37",
"submitted_variant": "NC_000002.11:g.95847041_95847043GCG=",
"transcript_description": "ZNF2-002",
"validation_warnings": [
"Required information for ENST00000295210.6 is missing from the Universal Transcript Archive",
"Query gene2transcripts with search term ENST00000295210 for available transcripts",
"Required information for ENST00000340539.5 is missing from the Universal Transcript Archive",
"Query gene2transcripts with search term ENST00000340539 for available transcripts",
"Required information for ENST00000398107.2 is missing from the Universal Transcript Archive",
"Query gene2transcripts with search term ENST00000398107 for available transcripts",
"Required information for ENST00000425369.1 is missing from the Universal Transcript Archive",
"Query gene2transcripts with search term ENST00000425369 for available transcripts",
"Required information for ENST00000453539.2 is missing from the Universal Transcript Archive",
"Query gene2transcripts with search term ENST00000453539 for available transcripts"
],
"variant_exonic_positions": null
}
}
Input:
variant = 'NC_000004.12:g.139889957_139889968del'
genome_build = 'GRCh37'
Output:
{
"ENST00000398940.1:c.109-10_110del": {
"alt_genomic_loci": [],
"annotations": {
"chromosome": "4",
"db_xref": {
"CCDS": null,
"ensemblgene": "ENSG00000196782",
"hgnc": "16272",
"ncbigene": null,
"select": false
},
"ensembl_select": false,
"mane_plus_clinical": false,
"mane_select": false,
"map": "4q31.1",
"note": "mastermind like transcriptional coactivator 3",
"refseq_select": false,
"variant": "202"
},
"gene_ids": {
"ccds_ids": [
"CCDS54805"
],
"ensembl_gene_id": "ENSG00000196782",
"entrez_gene_id": "55534",
"hgnc_id": "HGNC:16272",
"omim_id": [
"608991"
],
"ucsc_id": "uc062zte.1"
},
"gene_symbol": "MAML3",
"genome_context_intronic_sequence": "NC_000004.11(ENST00000398940.1):c.109-10_110del",
"hgvs_lrg_transcript_variant": "",
"hgvs_lrg_variant": "",
"hgvs_predicted_protein_consequence": {
"lrg_slr": "",
"lrg_tlr": "",
"slr": "ENSP00000381913.1:p.?",
"tlr": "ENSP00000381913.1:p.?"
},
"hgvs_refseqgene_variant": "",
"hgvs_transcript_variant": "ENST00000398940.1:c.109-10_110del",
"primary_assembly_loci": {
"grch37": {
"hgvs_genomic_description": "NC_000004.11:g.140811111_140811122del",
"vcf": {
"alt": "T",
"chr": "4",
"pos": "140811063",
"ref": "TTGCTGCTGCTGC"
}
},
"grch38": {
"hgvs_genomic_description": "NC_000004.12:g.139889957_139889968del",
"vcf": {
"alt": "T",
"chr": "4",
"pos": "139889909",
"ref": "TTGCTGCTGCTGC"
}
},
"hg19": {
"hgvs_genomic_description": "NC_000004.11:g.140811111_140811122del",
"vcf": {
"alt": "T",
"chr": "chr4",
"pos": "140811063",
"ref": "TTGCTGCTGCTGC"
}
},
"hg38": {
"hgvs_genomic_description": "NC_000004.12:g.139889957_139889968del",
"vcf": {
"alt": "T",
"chr": "chr4",
"pos": "139889909",
"ref": "TTGCTGCTGCTGC"
}
}
},
"reference_sequence_records": {
"protein": "https://grch37.ensembl.org/Homo_sapiens/Transcript/ProteinSummary?db=core;p=ENSP00000381913.1",
"transcript": "https://grch37.ensembl.org/Homo_sapiens/Transcript/Summary?db=core;t=ENST00000398940.1"
},
"refseqgene_context_intronic_sequence": "",
"selected_assembly": "GRCh37",
"submitted_variant": "NC_000004.12:g.139889957_139889968del",
"transcript_description": "MAML3-202",
"validation_warnings": [
"Removing redundant reference bases from variant description",
"ENST00000398940.1:c.97_108del normalized to ENST00000398940.1:c.109-10_110del",
"ENST00000509479.6:c.1514_1527delinsCA: Variant reference (CGCAGCAGCAGCAA) does not agree with reference sequence (AGCAGCAGCAGCAA)/ENST00000509479.6/NC_000004.12~ENST00000509479.6:c.1514_1527delinsCA: Variant reference (CGCAGCAGCAGCAA) does not agree with reference sequence (AGCAGCAGCAGCAA)/ENST00000509479.6/NC_000004.12~"
],
"variant_exonic_positions": null
},
"flag": "gene_variant",
"metadata": {
"variantvalidator_hgvs_version": "2.0.2.dev1+g6ecbf8e",
"variantvalidator_version": "1.0.5.dev324+g714b8d1.d20220718",
"vvdb_version": "vvdb_2022_04",
"vvseqrepo_db": "VV_SR_2022_02/master",
"vvta_version": "vvta_2022_02"
},
"validation_warning_1": {
"alt_genomic_loci": [],
"annotations": {},
"gene_ids": {},
"gene_symbol": "",
"genome_context_intronic_sequence": "",
"hgvs_lrg_transcript_variant": "",
"hgvs_lrg_variant": "",
"hgvs_predicted_protein_consequence": {
"lrg_slr": "",
"lrg_tlr": "",
"slr": "",
"tlr": ""
},
"hgvs_refseqgene_variant": "",
"hgvs_transcript_variant": "",
"primary_assembly_loci": {},
"reference_sequence_records": "",
"refseqgene_context_intronic_sequence": "",
"selected_assembly": "GRCh37",
"submitted_variant": "NC_000004.12:g.139889957_139889968del",
"transcript_description": "MAML3-203",
"validation_warnings": [
"Removing redundant reference bases from variant description",
"ENST00000398940.1:c.97_108del normalized to ENST00000398940.1:c.109-10_110del",
"ENST00000509479.6:c.1514_1527delinsCA: Variant reference (CGCAGCAGCAGCAA) does not agree with reference sequence (AGCAGCAGCAGCAA)/ENST00000509479.6/NC_000004.12~ENST00000509479.6:c.1514_1527delinsCA: Variant reference (CGCAGCAGCAGCAA) does not agree with reference sequence (AGCAGCAGCAGCAA)/ENST00000509479.6/NC_000004.12~"
],
"variant_exonic_positions": null
}
}
Input:
variant = 'NC_000004.12:g.139889957_139889968del'
genome_build = 'GRCh38'
Output:
{
"ENST00000398940.1:c.109-10_110del": {
"alt_genomic_loci": [],
"annotations": {
"chromosome": "4",
"db_xref": {
"CCDS": null,
"ensemblgene": "ENSG00000196782",
"hgnc": "16272",
"ncbigene": null,
"select": false
},
"ensembl_select": false,
"mane_plus_clinical": false,
"mane_select": false,
"map": "4q31.1",
"note": "mastermind like transcriptional coactivator 3",
"refseq_select": false,
"variant": "202"
},
"gene_ids": {
"ccds_ids": [
"CCDS54805"
],
"ensembl_gene_id": "ENSG00000196782",
"entrez_gene_id": "55534",
"hgnc_id": "HGNC:16272",
"omim_id": [
"608991"
],
"ucsc_id": "uc062zte.1"
},
"gene_symbol": "MAML3",
"genome_context_intronic_sequence": "NC_000004.12(ENST00000398940.1):c.109-10_110del",
"hgvs_lrg_transcript_variant": "",
"hgvs_lrg_variant": "",
"hgvs_predicted_protein_consequence": {
"lrg_slr": "",
"lrg_tlr": "",
"slr": "ENSP00000381913.1:p.?",
"tlr": "ENSP00000381913.1:p.?"
},
"hgvs_refseqgene_variant": "",
"hgvs_transcript_variant": "ENST00000398940.1:c.109-10_110del",
"primary_assembly_loci": {
"grch37": {
"hgvs_genomic_description": "NC_000004.11:g.140811111_140811122del",
"vcf": {
"alt": "T",
"chr": "4",
"pos": "140811063",
"ref": "TTGCTGCTGCTGC"
}
},
"grch38": {
"hgvs_genomic_description": "NC_000004.12:g.139889957_139889968del",
"vcf": {
"alt": "T",
"chr": "4",
"pos": "139889909",
"ref": "TTGCTGCTGCTGC"
}
},
"hg19": {
"hgvs_genomic_description": "NC_000004.11:g.140811111_140811122del",
"vcf": {
"alt": "T",
"chr": "chr4",
"pos": "140811063",
"ref": "TTGCTGCTGCTGC"
}
},
"hg38": {
"hgvs_genomic_description": "NC_000004.12:g.139889957_139889968del",
"vcf": {
"alt": "T",
"chr": "chr4",
"pos": "139889909",
"ref": "TTGCTGCTGCTGC"
}
}
},
"reference_sequence_records": {
"protein": "https://www.ensembl.org/Homo_sapiens/Transcript/ProteinSummary?db=core;p=ENSP00000381913.1",
"transcript": "https://www.ensembl.org/Homo_sapiens/Transcript/Summary?db=core;t=ENST00000398940.1"
},
"refseqgene_context_intronic_sequence": "",
"selected_assembly": "GRCh38",
"submitted_variant": "NC_000004.12:g.139889957_139889968del",
"transcript_description": "MAML3-202",
"validation_warnings": [
"Removing redundant reference bases from variant description",
"ENST00000398940.1:c.97_108del normalized to ENST00000398940.1:c.109-10_110del",
"Required information for ENST00000509479.6 is missing from the Universal Transcript Archive",
"Query gene2transcripts with search term ENST00000509479 for available transcripts"
],
"variant_exonic_positions": null
},
"flag": "gene_variant",
"metadata": {
"variantvalidator_hgvs_version": "2.0.2.dev1+g6ecbf8e",
"variantvalidator_version": "1.0.5.dev324+g714b8d1.d20220718",
"vvdb_version": "vvdb_2022_04",
"vvseqrepo_db": "VV_SR_2022_02/master",
"vvta_version": "vvta_2022_02"
},
"validation_warning_1": {
"alt_genomic_loci": [],
"annotations": {},
"gene_ids": {},
"gene_symbol": "",
"genome_context_intronic_sequence": "",
"hgvs_lrg_transcript_variant": "",
"hgvs_lrg_variant": "",
"hgvs_predicted_protein_consequence": {
"lrg_slr": "",
"lrg_tlr": "",
"slr": "",
"tlr": ""
},
"hgvs_refseqgene_variant": "",
"hgvs_transcript_variant": "",
"primary_assembly_loci": {},
"reference_sequence_records": "",
"refseqgene_context_intronic_sequence": "",
"selected_assembly": "GRCh38",
"submitted_variant": "NC_000004.12:g.139889957_139889968del",
"transcript_description": "MAML3-202",
"validation_warnings": [
"Removing redundant reference bases from variant description",
"ENST00000398940.1:c.97_108del normalized to ENST00000398940.1:c.109-10_110del",
"Required information for ENST00000509479.6 is missing from the Universal Transcript Archive",
"Query gene2transcripts with search term ENST00000509479 for available transcripts"
],
"variant_exonic_positions": null
}
}
In theory, Ensembl transcripts should not have gaps, because they must align directly to a genome build. However, we know that there are examples of Ensembl transcripts, even in GRCh38, where the transcript-to-genome alignment has to be adjusted (i.e. gapped) for the transcript to map at all.
We need to ask Ensembl how this is handled, then look at some examples of known gaps and see how VV handles them.
Here are examples that can be found in
- 1-base gap in the genome (Yenn et al): 19-41123094-G-GG (LTBP4)
- 1-base gap in the transcripts (Yenn et al): 15-72105928-AC-A (NR2E3)
- 3-base gap in the transcripts: NC_000002.11:g.95847041_95847043GCG= (ZNF2)
- GRCh37/38 gap: NC_000004.12:g.139889957_139889968del (MAML3)
We have other examples but these will do for now. MAML3 might be interesting because it looks like an oncogene.
@sbenny1230 Try running these and adding them to the Ensembl tests. Remember, they are GRCh37. The last one, however, can be updated and also run as GRCh38, please; the variant is
NC_000004.12:g.139889957_139889968del
Although the MANE Select was easy to generate, because it just requires the deletion of an in-frame number of bases from the original transcript (NM_018717.4 to NM_018717.5), I want to know the justification for removing 3 or 4 amino acids just so that a MANE Select can be created!