clingen-data-model / genegraph

Presents an RDF triplestore of gene information using GraphQL APIs
5 stars 0 forks source link

Some variation descriptors do not have a variant #748

Open theferrit32 opened 1 year ago

theferrit32 commented 1 year ago

An error probably occurred while ingesting this variant from clinvar-raw that was neither checked for nor raised as an exception, so the variation descriptor was still added, just without a variant. This one is an absolute CNV, which we should be able to parse.

{:msg "nil variation type",
 :descriptor {:description "GRCh37/hg19 17q12(chr17:36059104-36244358)x3",
              :subject_variation_descriptor (),
              :type "CanonicalVariationDescriptor",
              :xrefs ["https://www.ncbi.nlm.nih.gov/clinvar/153916" "https://identifiers.org/clinvar:153916"],
              :alternate_labels [],
              :canonical_variation nil,
              :record_metadata {:type "RecordMetadata",
                                :is_version_of "http://dataexchange.clinicalgenome.org/terms/VariationDescriptor_153916",
                                :version "2019-09-02"},
              :extensions ({:type "Extension", :name "variation_type", :value "copy number gain"} {:type "Extension", :name "entity_type", :value "variation"} {:type "Extension", :name "protein_change", :value []} {:type "Extension", :name "clingen_version", :value 0} {:type "Extension", :name "child_ids", :value []} {:type "Extension", :name "allele_id", :value "163667"} {:type "Extension", :name "subclass_type", :value "SimpleAllele"} {:type "Extension", :name "clinvar_variation", :value "https://identifiers.org/clinvar:153916"} {:type "Extension", :name "descendant_ids", :value []} {:type "Extension", :name "canonical_expression", :value nil} {:type "Extension", :name "candidate_expressions", :value ()}),
              :label "GRCh37/hg19 17q12(chr17:36059104-36244358)x3",
              :id "http://dataexchange.clinicalgenome.org/terms/VariationDescriptor_153916.2019-09-02",
              :members [{:type "VariationMember", :expressions [{:type "Expression", :syntax "hgvs.g", :value "NC_000017.10:g.(?_36059104)_(36244358_?)dup"}]}]},
 :line 347}
theferrit32 commented 1 year ago

Here are two more that are not absolute CNVs:

{:description "NM_000350.3(ABCA4):c.1099+80A>G",
 :subject_variation_descriptor (),
 :type "CanonicalVariationDescriptor",
 :xrefs ["https://www.ncbi.nlm.nih.gov/clinvar/1707292" "https://identifiers.org/clinvar:1707292"],
 :alternate_labels [],
 :canonical_variation nil,
 :record_metadata {:type "RecordMetadata",
                   :is_version_of "http://dataexchange.clinicalgenome.org/terms/VariationDescriptor_1707292",
                   :version "2022-10-01"},
 :extensions ({:type "Extension", :name "variation_type", :value "single nucleotide variant"}
              {:type "Extension", :name "entity_type", :value "variation"}
              {:type "Extension", :name "protein_change", :value []}
              {:type "Extension", :name "clingen_version", :value 0}
              {:type "Extension", :name "child_ids", :value []}
              {:type "Extension", :name "allele_id", :value "1705601"}
              {:type "Extension", :name "subclass_type", :value "SimpleAllele"}
              {:type "Extension", :name "clinvar_variation", :value "https://identifiers.org/clinvar:1707292"}
              {:type "Extension", :name "descendant_ids", :value []}
              {:type "Extension", :name "canonical_expression", :value nil}
              {:type "Extension", :name "candidate_expressions", :value ({:expression "NC_000001.11:94080397:T:C", :label "SPDI"}
                                                                         {:expression "NC_000001.11:g.94080398T>C", :label "GRCh38"}
                                                                         {:expression "NC_000001.10:g.94545954T>C", :label "GRCh37"}
                                                                         {:expression "clinvar:1707292", :label "Text"})}),
 :label "NM_000350.3(ABCA4):c.1099+80A>G",
 :id "http://dataexchange.clinicalgenome.org/terms/VariationDescriptor_1707292.2022-10-01",
 :members [{:type "VariationMember", :expressions [{:type "Expression", :syntax "hgvs.g", :value "NG_009073.1:g.45752A>G"}]} {:type "VariationMember", :expressions [{:type "Expression", :syntax "hgvs.g", :value "NG_009073.2:g.45750A>G"}]}
           {:type "VariationMember", :expressions [{:type "Expression", :syntax "hgvs.g", :value "NC_000001.10:g.94545954T>C"}]} {:type "VariationMember", :expressions [{:type "Expression", :syntax "hgvs.g", :value "NC_000001.11:g.94080398T>C"}]}
           {:type "VariationMember", :expressions [{:type "Expression", :syntax "hgvs.c", :value "NM_000350.3:c.1099+80A>G"}]} {:type "VariationMember",
                                                                                                                                :expressions [{:type "Expression", :syntax "spdi", :value "NC_000001.11:94080397:T:C"}]}]}

{:description "NM_000543.5(SMPD1):c.1264-78A>G",
 :subject_variation_descriptor (),
 :type "CanonicalVariationDescriptor",
 :xrefs ["https://www.ncbi.nlm.nih.gov/clinvar/1707296" "https://identifiers.org/clinvar:1707296"],
 :alternate_labels [],
 :canonical_variation nil,
 :record_metadata {:type "RecordMetadata",
                   :is_version_of "http://dataexchange.clinicalgenome.org/terms/VariationDescriptor_1707296",
                   :version "2022-10-01"},
 :extensions ({:type "Extension", :name "variation_type", :value "single nucleotide variant"}
              {:type "Extension", :name "entity_type", :value "variation"}
              {:type "Extension", :name "protein_change", :value []}
              {:type "Extension", :name "clingen_version", :value 0}
              {:type "Extension", :name "child_ids", :value []}
              {:type "Extension", :name "allele_id", :value "1705605"}
              {:type "Extension", :name "subclass_type", :value "SimpleAllele"}
              {:type "Extension", :name "clinvar_variation", :value "https://identifiers.org/clinvar:1707296"}
              {:type "Extension", :name "descendant_ids", :value []}
              {:type "Extension", :name "canonical_expression", :value nil}
              {:type "Extension", :name "candidate_expressions", :value ({:expression "NC_000011.10:6393538:A:G", :label "SPDI"}
                                                                         {:expression "NC_000011.10:g.6393539A>G", :label "GRCh38"}
                                                                         {:expression "NC_000011.9:g.6414769A>G", :label "GRCh37"}
                                                                         {:expression "clinvar:1707296", :label "Text"})}),
 :label "NM_000543.5(SMPD1):c.1264-78A>G",
 :id "http://dataexchange.clinicalgenome.org/terms/VariationDescriptor_1707296.2022-10-01",
 :members [{:type "VariationMember", :expressions [{:type "Expression", :syntax "hgvs.g", :value "NG_011780.1:g.8115A>G"}]}
           {:type "VariationMember", :expressions [{:type "Expression", :syntax "hgvs.g", :value "NG_029615.1:g.30876T>C"}]}
           {:type "VariationMember", :expressions [{:type "Expression", :syntax "hgvs.g", :value "NC_000011.10:g.6393539A>G"}]}
           {:type "VariationMember", :expressions [{:type "Expression", :syntax "hgvs.g", :value "NC_000011.9:g.6414769A>G"}]}
           {:type "VariationMember", :expressions [{:type "Expression", :syntax "hgvs.c", :value "NM_001318088.2:c.343-78A>G"}]}
           {:type "VariationMember", :expressions [{:type "Expression", :syntax "hgvs.c", :value "NM_001365135.2:c.1132-78A>G"}]}
           {:type "VariationMember", :expressions [{:type "Expression", :syntax "hgvs.c", :value "NM_001318087.2:c.1264-78A>G"}]}
           {:type "VariationMember", :expressions [{:type "Expression", :syntax "hgvs.c", :value "NM_000543.5:c.1264-78A>G"}]}
           {:type "VariationMember", :expressions [{:type "Expression", :syntax "hgvs.c", :value "NM_001007593.3:c.1261-78A>G"}]}
           {:type "VariationMember" :expressions [{:type "Expression", :syntax "spdi", :value "NC_000011.10:6393538:A:G"}]}]}
theferrit32 commented 1 year ago

Some copy number changes don't have any HGVS or SPDI expressions. The title is a valid HGVS dup. These should come out as Text, not nil.

{:description "Single allele", :subject_variation_descriptor (), :type "CanonicalVariationDescriptor", :xrefs ["https://www.ncbi.nlm.nih.gov/clinvar/1679730" "https://identifiers.org/clinvar:1679730"], :alternate_labels [], :canonical_variation nil, :record_metadata {:type "RecordMetadata", :is_version_of "http://dataexchange.clinicalgenome.org/terms/VariationDescriptor_1679730", :version "2023-04-10"}, :extensions ({:type "Extension", :name "variation_type", :value "Duplication"} {:type "Extension", :name "entity_type", :value "variation"} {:type "Extension", :name "protein_change", :value []} {:type "Extension", :name "child_ids", :value []} {:type "Extension", :name "allele_id", :value "1671649"} {:type "Extension", :name "subclass_type", :value "SimpleAllele"} {:type "Extension", :name "clinvar_variation", :value "https://identifiers.org/clinvar:1679730"} {:type "Extension", :name "descendant_ids", :value []} {:type "Extension", :name "canonical_expression", :value nil} {:type "Extension", :name "candidate_expressions", :value ({:expression "clinvar:1679730", :label "Text"})}), :label "Single allele", :id "http://dataexchange.clinicalgenome.org/terms/VariationDescriptor_1679730.2023-04-10", :members []}

{:description "NC_000018.9:g.(48556994_48573289)_48573471dup", :subject_variation_descriptor (), :type "CanonicalVariationDescriptor", :xrefs ["https://www.ncbi.nlm.nih.gov/clinvar/1878325" "https://identifiers.org/clinvar:1878325"], :alternate_labels [], :canonical_variation nil, :record_metadata {:type "RecordMetadata", :is_version_of "http://dataexchange.clinicalgenome.org/terms/VariationDescriptor_1878325", :version "2023-04-10"}, :extensions ({:type "Extension", :name "variation_type", :value "Duplication"} {:type "Extension", :name "entity_type", :value "variation"} {:type "Extension", :name "protein_change", :value []} {:type "Extension", :name "child_ids", :value []} {:type "Extension", :name "allele_id", :value "1935091"} {:type "Extension", :name "subclass_type", :value "SimpleAllele"} {:type "Extension", :name "clinvar_variation", :value "https://identifiers.org/clinvar:1878325"} {:type "Extension", :name "descendant_ids", :value []} {:type "Extension", :name "canonical_expression", :value nil} {:type "Extension", :name "candidate_expressions", :value ()}), :label "NC_000018.9:g.(48556994_48573289)_48573471dup", :id "http://dataexchange.clinicalgenome.org/terms/VariationDescriptor_1878325.2023-04-10", :members [{:type "VariationMember", :expressions [{:type "Expression", :syntax "hgvs.g", :value "NC_000018.9:g.(48556994_48573289)_48573471dup"}]}]}