biothings / mygene.info

MyGene.info: A BioThings API for gene annotations
http://mygene.info
Other
117 stars 20 forks source link

Load orphanet/orphadata data on gene-disease associations #109

Open andrewsu opened 3 years ago

andrewsu commented 3 years ago

Orphanet provides gene-disease associations for rare diseases under a CC-BY 4.0 license.

See the link for "Genes associated with rare diseases" at http://www.orphadata.org/cgi-bin/index.php. The current XML file appears to be continuously updated at http://www.orphadata.org/data/xml/en_product6.xml

andrewsu commented 3 years ago

Example record showing the association between Alacrimia-choreoathetosis-liver dysfunction syndrome (OrphaCode:404454) and N-glycanase 1 (ENSG00000151092). On the scale of things, this is a well-structured resource that should be relatively simple to parse...

    <Disorder id="22923">
      <OrphaCode>404454</OrphaCode>
      <ExpertLink lang="en">http://www.orpha.net/consor/cgi-bin/OC_Exp.php?lng=en&amp;Expert=404454</ExpertLink>
      <Name lang="en">Alacrimia-choreoathetosis-liver dysfunction syndrome</Name>
      <DisorderType id="21394">
        <Name lang="en">Disease</Name>
      </DisorderType>
      <DisorderGroup id="36547">
        <Name lang="en">Disorder</Name>
      </DisorderGroup>
      <DisorderGeneAssociationList count="1">
        <DisorderGeneAssociation>
          <SourceOfValidation>24651605[PMID]</SourceOfValidation>
          <Gene id="22971">
            <Name lang="en">N-glycanase 1</Name>
            <Symbol>NGLY1</Symbol>
            <SynonymList count="3">
              <Synonym lang="en">FLJ11005</Synonym>
              <Synonym lang="en">PNG1</Synonym>
              <Synonym lang="en">peptide-N(4)-(N-acetyl-beta-glucosaminyl)asparagine amidase</Synonym>
            </SynonymList>
            <GeneType id="25993">
              <Name lang="en">gene with protein product</Name>
            </GeneType>
            <ExternalReferenceList count="6">
              <ExternalReference id="126441">
                <Source>Reactome</Source>
                <Reference>Q96IV0</Reference>
              </ExternalReference>
              <ExternalReference id="91956">
                <Source>Ensembl</Source>
                <Reference>ENSG00000151092</Reference>
              </ExternalReference>
              <ExternalReference id="91737">
                <Source>Genatlas</Source>
                <Reference>NGLY1</Reference>
              </ExternalReference>
              <ExternalReference id="91735">
                <Source>HGNC</Source>
                <Reference>17646</Reference>
              </ExternalReference>
              <ExternalReference id="91736">
                <Source>OMIM</Source>
                <Reference>610661</Reference>
              </ExternalReference>
              <ExternalReference id="91738">
                <Source>SwissProt</Source>
                <Reference>Q96IV0</Reference>
              </ExternalReference>
            </ExternalReferenceList>
            <LocusList count="1">
              <Locus id="26865">
                <GeneLocus>3p24.2</GeneLocus>
                <LocusKey>1</LocusKey>
              </Locus>
            </LocusList>
          </Gene>
          <DisorderGeneAssociationType id="25972">
            <Name lang="en">Disease-causing germline mutation(s) (loss of function) in</Name>
          </DisorderGeneAssociationType>
          <DisorderGeneAssociationStatus id="17991">
            <Name lang="en">Assessed</Name>
          </DisorderGeneAssociationStatus>
        </DisorderGeneAssociation>
      </DisorderGeneAssociationList>
    </Disorder>