RobokopU24 / Feedback

Feedback on the ROBOKOP project
https://robokop.renci.org
0 stars 0 forks source link

Automat ROBOKOP KG metadata #135

Closed karafecho closed 1 year ago

karafecho commented 1 year ago

This issue is to report that the Automat metadata for several sources within the ROBOKOP KG is incomplete (e.g., missing URLs). Moreover, the metadata for ROBOKOP KG itself (http://robokopkg.renci.org/browser/) is also missing (e.g., no URL).

{
  "graph_id": "RobokopKG",
  "graph_version": "b1e9e5d676b44b26",
  "sources": [
    {
      "source_id": "GWASCatalog",
      "source_version": "8_23_2022",
      "release_version": "93b30aaae5dc4dbd",
      "parsing_version": "1.0",
      "supplementation_version": "1.0",
      "normalization_scheme": {
        "node_normalization_version": "2.0.9",
        "edge_normalization_version": "v.2.4.4",
        "normalization_code_version": "1.0",
        "conflation": true,
        "strict": true
      },
      "merge_strategy": "default",
      "normalization_version": "2.0.9_v.2.4.4_1.0_conflated_strict",
      "provenance": "infores:gwas-catalog",
      "description": "A graph containing the NHGRI-EBI Catalog of human genome-wide association studies.",
      "source_data_url": "https://www.ebi.ac.uk/gwas/docs/file-downloads",
      "license": "https://www.ebi.ac.uk/gwas/",
      "attribution": "https://www.ebi.ac.uk/gwas/",
      "normalized_nodes.jsonl": {
        "nodes": 236692
      },
      "normalized_edges.jsonl": {
        "edges": 344513
      },
      "supp_norm_nodes.jsonl": {
        "nodes": 57775
      },
      "supp_norm_edges.jsonl": {
        "edges": 1641156
      }
    },
    {
      "source_id": "GTEx",
      "source_version": "8",
      "release_version": "5494d8c3a43517fb",
      "parsing_version": "1.2",
      "supplementation_version": "1.0",
      "normalization_scheme": {
        "node_normalization_version": "2.0.9",
        "edge_normalization_version": "v3.1.2",
        "normalization_code_version": "1.0",
        "conflation": true,
        "strict": true
      },
      "merge_strategy": "default",
      "normalization_version": "2.0.9_v3.1.2_1.0_conflated_strict",
      "provenance": "infores:gtex",
      "description": "A graph containing eqtl and sqtl information from the GTEx Portal. Also includes genes that the variants lie within or near.",
      "source_data_url": "https://storage.googleapis.com/gtex_analysis_v8/single_tissue_qtl_data/",
      "license": "https://www.gtexportal.org/home/documentationPage",
      "attribution": "https://www.gtexportal.org/home/documentationPage",
      "normalized_nodes.jsonl": {
        "nodes": 4899906
      },
      "normalized_edges.jsonl": {
        "edges": 14922458
      },
      "supp_norm_nodes.jsonl": {
        "nodes": 59527
      },
      "supp_norm_edges.jsonl": {
        "edges": 34305056
      }
    }
  ],
  "subgraphs": [
    {
      "graph_id": "Baseline",
      "release_version": "8e2d178d6a273b3e",
      "merge_strategy:": "default",
      "graph_metadata": {
        "graph_id": "Baseline",
        "graph_version": "8e2d178d6a273b3e",
        "sources": [
          {
            "source_id": "Biolink",
            "source_version": "7_28_2021",
            "release_version": "825adca9e0d39262",
            "parsing_version": "1.2",
            "supplementation_version": "1.0",
            "normalization_scheme": {
              "node_normalization_version": "2.0.9",
              "edge_normalization_version": "v3.1.2",
              "normalization_code_version": "1.0",
              "conflation": true,
              "strict": true
            },
            "merge_strategy": "default",
            "normalization_version": "2.0.9_v3.1.2_1.0_conflated_strict",
            "provenance": "infores:sri-reference-kg",
            "description": "A graph based on the Monarch API (https://api.monarchinitiative.org/).",
            "source_data_url": "https://api.monarchinitiative.org/api/",
            "license": "https://monarchinitiative.org/about/licensing",
            "attribution": "https://monarchinitiative.org/about/monarch",
            "normalized_nodes.jsonl": {
              "nodes": 237518
            },
            "normalized_edges.jsonl": {
              "edges": 3385432
            }
          },
          {
            "source_id": "CHEBIProps",
            "source_version": "3_1_2023",
            "release_version": "261cf6d76537f251",
            "parsing_version": "1.1",
            "supplementation_version": "1.0",
            "normalization_scheme": {
              "node_normalization_version": "2.0.9",
              "edge_normalization_version": "v3.1.2",
              "normalization_code_version": "1.0",
              "conflation": true,
              "strict": true
            },
            "merge_strategy": "default",
            "normalization_version": "2.0.9_v3.1.2_1.0_conflated_strict",
            "provenance": "infores:chebi-properties",
            "description": "",
            "source_data_url": "",
            "license": "",
            "attribution": "",
            "normalized_nodes.jsonl": {
              "nodes": 23641
            },
            "normalized_edges.jsonl": {
              "edges": 0
            }
          },
          {
            "source_id": "CTD",
            "source_version": "March_2023",
            "release_version": "9b2586ec80419bad",
            "parsing_version": "1.2",
            "supplementation_version": "1.0",
            "normalization_scheme": {
              "node_normalization_version": "2.0.9",
              "edge_normalization_version": "v3.1.2",
              "normalization_code_version": "1.0",
              "conflation": true,
              "strict": true
            },
            "merge_strategy": "default",
            "normalization_version": "2.0.9_v3.1.2_1.0_conflated_strict",
            "provenance": "infores:ctd",
            "description": "A subset of data from the Comparative Toxicogenomics Database",
            "source_data_url": "http://ctdbase.org/reports/",
            "license": "http://ctdbase.org/about/publications/#citing",
            "attribution": "http://ctdbase.org/about/",
            "normalized_nodes.jsonl": {
              "nodes": 25930
            },
            "normalized_edges.jsonl": {
              "edges": 155938
            }
          },
          {
            "source_id": "DrugCentral",
            "source_version": "8_22_2022",
            "release_version": "fae0392071bea945",
            "parsing_version": "1.3",
            "supplementation_version": "1.0",
            "normalization_scheme": {
              "node_normalization_version": "2.0.9",
              "edge_normalization_version": "v3.1.2",
              "normalization_code_version": "1.0",
              "conflation": true,
              "strict": true
            },
            "merge_strategy": "default",
            "normalization_version": "2.0.9_v3.1.2_1.0_conflated_strict",
            "provenance": "infores:drugcentral",
            "description": "DrugCentral is an online drug information resource created and maintained by Division of Translational Informatics at University of New Mexico in collaboration with the IDG.",
            "source_data_url": "https://drugcentral.org/download",
            "license": "https://drugcentral.org/privacy",
            "attribution": "https://drugcentral.org/about",
            "normalized_nodes.jsonl": {
              "nodes": 13126
            },
            "normalized_edges.jsonl": {
              "edges": 233225
            }
          },
          {
            "source_id": "GtoPdb",
            "source_version": "2022.4",
            "release_version": "6225d1ee1cd7c43c",
            "parsing_version": "1.1",
            "supplementation_version": "1.0",
            "normalization_scheme": {
              "node_normalization_version": "2.0.9",
              "edge_normalization_version": "v3.1.2",
              "normalization_code_version": "1.0",
              "conflation": true,
              "strict": true
            },
            "merge_strategy": "default",
            "normalization_version": "2.0.9_v3.1.2_1.0_conflated_strict",
            "provenance": "infores:gtopdb",
            "description": "A graph based on the IUPHAR Guide to Pharmacology (https://www.guidetopharmacology.org/)",
            "source_data_url": "http://www.guidetopharmacology.org/",
            "license": "https://www.guidetopharmacology.org/about.jsp#license",
            "attribution": "https://www.guidetopharmacology.org/citing.jsp",
            "normalized_nodes.jsonl": {
              "nodes": 9613
            },
            "normalized_edges.jsonl": {
              "edges": 15412
            }
          },
          {
            "source_id": "Hetio",
            "source_version": "1.0",
            "release_version": "c428d1c37f4a3bac",
            "parsing_version": "1.3",
            "supplementation_version": "1.0",
            "normalization_scheme": {
              "node_normalization_version": "2.0.9",
              "edge_normalization_version": "v3.1.2",
              "normalization_code_version": "1.0",
              "conflation": true,
              "strict": true
            },
            "merge_strategy": "default",
            "normalization_version": "2.0.9_v3.1.2_1.0_conflated_strict",
            "provenance": "infores:hetionet",
            "description": "Hetionet is an integrative network of biomedical knowledge assembled from 29 different databases of genes, compounds, diseases, and more. The network combines over 50 years of biomedical information into a single resource, consisting of 47,031 nodes (11 types) and 2,250,197 relationships (24 types).(source https://het.io)",
            "source_data_url": "https://github.com/hetio/hetionet/blob/master/hetnet/json/hetionet-v1.0.json.bz2",
            "license": "https://het.io/about/",
            "attribution": "https://het.io/about/",
            "normalized_nodes.jsonl": {
              "nodes": 41390
            },
            "normalized_edges.jsonl": {
              "edges": 2109944
            }
          },
          {
            "source_id": "HGNC",
            "source_version": "3_24_2023",
            "release_version": "ca4ee805cf6c1b9a",
            "parsing_version": "1.1",
            "supplementation_version": "1.0",
            "normalization_scheme": {
              "node_normalization_version": "2.0.9",
              "edge_normalization_version": "v3.1.2",
              "normalization_code_version": "1.0",
              "conflation": true,
              "strict": true
            },
            "merge_strategy": "default",
            "normalization_version": "2.0.9_v3.1.2_1.0_conflated_strict",
            "provenance": "infores:hgnc",
            "description": "HGNC gene family and gene relationships",
            "source_data_url": "ftp://ftp.ebi.ac.uk/pub/databases/genenames/hgnc/archive/",
            "license": "https://www.genenames.org/about/",
            "attribution": "https://www.genenames.org/about/",
            "normalized_nodes.jsonl": {
              "nodes": 26467
            },
            "normalized_edges.jsonl": {
              "edges": 29928
            }
          },
          {
            "source_id": "HMDB",
            "source_version": "5.0",
            "release_version": "41b42a8cbb1aff68",
            "parsing_version": "1.1",
            "supplementation_version": "1.0",
            "normalization_scheme": {
              "node_normalization_version": "2.0.9",
              "edge_normalization_version": "v3.1.2",
              "normalization_code_version": "1.0",
              "conflation": true,
              "strict": true
            },
            "merge_strategy": "default",
            "normalization_version": "2.0.9_v3.1.2_1.0_conflated_strict",
            "provenance": "infores:hmdb",
            "description": "A graph based on the Human Metabolome DataBase (hmdb.org)",
            "source_data_url": "https://translator.ncats.io/hmdb-knowledge-beacon",
            "license": "https://hmdb.ca/about",
            "attribution": "https://hmdb.ca/about#cite",
            "normalized_nodes.jsonl": {
              "nodes": 94283
            },
            "normalized_edges.jsonl": {
              "edges": 1411427
            }
          },
          {
            "source_id": "HumanGOA",
            "source_version": "2023-03-06",
            "release_version": "b5711e96955a7120",
            "parsing_version": "1.1",
            "supplementation_version": "1.0",
            "normalization_scheme": {
              "node_normalization_version": "2.0.9",
              "edge_normalization_version": "v3.1.2",
              "normalization_code_version": "1.0",
              "conflation": true,
              "strict": true
            },
            "merge_strategy": "default",
            "normalization_version": "2.0.9_v3.1.2_1.0_conflated_strict",
            "provenance": "infores:goa",
            "description": "Human Gene Ontology Annotations from the GO consortium.",
            "source_data_url": "ftp://ftp.ebi.ac.uk/pub/databases/GO/goa/",
            "license": "https://www.ebi.ac.uk/about/terms-of-use/",
            "attribution": "https://www.ebi.ac.uk/GOA/publications",
            "normalized_nodes.jsonl": {
              "nodes": 38435
            },
            "normalized_edges.jsonl": {
              "edges": 296200
            }
          },
          {
            "source_id": "IntAct",
            "source_version": "7_13_2022",
            "release_version": "bd29aaab1e9f29be",
            "parsing_version": "1.1",
            "supplementation_version": "1.0",
            "normalization_scheme": {
              "node_normalization_version": "2.0.9",
              "edge_normalization_version": "v3.1.2",
              "normalization_code_version": "1.0",
              "conflation": true,
              "strict": true
            },
            "merge_strategy": "default",
            "normalization_version": "2.0.9_v3.1.2_1.0_conflated_strict",
            "provenance": "infores:intact",
            "description": "Molecular (Gene-Gene) interactions from EBI IntAct (https://www.ebi.ac.uk/intact/)",
            "source_data_url": "https://www.ebi.ac.uk/intact/",
            "license": "https://www.ebi.ac.uk/about/terms-of-use/",
            "attribution": "http://europepmc.org/article/MED/24234451",
            "normalized_nodes.jsonl": {
              "nodes": 91704
            },
            "normalized_edges.jsonl": {
              "edges": 817101
            }
          },
          {
            "source_id": "MONDOProps",
            "source_version": "3_23_2023",
            "release_version": "2f2c121d55c21991",
            "parsing_version": "1.0",
            "supplementation_version": "1.0",
            "normalization_scheme": {
              "node_normalization_version": "2.0.9",
              "edge_normalization_version": "v3.1.2",
              "normalization_code_version": "1.0",
              "conflation": true,
              "strict": true
            },
            "merge_strategy": "default",
            "normalization_version": "2.0.9_v3.1.2_1.0_conflated_strict",
            "provenance": "infores:mondo",
            "description": "",
            "source_data_url": "",
            "license": "",
            "attribution": "",
            "normalized_nodes.jsonl": {
              "nodes": 22465
            },
            "normalized_edges.jsonl": {
              "edges": 0
            }
          },
          {
            "source_id": "OntologicalHierarchy",
            "source_version": "2023-03-19",
            "release_version": "aa6d68fa2a61394f",
            "parsing_version": "1.2",
            "supplementation_version": "1.0",
            "normalization_scheme": {
              "node_normalization_version": "2.0.9",
              "edge_normalization_version": "v3.1.2",
              "normalization_code_version": "1.0",
              "conflation": true,
              "strict": true
            },
            "merge_strategy": "default",
            "normalization_version": "2.0.9_v3.1.2_1.0_conflated_strict",
            "provenance": "infores:ontological-hierarchy",
            "description": "",
            "source_data_url": "",
            "license": "",
            "attribution": "",
            "normalized_nodes.jsonl": {
              "nodes": 3059400
            },
            "normalized_edges.jsonl": {
              "edges": 60195854
            }
          },
          {
            "source_id": "PANTHER",
            "source_version": "17.0",
            "release_version": "4cbf8bfed2fa43ef",
            "parsing_version": "1.1",
            "supplementation_version": "1.0",
            "normalization_scheme": {
              "node_normalization_version": "2.0.9",
              "edge_normalization_version": "v3.1.2",
              "normalization_code_version": "1.0",
              "conflation": true,
              "strict": true
            },
            "merge_strategy": "default",
            "normalization_version": "2.0.9_v3.1.2_1.0_conflated_strict",
            "provenance": "infores:panther",
            "description": "Gene families and pathways from PANTHER",
            "source_data_url": "ftp.pantherdb.org/sequence_classifications/",
            "license": "http://pantherdb.org/tou.jsp",
            "attribution": "http://pantherdb.org/publications.jsp#HowToCitePANTHER",
            "normalized_nodes.jsonl": {
              "nodes": 48902
            },
            "normalized_edges.jsonl": {
              "edges": 834126
            }
          },
          {
            "source_id": "PHAROS",
            "source_version": "v6_13_4",
            "release_version": "61f21fb1e4d4d0a0",
            "parsing_version": "1.3",
            "supplementation_version": "1.0",
            "normalization_scheme": {
              "node_normalization_version": "2.0.9",
              "edge_normalization_version": "v3.1.2",
              "normalization_code_version": "1.0",
              "conflation": true,
              "strict": true
            },
            "merge_strategy": "default",
            "normalization_version": "2.0.9_v3.1.2_1.0_conflated_strict",
            "provenance": "infores:pharos",
            "description": "",
            "source_data_url": "",
            "license": "",
            "attribution": "",
            "normalized_nodes.jsonl": {
              "nodes": 302706
            },
            "normalized_edges.jsonl": {
              "edges": 562322
            }
          },
          {
            "source_id": "STRING-DB",
            "source_version": "v11.5",
            "release_version": "be8ebb85a6eef372",
            "parsing_version": "1.0",
            "supplementation_version": "1.0",
            "normalization_scheme": {
              "node_normalization_version": "2.0.9",
              "edge_normalization_version": "v3.1.2",
              "normalization_code_version": "1.0",
              "conflation": true,
              "strict": true
            },
            "merge_strategy": "default",
            "normalization_version": "2.0.9_v3.1.2_1.0_conflated_strict",
            "provenance": "infores:STRING",
            "description": "",
            "source_data_url": "",
            "license": "",
            "attribution": "",
            "normalized_nodes.jsonl": {
              "nodes": 18401
            },
            "normalized_edges.jsonl": {
              "edges": 8181618
            }
          },
          {
            "source_id": "UberGraph",
            "source_version": "2023-02-12",
            "release_version": "45f048c34bbf140a",
            "parsing_version": "1.2",
            "supplementation_version": "1.0",
            "normalization_scheme": {
              "node_normalization_version": "2.0.9",
              "edge_normalization_version": "v3.1.2",
              "normalization_code_version": "1.0",
              "conflation": true,
              "strict": true
            },
            "merge_strategy": "default",
            "normalization_version": "2.0.9_v3.1.2_1.0_conflated_strict",
            "provenance": "infores:ubergraph",
            "description": "",
            "source_data_url": "",
            "license": "",
            "attribution": "",
            "normalized_nodes.jsonl": {
              "nodes": 2993752
            },
            "normalized_edges.jsonl": {
              "edges": 3557507
            }
          }
        ],
        "subgraphs": [],
        "build_status": "stable",
        "build_time": "03-24-23 15:05:08",
        "build_error": null,
        "final_node_count": 3642514,
        "final_edge_count": 81785256,
        "merged_nodes": 3405219,
        "merged_edges": 778
      }
    }
  ],
  "build_status": "stable",
  "build_time": "03-24-23 16:46:49",
  "build_error": null,
  "final_node_count": 8660830,
  "final_edge_count": 132099619,
  "merged_nodes": 235584,
  "merged_edges": 898820
}
EvanDietzMorris commented 1 year ago

Thanks @karafecho. I will make sure all of the sources have their source metadata populated before the next build. Note that robokopkg itself is the graph, not a data source, and currently none of the graphs have metadata descriptions or URLs associated with them. If we want to expose a URL associated with a graph, it needs to go in the new graph description field, or we need to add an additional field for a graph URL. That being said, there is not really a URL for these graphs other than Automat or robokop.renci.org, where the user presumably already is. (Note that the graph at robokopkg.renci.org is not the same graph as the robokopkg hosted on Automat, which is what robokop.renci.org points to; robokopkg.renci.org has a couple of additional knowledge sources added. Perhaps we should rename that one to avoid confusion.)

karafecho commented 1 year ago

To clarify, the text descriptions + URLs that I provided were for the data sources from which the graphs are created. For the graph descriptions, I think users will be interested in the node and edge counts, but probably nothing more than that, except in the case of "super users". For consistency, I suggested that you include a URL for ROBOKOP KG, which I thought was accessible at robokopkg.renci.org. Indeed, that's what we've been advertising in papers, presentations, and directly to potential users.

I added a comment that I think users will find it confusing to list ROBOKOP KG as an Automat API, especially when that metadata shows many of the same knowledge sources that are listed under Automat, but perhaps I'll be proven wrong.