RTXteam / RTX-KG2

Build system for the RTX-KG2 biomedical knowledge graph, part of the ARAX reasoning system (https://github.com/RTXTeam/RTX)
MIT License
39 stars 8 forks source link

Update drugbank #276

Closed acevedol closed 1 year ago

acevedol commented 1 year ago

Update the DrugBank knowledge source.

saramsey commented 1 year ago

tentatively slated for inclusion in the KG2.8.4pre build

ecwood commented 1 year ago

While testing out the update, I ran the report scripts on the old vs. new DrugBank JSON. The report script failed, so 172fedc also features updates to the report script so that it will finish. Below are the outputs of the report scripts. One important thing to note is that the version in the node name will have the same version number (5.1) in both reports, since that is what is given in the drugbank.xml file. However, I know that the previous version is 5.1.9 from its name in the S3 bucket and that the current version is 5.1.10, since that was the tag when I downloaded it.

The New (DrugBank 5.1.10) report is shown first, followed by the Old (DrugBank 5.1.9) report:
{
    "_number_of_nodes": 15236,
    "_number_of_edges": 3018286,
    "_report_datetime": "2023-06-20 21:54:08",
    "number_of_nodes_by_curie_prefix": {
        "identifiers_org_registry": 1,
        "DRUGBANK": 15235
    },
    "number_of_nodes_without_category__by_curie_prefix": {},
    "number_of_nodes_by_category_label": {
        "information_resource": 1,
        "chemical_entity": 3008,
        "small_molecule": 12227
    },
    "number_of_nodes_by_source": {
        "identifiers_org_registry:drugbank": 15236
    },
    "number_of_edges_by_predicate_curie": {
        "DRUGBANK:category": 64973,
        "DRUGBANK:drug-interaction": 2866931,
        "DRUGBANK:external-identifier": 19614,
        "DRUGBANK:external-identifier-protein": 93,
        "DRUGBANK:pathway": 3780,
        "DRUGBANK:inhibitor": 4131,
        "DRUGBANK:group": 18032,
        "DRUGBANK:atc-code": 5383,
        "DRUGBANK:atc-code-level": 17177,
        "DRUGBANK:binder": 502,
        "DRUGBANK:agonist": 1390,
        "DRUGBANK:target": 10814,
        "DRUGBANK:antibody": 84,
        "DRUGBANK:ligand": 706,
        "DRUGBANK:activator": 253,
        "DRUGBANK:inducer": 181,
        "DRUGBANK:substrate": 110,
        "DRUGBANK:cofactor": 193,
        "DRUGBANK:antagonist": 2191,
        "DRUGBANK:chaperone": 17,
        "DRUGBANK:modulator": 85,
        "DRUGBANK:downregulator": 27,
        "DRUGBANK:stimulator": 18,
        "DRUGBANK:product_of": 41,
        "DRUGBANK:allosteric_modulator": 12,
        "DRUGBANK:negative_modulator": 32,
        "DRUGBANK:positive_allosteric_modulator": 983,
        "DRUGBANK:inverse_agonist": 29,
        "DRUGBANK:partial_agonist": 84,
        "DRUGBANK:potentiator": 162,
        "DRUGBANK:blocker": 134,
        "DRUGBANK:regulator": 44,
        "DRUGBANK:intercalation": 2,
        "DRUGBANK:inhibitory_allosteric_modulator": 3,
        "DRUGBANK:adduct": 2,
        "DRUGBANK:multitarget": 7,
        "DRUGBANK:stabilization": 10,
        "DRUGBANK:partial_antagonist": 1,
        "DRUGBANK:suppressor": 3,
        "DRUGBANK:weak_inhibitor": 2,
        "DRUGBANK:binding": 10,
        "DRUGBANK:degradation": 1,
        "DRUGBANK:antisense_oligonucleotide": 5,
        "DRUGBANK:incorporation_into_and_destabilization": 1,
        "DRUGBANK:cleavage": 10,
        "DRUGBANK:neutralizer": 2,
        "DRUGBANK:oxidizer": 3,
        "DRUGBANK:inactivator": 6,
        "DRUGBANK:nucleotide_exchange_blocker": 1,
        "DRUGBANK:inhibition_of_synthesis": 1,
        "DRUGBANK:carrier": 1,
        "DRUGBANK:translocation_inhibitor": 2,
        "DRUGBANK:gene_replacement": 1,
        "DRUGBANK:chelator": 2,
        "DRUGBANK:component_of": 4
    },
    "number_of_edges_by_predicate_type": {
        "category": 64973,
        "drug-interaction": 2866931,
        "external-identifier": 19614,
        "external-identifier-protein": 93,
        "pathway": 3780,
        "inhibitor": 4131,
        "group": 18032,
        "atc-code": 5383,
        "atc-code-level": 17177,
        "binder": 502,
        "agonist": 1390,
        "target": 10814,
        "antibody": 84,
        "ligand": 706,
        "activator": 253,
        "inducer": 181,
        "substrate": 110,
        "cofactor": 193,
        "antagonist": 2191,
        "chaperone": 17,
        "modulator": 85,
        "downregulator": 27,
        "stimulator": 18,
        "product of": 41,
        "allosteric modulator": 12,
        "negative modulator": 32,
        "positive allosteric modulator": 983,
        "inverse agonist": 29,
        "partial agonist": 84,
        "potentiator": 162,
        "blocker": 134,
        "regulator": 44,
        "intercalation": 2,
        "inhibitory allosteric modulator": 3,
        "adduct": 2,
        "multitarget": 7,
        "stabilization": 10,
        "partial antagonist": 1,
        "suppressor": 3,
        "weak inhibitor": 2,
        "binding": 10,
        "degradation": 1,
        "antisense oligonucleotide": 5,
        "incorporation into and destabilization": 1,
        "cleavage": 10,
        "neutralizer": 2,
        "oxidizer": 3,
        "inactivator": 6,
        "nucleotide exchange blocker": 1,
        "inhibition of synthesis": 1,
        "carrier": 1,
        "translocation inhibitor": 2,
        "gene replacement": 1,
        "chelator": 2,
        "component of": 4
    },
    "number_of_edges_by_predicate_curie_prefixes": {
        "DRUGBANK": 3018286
    },
    "number_of_predicates_by_predicate_curie_prefixes": {
        "DRUGBANK": 55
    },
    "number_of_edges_by_source": {
        "identifiers_org_registry:drugbank": 3018286
    },
    "types_of_pairs_of_curies_for_xrefs": {},
    "types_of_pairs_of_curies_for_equivs": {},
    "number_of_nodes_by_source_and_category": {
        "identifiers_org_registry:drugbank": {}
    }
}
{
    "_number_of_nodes": 14595,
    "_number_of_edges": 2913688,
    "_report_datetime": "2023-06-20 21:52:51",
    "number_of_nodes_by_curie_prefix": {
        "identifiers_org_registry": 1,
        "DRUGBANK": 14594
    },
    "number_of_nodes_without_category__by_curie_prefix": {},
    "number_of_nodes_by_category_label": {
        "information_resource": 1,
        "chemical_entity": 2682,
        "small_molecule": 11912
    },
    "number_of_nodes_by_source": {
        "identifiers_org_registry:drugbank": 14595
    },
    "number_of_edges_by_predicate_curie": {
        "DRUGBANK:category": 62058,
        "DRUGBANK:drug-interaction": 2768080,
        "DRUGBANK:external-identifier": 19185,
        "DRUGBANK:external-identifier-protein": 93,
        "DRUGBANK:pathway": 3780,
        "DRUGBANK:inhibitor": 3686,
        "DRUGBANK:group": 17278,
        "DRUGBANK:atc-code": 5152,
        "DRUGBANK:atc-code-level": 16436,
        "DRUGBANK:binder": 471,
        "DRUGBANK:agonist": 1344,
        "DRUGBANK:target": 10877,
        "DRUGBANK:antibody": 61,
        "DRUGBANK:ligand": 704,
        "DRUGBANK:activator": 195,
        "DRUGBANK:inducer": 182,
        "DRUGBANK:substrate": 95,
        "DRUGBANK:cofactor": 193,
        "DRUGBANK:antagonist": 2149,
        "DRUGBANK:chaperone": 17,
        "DRUGBANK:modulator": 82,
        "DRUGBANK:downregulator": 11,
        "DRUGBANK:stimulator": 19,
        "DRUGBANK:product_of": 40,
        "DRUGBANK:negative_modulator": 32,
        "DRUGBANK:positive_allosteric_modulator": 983,
        "DRUGBANK:inverse_agonist": 29,
        "DRUGBANK:partial_agonist": 81,
        "DRUGBANK:potentiator": 162,
        "DRUGBANK:blocker": 115,
        "DRUGBANK:intercalation": 2,
        "DRUGBANK:inhibitory_allosteric_modulator": 2,
        "DRUGBANK:allosteric_modulator": 11,
        "DRUGBANK:adduct": 2,
        "DRUGBANK:multitarget": 7,
        "DRUGBANK:stabilization": 9,
        "DRUGBANK:partial_antagonist": 1,
        "DRUGBANK:suppressor": 3,
        "DRUGBANK:weak_inhibitor": 2,
        "DRUGBANK:binding": 10,
        "DRUGBANK:degradation": 1,
        "DRUGBANK:antisense_oligonucleotide": 5,
        "DRUGBANK:incorporation_into_and_destabilization": 1,
        "DRUGBANK:neutralizer": 3,
        "DRUGBANK:cleavage": 9,
        "DRUGBANK:oxidizer": 3,
        "DRUGBANK:inactivator": 6,
        "DRUGBANK:nucleotide_exchange_blocker": 1,
        "DRUGBANK:inhibition_of_synthesis": 1,
        "DRUGBANK:translocation_inhibitor": 2,
        "DRUGBANK:gene_replacement": 1,
        "DRUGBANK:regulator": 10,
        "DRUGBANK:chelator": 2,
        "DRUGBANK:component_of": 4
    },
    "number_of_edges_by_predicate_type": {
        "category": 62058,
        "drug-interaction": 2768080,
        "external-identifier": 19185,
        "external-identifier-protein": 93,
        "pathway": 3780,
        "inhibitor": 3686,
        "group": 17278,
        "atc-code": 5152,
        "atc-code-level": 16436,
        "binder": 471,
        "agonist": 1344,
        "target": 10877,
        "antibody": 61,
        "ligand": 704,
        "activator": 195,
        "inducer": 182,
        "substrate": 95,
        "cofactor": 193,
        "antagonist": 2149,
        "chaperone": 17,
        "modulator": 82,
        "downregulator": 11,
        "stimulator": 19,
        "product of": 40,
        "negative modulator": 32,
        "positive allosteric modulator": 983,
        "inverse agonist": 29,
        "partial agonist": 81,
        "potentiator": 162,
        "blocker": 115,
        "intercalation": 2,
        "inhibitory allosteric modulator": 2,
        "allosteric modulator": 11,
        "adduct": 2,
        "multitarget": 7,
        "stabilization": 9,
        "partial antagonist": 1,
        "suppressor": 3,
        "weak inhibitor": 2,
        "binding": 10,
        "degradation": 1,
        "antisense oligonucleotide": 5,
        "incorporation into and destabilization": 1,
        "neutralizer": 3,
        "cleavage": 9,
        "oxidizer": 3,
        "inactivator": 6,
        "nucleotide exchange blocker": 1,
        "inhibition of synthesis": 1,
        "translocation inhibitor": 2,
        "gene replacement": 1,
        "regulator": 10,
        "chelator": 2,
        "component of": 4
    },
    "number_of_edges_by_predicate_curie_prefixes": {
        "DRUGBANK": 2913688
    },
    "number_of_predicates_by_predicate_curie_prefixes": {
        "DRUGBANK": 54
    },
    "number_of_edges_by_source": {
        "identifiers_org_registry:drugbank": 2913688
    },
    "types_of_pairs_of_curies_for_xrefs": {},
    "types_of_pairs_of_curies_for_equivs": {},
    "number_of_nodes_by_source_and_category": {
        "identifiers_org_registry:drugbank": {}
    }
}
ecwood commented 1 year ago

Another thing to note: based on the following entry, which is empty in both reports,

    "number_of_nodes_by_source_and_category": {
        "identifiers_org_registry:drugbank": {}
    }

I do not think count_number_of_nodes_by_source_and_category is working.

ecwood commented 1 year ago

This is completed as of KG2.8.4pre, so I am going to close out this issue.

saramsey commented 1 year ago

Great work! Thank you.