Open amykglen opened 2 weeks ago
maybe because the predicate in RTX-KG2 got switched to `biolink:treats_or_applied_or_studied_to_treat`?
don't think so - when I raise the predicate to `treats_or_applied_or_studied_to_treat`, a few `in_clinical_trials_for` edges come back from CTKP, but that's it.
OK, I've created a little test script for this:
curl -X POST -d @query.json \
-H 'Content-Type: application/json' \
-H 'accept: application/json' \
https://multiomics.ci.transltr.io/ctkp/query | jq > results.json
which uses the query graph from above, in file `query.json`. I'll post the results below.
Results here (removing the `logs` section for brevity):
{
"message": {
"knowledge_graph": {
"edges": {
"e66fbfac-0740-3919-adb1-ca8604e33666": {
"attributes": [
{
"attribute_type_id": "biolink:supporting_study",
"attributes": [
{
"attribute_type_id": "clinical_trial_phase",
"value": 4,
"value_type_id": "biolink:ResearchPhaseEnum"
},
{
"attribute_type_id": "tested",
"value": "yes"
},
{
"attribute_type_id": "primary_purpose",
"value": "TREATMENT"
},
{
"attribute_type_id": "intervention_model",
"value": "SINGLE_GROUP"
},
{
"attribute_type_id": "clinical_trial_status",
"value": "WITHDRAWN"
},
{
"attribute_type_id": "start_date",
"value": "2008-12"
},
{
"attribute_type_id": "study_size",
"value": 0,
"value_type_id": "metatype:Integer"
},
{
"attribute_type_id": "enrollment_type",
"value": "ACTUAL"
},
{
"attribute_type_id": "age_range",
"value": "18 years or older"
},
{
"attribute_type_id": "child",
"value": false,
"value_type_id": "metatype:Boolean"
},
{
"attribute_type_id": "adult",
"value": true,
"value_type_id": "metatype:Boolean"
},
{
"attribute_type_id": "older_adult",
"value": true,
"value_type_id": "metatype:Boolean"
},
{
"attribute_type_id": "brief_title",
"value": "Valganciclovir to Treat HHV-8 Associated Multicentric Castleman's Disease",
"value_type_id": "metatype:String"
}
],
"value": "NCT00361933",
"value_url": "https://clinicaltrials.gov/study/NCT00361933?tab=table"
},
{
"attribute_type_id": "clinical_trial_tested_intervention",
"value": "yes",
"value_type_id": "metatype:String"
},
{
"attribute_type_id": "biolink:clinical_approval_status",
"value": "biolink:approved_for_condition",
"value_type_id": "biolink:ClinicalApprovalStatusEnum"
},
{
"attribute_source": "infores:multiomics-clinicaltrials",
"attribute_type_id": "elevate_to_prediction",
"value": false,
"value_type_id": "metatype:Boolean"
},
{
"attribute_source": "infores:multiomics-clinicaltrials",
"attribute_type_id": "intervention_boxed_warning",
"value": "2/2"
},
{
"attribute_source": "infores:multiomics-clinicaltrials",
"attribute_type_id": "biolink:knowledge_level",
"value": "knowledge_assertion"
},
{
"attribute_source": "infores:multiomics-clinicaltrials",
"attribute_type_id": "biolink:agent_type",
"value": "manual_agent"
},
{
"attribute_type_id": "biolink:max_research_phase",
"value": 4,
"value_type_id": "biolink:ResearchPhaseEnum"
}
],
"object": "MONDO:0015564",
"predicate": "biolink:treats",
"sources": [
{
"resource_id": "infores:clinicaltrials",
"resource_role": "supporting_data_source"
},
{
"resource_id": "infores:aact",
"resource_role": "supporting_data_source",
"upstream_resource_ids": [
"infores:clinicaltrials"
]
},
{
"resource_id": "infores:multiomics-clinicaltrials",
"resource_role": "primary_knowledge_source",
"source_record_urls": [
"https://db.systemsbiology.net/gestalt/cgi-pub/KGinfo.pl?id=e66fbfac-0740-3919-adb1-ca8604e33666"
],
"upstream_resource_ids": [
"infores:aact"
]
}
],
"subject": "UNII:GCU97FKN3R"
}
},
"nodes": {
"MONDO:0015564": {
"attributes": [],
"categories": [
"biolink:Disease"
],
"name": "Castleman disease"
},
"UNII:GCU97FKN3R": {
"attributes": [],
"categories": [
"biolink:ChemicalEntity"
],
"name": "VALGANCICLOVIR"
}
}
},
"query_graph": {
"edges": {
"t_edge": {
"object": "on",
"predicates": [
"biolink:treats"
],
"subject": "sn"
}
},
"nodes": {
"on": {
"categories": [
"biolink:Disease"
],
"ids": [
"MONDO:0015564"
]
},
"sn": {
"categories": [
"biolink:ChemicalEntity"
]
}
}
},
"results": [
{
"analyses": [
{
"edge_bindings": {
"t_edge": [
{
"attributes": [],
"id": "e66fbfac-0740-3919-adb1-ca8604e33666"
}
]
},
"resource_id": "infores:multiomics-clinicaltrials"
}
],
"node_bindings": {
"on": [
{
"attributes": [],
"id": "MONDO:0015564"
}
],
"sn": [
{
"attributes": [],
"id": "UNII:GCU97FKN3R"
}
]
},
"resource_id": "infores:multiomics-clinicaltrials"
}
]
}
}
I see one edge in those results, with subject `UNII:GCU97FKN3R` and object `MONDO:0015564`, and source `infores:clinicaltrials`.
I created another little test script to run the same query graph through the RTX-KG2 KP:
curl -X POST -d @query.json \
-H 'Content-Type: application/json' \
-H 'accept: application/json' \
https://kg2cploverdb.ci.transltr.io/query | jq > results2.json
Confirmed, no results:
sramsey-laptop:issue-2414 sramsey$ cat results2.json
{
"logs": [
{
"level": "INFO",
"message": "kg2c: Converting qnode on's 'ids' to equivalent ids we recognize",
"timestamp": "Wed, 13 Nov 2024 17:42:03 GMT"
},
{
"level": "INFO",
"message": "kg2c: After expansion to descendant concepts, object qnode has 5 ids",
"timestamp": "Wed, 13 Nov 2024 17:42:03 GMT"
},
{
"level": "INFO",
"message": "kg2c: Looking up answers to query..",
"timestamp": "Wed, 13 Nov 2024 17:42:03 GMT"
},
{
"level": "INFO",
"message": "kg2c: Found 0 input node answers, 0 output node answers, 0 edges",
"timestamp": "Wed, 13 Nov 2024 17:42:03 GMT"
},
{
"level": "INFO",
"message": "kg2c: Beginning to transform answers to TRAPI format..",
"timestamp": "Wed, 13 Nov 2024 17:42:03 GMT"
},
{
"level": "INFO",
"message": "kg2c: Done with query, returning TRAPI response (0 results)",
"timestamp": "Wed, 13 Nov 2024 17:42:03 GMT"
}
],
"message": {
"knowledge_graph": {
"edges": {},
"nodes": {}
},
"query_graph": {
"edges": {
"t_edge": {
"object": "on",
"predicates": [
"biolink:treats"
],
"subject": "sn"
}
},
"nodes": {
"on": {
"categories": [
"biolink:Disease"
],
"ids": [
"MONDO:0015564"
]
},
"sn": {
"categories": [
"biolink:ChemicalEntity"
]
}
}
},
"results": []
}
}
Looks like `UNII:GCU97FKN3R` corresponds to the antiviral "valganciclovir"; see
https://arax.ncats.io/?r=314336
When I run the originally posted QG through `arax.ci.transltr.io`, I see "valganciclovir" as the third result, and it is indeed provided by the Clinical Trials KP.
Looks like ARAX on CI thinks that the preferred CURIE for valganciclovir is `CHEBI:63635`.
So, there is a "treats"-like edge between "valganciclovir" and "Castleman" in RTX-KG2.10.1c; see this Cypher query against the `kg2canonicalized2.rtx.ai` Neo4j endpoint.
Here is the Cypher query embedded as text:
match (n {id: 'CHEBI:63635'})-[r]->(m {id: 'MONDO:0015564'}) return n,m,r;
but note, the predicate is `biolink:treats_or_applied_or_studied_to_treat`, not `biolink:treats`.
@amykglen points out that the above edge is actually from SemMedDB, not CTKP. Thank you, Amy! My bad!
FYI - it looks like KG2c only contains CTKP's true predicates (no remapping, as desired):
MATCH (n)-[e]->(m) where e.primary_knowledge_source="infores:biothings-multiomics-clinicaltrials" return distinct e.predicate, count(distinct e)
e.predicate | count(distinct e) |
---|---|
"biolink:biolink_in_clinical_trials_for" | 13459 |
"biolink:biolink_mentioned_in_trials_for" | 14215 |
"biolink:biolink_treats" | 3558 |
Looks like this is a KG2pre bug. I ran the following on `kg2101build.rtx.ai`:
zcat kg2-simplified-2.10.1-edges.jsonl.gz | grep MONDO:0015564 | grep UNII:GCU97FKN3R | grep treats
ubuntu@ip-172-31-10-56:~/kg2-build$
But we are clearly getting this edge in the CTKP data file dump:
ubuntu@ip-172-31-10-56:~/kg2-build$ grep MONDO:0015564 clinicaltrialskg_tsv_to_kg_jsonl-edges.jsonl | grep UNII:GCU97FKN3R | grep treats
{"domain_range_exclusion": false, "id": "UNII:GCU97FKN3R---biolink:biolink_treats---None---None---None---MONDO:0015564---ClinicalTrialsKG:", "negated": false, "object": "MONDO:0015564", "predicate": null, "primary_knowledge_source": "ClinicalTrialsKG:", "publications": [], "publications_info": {}, "qualified_object_aspect": null, "qualified_object_direction": null, "qualified_predicate": null, "relation_label": "biolink:treats", "source_predicate": "biolink:biolink_treats", "subject": "UNII:GCU97FKN3R", "update_date": "2008-12-01"}
Transferring this issue to the RTX-KG2 project area
So the edge does appear in the `clinicaltrialskg_tsv_to_kg_jsonl-edges.jsonl` file:
ubuntu@ip-172-31-10-56:~/kg2-build$ grep MONDO:0015564 clinicaltrialskg_tsv_to_kg_jsonl-edges.jsonl | grep treats
{"domain_range_exclusion": false, "id": "UNII:GCU97FKN3R---biolink:biolink_treats---None---None---None---MONDO:0015564---ClinicalTrialsKG:", "negated": false, "object": "MONDO:0015564", "predicate": null, "primary_knowledge_source": "ClinicalTrialsKG:", "publications": [], "publications_info": {}, "qualified_object_aspect": null, "qualified_object_direction": null, "qualified_predicate": null, "relation_label": "biolink:treats", "source_predicate": "biolink:biolink_treats", "subject": "UNII:GCU97FKN3R", "update_date": "2008-12-01"}
OK, the commits in issue #420 seem to fix this:
ubuntu@ip-172-31-13-50:~/kg2-build$ grep UNII:GCU97FKN3R kg2-merged-2.10.2-edges.jsonl | grep MONDO:0015564 | jq
{
"domain_range_exclusion": false,
"id": "UNII:GCU97FKN3R---biolink:in_clinical_trials_for---None---None---None---MONDO:0015564---ClinicalTrialsKG:",
"negated": false,
"object": "MONDO:0015564",
"predicate": null,
"primary_knowledge_source": "ClinicalTrialsKG:",
"publications": [],
"publications_info": {},
"qualified_object_aspect": null,
"qualified_object_direction": null,
"qualified_predicate": null,
"relation_label": "in_clinical_trials_for",
"source_predicate": "biolink:in_clinical_trials_for",
"subject": "UNII:GCU97FKN3R",
"update_date": "2008-12-01"
}
{
"domain_range_exclusion": false,
"id": "UNII:GCU97FKN3R---biolink:treats---None---None---None---MONDO:0015564---ClinicalTrialsKG:",
"negated": false,
"object": "MONDO:0015564",
"predicate": null,
"primary_knowledge_source": "ClinicalTrialsKG:",
"publications": [],
"publications_info": {},
"qualified_object_aspect": null,
"qualified_object_direction": null,
"qualified_predicate": null,
"relation_label": "treats",
"source_predicate": "biolink:treats",
"subject": "UNII:GCU97FKN3R",
"update_date": "2008-12-01"
}
{
"domain_range_exclusion": false,
"id": "UNII:GCU97FKN3R---biolink:mentioned_in_trials_for---None---None---None---MONDO:0015564---ClinicalTrialsKG:",
"negated": false,
"object": "MONDO:0015564",
"predicate": null,
"primary_knowledge_source": "ClinicalTrialsKG:",
"publications": [],
"publications_info": {},
"qualified_object_aspect": null,
"qualified_object_direction": null,
"qualified_predicate": null,
"relation_label": "mentioned_in_trials_for",
"source_predicate": "biolink:mentioned_in_trials_for",
"subject": "UNII:GCU97FKN3R",
"update_date": "2011-09-13"
}
Noticed that, curiously, the query below (Castleman) returns 1 edge when sent directly to the CTKP CI API, but no edges when sent directly to the KG2 CI API. Odd, since KG2.10.1 includes CTKP... need to look into why this is happening. It's possible that it's simply because CTKP has been updated since the KG2.10.1 build, or maybe it has something to do with the KG2pre ingest, or another part of the system?