cbrl-nuces / GOntoSim

Semantic Similarity
5 stars 1 forks source link

Python recursion error #2

Open karolina-precisionlife opened 10 months ago

karolina-precisionlife commented 10 months ago

Hey, when I try to run your code I run into Python recursion errors (even with the example gene lists). This is not an issue when using any method other than GOntoSim.

from goatools.obo_parser import GODag
from goatools.base import get_godag
import pandas as pd
import requests, sys

from GOntoSim import Similarity_of_Two_GOTerms, Similarity_of_Set_of_GOTerms, Semantic_Value

go = get_godag("go-basic.obo", optional_attrs={'relationship'})

gene1 = ['GO:0004022','GO:0004024', 'GO:0004174', 'GO:0046872','GO:0008270','GO:0004023', 'GO:0016491']
gene2 = ['GO:0009055','GO:0005515','GO:0046872','GO:0008270','GO:0020037']

unique_goterms = list(set().union(gene1, gene2))
S_values = [(x,Semantic_Value(x, go, 'Baseline_LCA_avg')) for x in unique_goterms ]
S_values = dict(S_values)
Similarity_of_Set_of_GOTerms(gene1, gene2, 'GOntoSim', S_values)

Traceback:

RecursionError                            Traceback (most recent call last)

File [~/Code/cross-study-vis/GOntoSim.py:796](https://vscode-remote+wsl-002bubuntu-002d18-002e04.vscode-resource.vscode-cdn.net/home/karolina/Code/cross-study-vis/~/Code/cross-study-vis/GOntoSim.py:796), in Similarity_of_Set_of_GOTerms(set1, set2, method, S_values)
    793     Sim1.append([])
    794     for goid in set2:
    795         #print (goterm , goid)
--> 796         Sim1[idx].append((goterm, goid,(Similarity_of_Two_GOTerms(goterm, goid, go, method, S_values))))
    797 #print(Sim1)
    798 #print([y[0][0] for y in Sim1])
    799 #print([[y[1] for y in  x] for x in Sim1])
    800 #print([y[1] for y in Sim1[0]])
    801 
    802 #print(pd.DataFrame(data = [[y[2] for y in  x] for x in Sim1], index = [y[0][0] for y in Sim1], columns = [y[1] for y in Sim1[0]]))
    804 for idx, goterm in enumerate(set2):

File [~/Code/cross-study-vis/GOntoSim.py:669](https://vscode-remote+wsl-002bubuntu-002d18-002e04.vscode-resource.vscode-cdn.net/home/karolina/Code/cross-study-vis/~/Code/cross-study-vis/GOntoSim.py:669), in Similarity_of_Two_GOTerms(go_id1, go_id2, go, method, S_values)
    667 else:   
    668     if go_id1 != 'GO:0003674' and go_id1 !='GO:0005575' and go_id1 != 'GO:0008150' and  go_id2 != 'GO:0003674' and go_id2 !='GO:0005575' and go_id2 != 'GO:0008150':
--> 669         hcd = highest_common_descendant((go_id1, go_id2), go)
    670         if hcd != 0:
    671             hcd_depth = go[hcd].depth

File [~/Code/cross-study-vis/GOntoSim.py:464](https://vscode-remote+wsl-002bubuntu-002d18-002e04.vscode-resource.vscode-cdn.net/home/karolina/Code/cross-study-vis/~/Code/cross-study-vis/GOntoSim.py:464), in highest_common_descendant(goterms, godag)
    458 '''
    459     This function gets the nearest common descendant
    460     using the above function.
    461     Only returns single most specific - assumes unique exists.
    462 '''
    463 # Take the element at minimum depth.
--> 464 common_children = common_children_go_ids(goterms, godag)
    465 if len(common_children) != 0:
    466     return min(common_children, key=lambda t: godag[t].depth)

File [~/Code/cross-study-vis/GOntoSim.py:446](https://vscode-remote+wsl-002bubuntu-002d18-002e04.vscode-resource.vscode-cdn.net/home/karolina/Code/cross-study-vis/~/Code/cross-study-vis/GOntoSim.py:446), in common_children_go_ids(goids, godag)
    444 rec = godag[goids[0]]
    445 #candidates = rec.get_all_children()
--> 446 candidates = rec.get_all_lower()
    447 candidates.update({goids[0]})
    448 # Find intersection with second to nth goid

File [~/Code/cross-study-vis/cross-study-vis-venv/lib/python3.9/site-packages/goatools/obo_parser.py:255](https://vscode-remote+wsl-002bubuntu-002d18-002e04.vscode-resource.vscode-cdn.net/home/karolina/Code/cross-study-vis/~/Code/cross-study-vis/cross-study-vis-venv/lib/python3.9/site-packages/goatools/obo_parser.py:255), in GOTerm.get_all_lower(self)
    253 for lower in self.get_goterms_lower():
    254     all_lower.add(lower.item_id)
--> 255     all_lower |= lower.get_all_lower()
    256 return all_lower

File [~/Code/cross-study-vis/cross-study-vis-venv/lib/python3.9/site-packages/goatools/obo_parser.py:255](https://vscode-remote+wsl-002bubuntu-002d18-002e04.vscode-resource.vscode-cdn.net/home/karolina/Code/cross-study-vis/~/Code/cross-study-vis/cross-study-vis-venv/lib/python3.9/site-packages/goatools/obo_parser.py:255), in GOTerm.get_all_lower(self)
    253 for lower in self.get_goterms_lower():
    254     all_lower.add(lower.item_id)
--> 255     all_lower |= lower.get_all_lower()
    256 return all_lower

    [... skipping similar frames: GOTerm.get_all_lower at line 255 (2964 times)]

File [~/Code/cross-study-vis/cross-study-vis-venv/lib/python3.9/site-packages/goatools/obo_parser.py:255](https://vscode-remote+wsl-002bubuntu-002d18-002e04.vscode-resource.vscode-cdn.net/home/karolina/Code/cross-study-vis/~/Code/cross-study-vis/cross-study-vis-venv/lib/python3.9/site-packages/goatools/obo_parser.py:255), in GOTerm.get_all_lower(self)
    253 for lower in self.get_goterms_lower():
    254     all_lower.add(lower.item_id)
--> 255     all_lower |= lower.get_all_lower()
    256 return all_lower

File [~/Code/cross-study-vis/cross-study-vis-venv/lib/python3.9/site-packages/goatools/obo_parser.py:253](https://vscode-remote+wsl-002bubuntu-002d18-002e04.vscode-resource.vscode-cdn.net/home/karolina/Code/cross-study-vis/~/Code/cross-study-vis/cross-study-vis-venv/lib/python3.9/site-packages/goatools/obo_parser.py:253), in GOTerm.get_all_lower(self)
    251 """Return all parent GO IDs through both reverse 'is_a' and all relationships."""
    252 all_lower = set()
--> 253 for lower in self.get_goterms_lower():
    254     all_lower.add(lower.item_id)
    255     all_lower |= lower.get_all_lower()

File [~/Code/cross-study-vis/cross-study-vis-venv/lib/python3.9/site-packages/goatools/obo_parser.py:295](https://vscode-remote+wsl-002bubuntu-002d18-002e04.vscode-resource.vscode-cdn.net/home/karolina/Code/cross-study-vis/~/Code/cross-study-vis/cross-study-vis-venv/lib/python3.9/site-packages/goatools/obo_parser.py:295), in GOTerm.get_goterms_lower(self)
    292 """Returns a set containing children and all reverse-relationship GO Terms."""
    293 # Requires GODag is created with 'relationship' in optional_attrs argument
    294 # pylint: disable=no-member
--> 295 return set.union(self.children, *self.relationship_rev.values())

RecursionError: maximum recursion depth exceeded while calling a Python object

As you can see, I am using GOntoSim.py; is that the correct one to use? Similarity.py looks quite similar but doesn't seem to need the semantic values to be calculated beforehand?

I am super happy somebody wrote something for GO similarity in Python, as most of it is in R :/

Thanks a lot

A-Kamran commented 10 months ago

Hi,

I've attempted to replicate the issue you reported but couldn't do so successfully. The GOntoSim method is running fine for me (even on a completely new machine). Even running the exact same code you have provided in this issue gives a similarity value as can be seen in the attached screenshot. It's possible the issue is environment-specific.

Please ensure you're using the latest version and provide more specific details if the problem persists.

Your feedback is valuable, and we're here to assist further if needed. (Attached: Screenshot from 2023-10-19 16-26-54)

karolina-precisionlife commented 10 months ago

Hey thanks a lot.

I have attempted to rerun it in a new venv but I am still getting the same issue. I am also getting a slightly different print output for some reason (an additional GO ID is printed?):

python3 gontsim_test.py
  EXISTS: go-basic.obo
go-basic.obo: fmt(1.2) rel(2022-09-19) 46,823 Terms; optional_attrs(relationship)
  EXISTS: go-basic.obo
go-basic.obo: fmt(1.2) rel(2022-09-19) 46,823 Terms; optional_attrs(relationship)
GO:0003674
GO:0022900
GO:0005488
Traceback (most recent call last):
  File "/home/karolina/Code/cross-study-vis/gontsim_test.py", line 17, in <module>
    print(Similarity_of_Set_of_GOTerms(gene1, gene2, 'GOntoSim', S_values))
  File "/home/karolina/Code/cross-study-vis/GOntoSim.py", line 796, in Similarity_of_Set_of_GOTerms
    Sim1[idx].append((goterm, goid,(Similarity_of_Two_GOTerms(goterm, goid, go, method, S_values))))
  File "/home/karolina/Code/cross-study-vis/GOntoSim.py", line 669, in Similarity_of_Two_GOTerms
    hcd = highest_common_descendant((go_id1, go_id2), go)
  File "/home/karolina/Code/cross-study-vis/GOntoSim.py", line 464, in highest_common_descendant
    common_children = common_children_go_ids(goterms, godag)
  File "/home/karolina/Code/cross-study-vis/GOntoSim.py", line 446, in common_children_go_ids
    candidates = rec.get_all_lower()
  File "/home/karolina/Code/cross-study-vis/.venv/lib/python3.9/site-packages/goatools/obo_parser.py", line 251, in get_all_lower
    all_lower |= lower.get_all_lower()
  File "/home/karolina/Code/cross-study-vis/.venv/lib/python3.9/site-packages/goatools/obo_parser.py", line 251, in get_all_lower
    all_lower |= lower.get_all_lower()
  File "/home/karolina/Code/cross-study-vis/.venv/lib/python3.9/site-packages/goatools/obo_parser.py", line 251, in get_all_lower
    all_lower |= lower.get_all_lower()
  [Previous line repeated 990 more times]
  File "/home/karolina/Code/cross-study-vis/.venv/lib/python3.9/site-packages/goatools/obo_parser.py", line 249, in get_all_lower
    for lower in self.get_goterms_lower():
  File "/home/karolina/Code/cross-study-vis/.venv/lib/python3.9/site-packages/goatools/obo_parser.py", line 291, in get_goterms_lower
    return set.union(self.children, *self.relationship_rev.values())
RecursionError: maximum recursion depth exceeded while calling a Python object

My pip freeze:

asttokens==2.4.0
backcall==0.2.0
certifi==2023.7.22
charset-normalizer==3.3.1
comm==0.1.4
debugpy==1.8.0
decorator==5.1.1
docopt==0.6.2
et-xmlfile==1.1.0
exceptiongroup==1.1.3
executing==2.0.0
goatools==1.3.9
idna==3.4
importlib-metadata==6.8.0
ipykernel==6.25.2
ipython==8.16.1
jedi==0.19.1
joblib==1.3.2
jupyter_client==8.4.0
jupyter_core==5.4.0
matplotlib-inline==0.1.6
nest-asyncio==1.5.8
numpy==1.26.1
openpyxl==3.1.2
packaging==23.2
pandas==2.1.1
parso==0.8.3
patsy==0.5.3
pexpect==4.8.0
pickleshare==0.7.5
platformdirs==3.11.0
prompt-toolkit==3.0.39
psutil==5.9.6
ptyprocess==0.7.0
pure-eval==0.2.2
pydot==1.4.2
Pygments==2.16.1
pyparsing==3.1.1
python-dateutil==2.8.2
pytz==2023.3.post1
pyzmq==25.1.1
requests==2.31.0
scikit-learn==1.3.1
scipy==1.11.3
six==1.16.0
stack-data==0.6.3
statsmodels==0.14.0
threadpoolctl==3.2.0
tornado==6.3.3
traitlets==5.11.2
typing_extensions==4.8.0
tzdata==2023.3
urllib3==2.0.7
wcwidth==0.2.8
XlsxWriter==3.1.9
zipp==3.17.0
A-Kamran commented 10 months ago

Can you please share the gene ontology file you are using? Also, let me know your Python version.