saezlab / decoupler-py

Python package to perform enrichment analysis from omics data.
https://decoupler-py.readthedocs.io/
GNU General Public License v3.0
157 stars 23 forks source link

Cannot convert gene symbol to rat #110

Closed wangjiawen2013 closed 6 months ago

wangjiawen2013 commented 6 months ago

Hi, I want to translate human gene symbols to rat, but the following error occurred:

In [25]: test = dc.translate_net(markers, target_organism="rat", unique_by = ('cell_type', 'genesymbol'))
---------------------------------------------------------------------------
JSONDecodeError                           Traceback (most recent call last)
Cell In[25], line 1
----> 1 test = dc.translate_net(markers, target_organism="rat", unique_by = ('cell_type', 'genesymbol'))

File ~/programs/miniconda3/envs/py311/lib/python3.11/site-packages/decoupler/omnip.py:695, in translate_net(net, columns, source_organism, target_organism, id_type, unique_by, **kwargs)
    692 hom_net = net.copy()
    694 # Translate
--> 695 hom_net = orthology.translate_df(
    696     df=hom_net,
    697     target=_target_organism,
    698     cols=columns,
    699     source=_source_organism,
    700 )
    702 unique_by = common.to_list(unique_by)
    704 if unique_by and all(c in hom_net.columns for c in unique_by):
    705 
    706     # Remove duplicated based on source and target

File ~/programs/miniconda3/envs/py311/lib/python3.11/site-packages/pypath/utils/orthology.py:2262, in translate_df(df, target, source, cols, id_type, only_swissprot, oma, homologene, ensembl, oma_rel_type, oma_score, ensembl_hc, ensembl_types, **kwargs)
   2259 args.pop('manager')
   2260 args.pop('kwargs')
-> 2262 return manager.translate_df(**args, **kwargs)

File ~/programs/miniconda3/envs/py311/lib/python3.11/site-packages/pypath/utils/orthology.py:620, in OrthologyManager.translate_df(self, df, target, source, cols, id_type, only_swissprot, oma, homologene, ensembl, oma_rel_type, oma_score, ensembl_hc, ensembl_types, **kwargs)
    618 args.pop('self')
    619 args['id_type'] = _id_type
--> 620 ortho_df = self.get_df(**args)
    622 table = self.which_table(
    623     target = target,
    624     source = source,
   (...)
    627     resource = 'oma',
    628 )
    630 df = table.translate_df(
    631     df = df,
    632     cols = [c for c, i in cols.items() if i == _id_type],
    633     ortho_df = ortho_df,
    634 )

File ~/programs/miniconda3/envs/py311/lib/python3.11/site-packages/pypath/utils/orthology.py:515, in OrthologyManager.get_df(self, target, source, id_type, only_swissprot, oma, homologene, ensembl, oma_rel_type, oma_score, ensembl_hc, ensembl_types, full_records, **kwargs)
    511     if not param[resource]:
    513         continue
--> 515     table = self.which_table(
    516         target = target,
    517         source = source,
    518         only_swissprot = only_swissprot,
    519         id_type = id_type,
    520         resource = resource,
    521     )
    523     result.append(
    524         table.df(
    525             full_records = full_records,
   (...)
    531         )
    532     )
    534 return pd.concat(result)

File ~/programs/miniconda3/envs/py311/lib/python3.11/site-packages/pypath/utils/orthology.py:233, in OrthologyManager.which_table(self, target, source, only_swissprot, resource, id_type)
    229 self.expiry[key] = time.time()
    231 if key not in self.tables:
--> 233     self.load(key)
    235 if key in self.tables:
    237     return self.tables[key]

File ~/programs/miniconda3/envs/py311/lib/python3.11/site-packages/pypath/utils/orthology.py:242, in OrthologyManager.load(self, key)
    240 def load(self, key):
--> 242     self.tables[key] = globals()[f'{key.resource.capitalize()}Orthology'](
    243         target = key.target,
    244         source = key.source,
    245         only_swissprot = key.only_swissprot,
    246         id_type = key.id_type,
    247     )

File ~/programs/miniconda3/envs/py311/lib/python3.11/site-packages/pypath/utils/orthology.py:1492, in OmaOrthology.__init__(self, target, source, id_type, only_swissprot, rel_type, score)
   1462 def __init__(
   1463         self,
   1464         target: int | str,
   (...)
   1472         score: float | None = None,
   1473     ):
   1474     """
   1475     Orthology translation with Ensembl data.
   1476 
   (...)
   1489             Lower threshold for similarity metric.
   1490     """
-> 1492     ProteinOrthology.__init__(**locals())

File ~/programs/miniconda3/envs/py311/lib/python3.11/site-packages/pypath/utils/orthology.py:869, in ProteinOrthology.__init__(self, target, source, id_type, only_swissprot, **kwargs)
    867 self.load_proteome(self.source)
    868 self._set_param(kwargs, *self._param)
--> 869 self.load()

File ~/programs/miniconda3/envs/py311/lib/python3.11/site-packages/pypath/utils/orthology.py:1506, in OmaOrthology.load(self)
   1502 if self._from_pickle():
   1504     return
-> 1506 oma_data = oma_input.oma_orthologs(
   1507     organism_a = self.source,
   1508     organism_b = self.target,
   1509     id_type = self.id_type,
   1510 )
   1511 self.data = collections.defaultdict(set)
   1513 for rec in oma_data:

File ~/programs/miniconda3/envs/py311/lib/python3.11/site-packages/pypath/inputs/oma.py:115, in oma_orthologs(organism_a, organism_b, id_type, rel_type, score, return_df)
    113 n_pages = float(c.resp_headers_dict.get('x-total-count', 1e8)) / 100
    114 page += 1
--> 115 data = inputs_common.json_read(c.result)
    117 for rec in data:
    119     if (
    120         (score and rec['score'] < score) or
    121         (rel_type and rec['rel_type'] not in rel_type)
    122     ):

File ~/programs/miniconda3/envs/py311/lib/python3.11/site-packages/pypath/inputs/common.py:350, in json_read(data)
    346     data = json.load(data)
    348 elif isinstance(data, str):
--> 350     data = json.loads(data)
    352 return data

File ~/programs/miniconda3/envs/py311/lib/python3.11/json/__init__.py:346, in loads(s, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
    341     s = s.decode(detect_encoding(s), 'surrogatepass')
    343 if (cls is None and object_hook is None and
    344         parse_int is None and parse_float is None and
    345         parse_constant is None and object_pairs_hook is None and not kw):
--> 346     return _default_decoder.decode(s)
    347 if cls is None:
    348     cls = JSONDecoder

File ~/programs/miniconda3/envs/py311/lib/python3.11/json/decoder.py:337, in JSONDecoder.decode(self, s, _w)
    332 def decode(self, s, _w=WHITESPACE.match):
    333     """Return the Python representation of ``s`` (a ``str`` instance
    334     containing a JSON document).
    335 
    336     """
--> 337     obj, end = self.raw_decode(s, idx=_w(s, 0).end())
    338     end = _w(s, end).end()
    339     if end != len(s):

File ~/programs/miniconda3/envs/py311/lib/python3.11/json/decoder.py:353, in JSONDecoder.raw_decode(self, s, idx)
    344 """Decode a JSON document from ``s`` (a ``str`` beginning with
    345 a JSON document) and return a 2-tuple of the Python
    346 representation and the index in ``s`` where the document ended.
   (...)
    350 
    351 """
    352 try:
--> 353     obj, end = self.scan_once(s, idx)
    354 except StopIteration as err:
    355     raise JSONDecodeError("Expecting value", s, err.value) from None

JSONDecodeError: Expecting ',' delimiter: line 1 column 212287 (char 212286)
PauBadiaM commented 6 months ago

Hi @wangjiawen2013,

Could you try installing the latest versions of omnipath and pypath and try again?

pip install omnipath==1.0.8
pip install pypath-omnipath==0.16.10

Let me know how it goes.

wangjiawen2013 commented 6 months ago

Hi, I installed decoupler/omnipath/pypath-omnipath four days ago, so they're already the latest versions.

PauBadiaM commented 6 months ago

Hi @deeenes, could you take a look? I think it is related to connection issues

deeenes commented 6 months ago

This looks like an incomplete transmission; the easiest fix is to delete the corrupt cache file. To do this, open the log from the session where the error occurred (or run the whole thing again, and open the log when you see the error):

import pypath
import decoupler as dc

test = dc.translate_net(markers, target_organism='rat', unique_by = ('cell_type', 'genesymbol'))

# error happens

pypath.log()

See the bottom of the log for the last accessed cache file and delete it. You'll see something like this:

430892 [2024-02-05 16:51:28] [curl] Creating Curl object to retrieve data from `https://omabrowser.org/api/pairs/9606/10090/?page=21&per_page=1000`
 430893 [2024-02-05 16:51:28] [curl] Cache file path: `/home/denes/.cache/pypath/46b04a786b141726f4b3fd9c00276f29-`
 430894 [2024-02-05 16:51:28] [curl] Setting up and calling pycurl.
 430895 [2024-02-05 16:51:34] [curl] Opening file `/home/denes/.cache/pypath/46b04a786b141726f4b3fd9c00276f29-`
 430896 [2024-02-05 16:51:34] [curl] Extracting data from file type `plain`
 430897 [2024-02-05 16:51:34] [curl] Opening plain text file `/home/denes/.cache/pypath/46b04a786b141726f4b3fd9c00276f29-`.
 430898 [2024-02-05 16:51:34] [curl] Contents of `/home/denes/.cache/pypath/46b04a786b141726f4b3fd9c00276f29-` has been read and the file has been closed.

In the example above, the /home/denes/.cache/pypath/46b04a786b141726f4b3fd9c00276f29- file has to be deleted. In your own log, search for the last cache file accessed before the error; that is the one which caused it. Finally, try to run translate_net again.

wangjiawen2013 commented 6 months ago

Thanks, this solved my problem successfully