Closed Ryeeeeeeeeee closed 1 month ago
Hello, can you please share the full traceback?
Hi
I get the same error when using both decoupler.get_resource('MSigDB', organism = 'mouse')
and decoupler.translate_net()
.
Traceback:
---------------------------------------------------------------------------
error Traceback (most recent call last)
Cell In[11], line 1
----> 1 mouse_msigdb = decoupler.translate_net(msigdb, target_organism = 'mouse', unique_by = ('geneset', 'genesymbol'))
2 mouse_msigdb
File /opt/conda/lib/python3.10/site-packages/decoupler/omnip.py:695, in translate_net(net, columns, source_organism, target_organism, id_type, unique_by, **kwargs)
692 hom_net = net.copy()
694 # Translate
--> 695 hom_net = orthology.translate_df(
696 df=hom_net,
697 target=_target_organism,
698 cols=columns,
699 source=_source_organism,
700 )
702 unique_by = common.to_list(unique_by)
704 if unique_by and all(c in hom_net.columns for c in unique_by):
705
706 # Remove duplicated based on source and target
File /opt/conda/lib/python3.10/site-packages/pypath/utils/orthology.py:2262, in translate_df(df, target, source, cols, id_type, only_swissprot, oma, homologene, ensembl, oma_rel_type, oma_score, ensembl_hc, ensembl_types, **kwargs)
2259 args.pop('manager')
2260 args.pop('kwargs')
-> 2262 return manager.translate_df(**args, **kwargs)
File /opt/conda/lib/python3.10/site-packages/pypath/utils/orthology.py:620, in OrthologyManager.translate_df(self, df, target, source, cols, id_type, only_swissprot, oma, homologene, ensembl, oma_rel_type, oma_score, ensembl_hc, ensembl_types, **kwargs)
618 args.pop('self')
619 args['id_type'] = _id_type
--> 620 ortho_df = self.get_df(**args)
622 table = self.which_table(
623 target = target,
624 source = source,
(...)
627 resource = 'oma',
628 )
630 df = table.translate_df(
631 df = df,
632 cols = [c for c, i in cols.items() if i == _id_type],
633 ortho_df = ortho_df,
634 )
File /opt/conda/lib/python3.10/site-packages/pypath/utils/orthology.py:515, in OrthologyManager.get_df(self, target, source, id_type, only_swissprot, oma, homologene, ensembl, oma_rel_type, oma_score, ensembl_hc, ensembl_types, full_records, **kwargs)
511 if not param[resource]:
513 continue
--> 515 table = self.which_table(
516 target = target,
517 source = source,
518 only_swissprot = only_swissprot,
519 id_type = id_type,
520 resource = resource,
521 )
523 result.append(
524 table.df(
525 full_records = full_records,
(...)
531 )
532 )
534 return pd.concat(result)
File /opt/conda/lib/python3.10/site-packages/pypath/utils/orthology.py:233, in OrthologyManager.which_table(self, target, source, only_swissprot, resource, id_type)
229 self.expiry[key] = time.time()
231 if key not in self.tables:
--> 233 self.load(key)
235 if key in self.tables:
237 return self.tables[key]
File /opt/conda/lib/python3.10/site-packages/pypath/utils/orthology.py:242, in OrthologyManager.load(self, key)
240 def load(self, key):
--> 242 self.tables[key] = globals()[f'{key.resource.capitalize()}Orthology'](
243 target = key.target,
244 source = key.source,
245 only_swissprot = key.only_swissprot,
246 id_type = key.id_type,
247 )
File /opt/conda/lib/python3.10/site-packages/pypath/utils/orthology.py:1492, in OmaOrthology.__init__(self, target, source, id_type, only_swissprot, rel_type, score)
1462 def __init__(
1463 self,
1464 target: int | str,
(...)
1472 score: float | None = None,
1473 ):
1474 """
1475 Orthology translation with Ensembl data.
1476
(...)
1489 Lower threshold for similarity metric.
1490 """
-> 1492 ProteinOrthology.__init__(**locals())
File /opt/conda/lib/python3.10/site-packages/pypath/utils/orthology.py:869, in ProteinOrthology.__init__(self, target, source, id_type, only_swissprot, **kwargs)
867 self.load_proteome(self.source)
868 self._set_param(kwargs, *self._param)
--> 869 self.load()
File /opt/conda/lib/python3.10/site-packages/pypath/utils/orthology.py:1506, in OmaOrthology.load(self)
1502 if self._from_pickle():
1504 return
-> 1506 oma_data = oma_input.oma_orthologs(
1507 organism_a = self.source,
1508 organism_b = self.target,
1509 id_type = self.id_type,
1510 )
1511 self.data = collections.defaultdict(set)
1513 for rec in oma_data:
File /opt/conda/lib/python3.10/site-packages/pypath/inputs/oma.py:125, in oma_orthologs(organism_a, organism_b, id_type, rel_type, score, return_df)
119 if (
120 (score and rec['score'] < score) or
121 (rel_type and rec['rel_type'] not in rel_type)
122 ):
123 continue
--> 125 a, b = (
126 [
127 OmaGene(
128 id = id_,
129 oma_group = e['oma_group'],
130 hog = e['oma_hog_id'],
131 taxon = e['species']['taxon_id'],
132 chr = e['chromosome'],
133 start = int(e['locus']['start']),
134 end = int(e['locus']['end']),
135 strand = int(e['locus']['strand']),
136 main_isoform = e['is_main_isoform'],
137
138 )
139 for id_ in _id_translate(
140 id_ = e['canonicalid'],
141 taxon = e['species']['taxon_id'],
142 id_type = id_type,
143 )
144 ]
145 for e in (rec[f'entry_{ei}'] for ei in (1, 2))
146 )
149 result.update(
150 {
151 OmaOrthology(
(...)
159 }
160 )
162 if page > n_pages: break
File /opt/conda/lib/python3.10/site-packages/pypath/inputs/oma.py:139, in <genexpr>(.0)
119 if (
120 (score and rec['score'] < score) or
121 (rel_type and rec['rel_type'] not in rel_type)
122 ):
123 continue
125 a, b = (
126 [
127 OmaGene(
128 id = id_,
129 oma_group = e['oma_group'],
130 hog = e['oma_hog_id'],
131 taxon = e['species']['taxon_id'],
132 chr = e['chromosome'],
133 start = int(e['locus']['start']),
134 end = int(e['locus']['end']),
135 strand = int(e['locus']['strand']),
136 main_isoform = e['is_main_isoform'],
137
138 )
--> 139 for id_ in _id_translate(
140 id_ = e['canonicalid'],
141 taxon = e['species']['taxon_id'],
142 id_type = id_type,
143 )
144 ]
145 for e in (rec[f'entry_{ei}'] for ei in (1, 2))
146 )
149 result.update(
150 {
151 OmaOrthology(
(...)
159 }
160 )
162 if page > n_pages: break
File /opt/conda/lib/python3.10/site-packages/pypath/inputs/oma.py:244, in _id_translate(id_, taxon, id_type)
234 if not id_type: return {id_}
236 s_id_type = (
237 'ensg'
238 if id_.startswith('ENS') else
(...)
241 'uniprot'
242 )
--> 244 uniprots = mapping.map_name(
245 id_,
246 s_id_type,
247 'uniprot',
248 ncbi_tax_id = taxon,
249 )
251 return mapping.map_names(
252 uniprots,
253 'uniprot',
254 id_type,
255 ncbi_tax_id = taxon,
256 ) if uniprots else set()
File /opt/conda/lib/python3.10/site-packages/pypath/utils/mapping.py:3551, in map_name(name, id_type, target_id_type, ncbi_tax_id, strict, expand_complexes, uniprot_cleanup)
3498 """
3499 Translates one instance of one ID type to a different one.
3500 Returns set of the target ID type.
(...)
3546 ID, call the `uniprot_cleanup` function at the end.
3547 """
3549 mapper = get_mapper()
-> 3551 return mapper.map_name(
3552 name = name,
3553 id_type = id_type,
3554 target_id_type = target_id_type,
3555 ncbi_tax_id = ncbi_tax_id,
3556 strict = strict,
3557 expand_complexes = expand_complexes,
3558 uniprot_cleanup = uniprot_cleanup,
3559 )
File /opt/conda/lib/python3.10/site-packages/pypath_common/_misc.py:2935, in ignore_unhashable.<locals>.wrapper(*args, **kwargs)
2932 @functools.wraps(func, assigned = attributes)
2933 def wrapper(*args, **kwargs):
2934 try:
-> 2935 return func(*args, **kwargs)
2936 except TypeError as error:
2937 if 'unhashable type' in str(error):
File /opt/conda/lib/python3.10/site-packages/pypath/utils/mapping.py:2193, in Mapper.map_name(self, name, id_type, target_id_type, ncbi_tax_id, strict, expand_complexes, uniprot_cleanup)
2189 # for UniProt IDs we do a few more steps to
2190 # try to find out the primary SwissProt ID
2191 if uniprot_cleanup and target_id_type == 'uniprot':
-> 2193 mapped_names = self.uniprot_cleanup(
2194 uniprots = mapped_names,
2195 ncbi_tax_id = ncbi_tax_id,
2196 )
2198 return mapped_names
File /opt/conda/lib/python3.10/site-packages/pypath/utils/mapping.py:2227, in Mapper.uniprot_cleanup(self, uniprots, ncbi_tax_id)
2224 # step 2: translate TrEMBL to SwissProt by gene symbols
2225 if self._trembl_swissprot_by_genesymbol:
-> 2227 uniprots = self.trembl_swissprot(
2228 uniprots,
2229 ncbi_tax_id = ncbi_tax_id,
2230 )
2232 # step 3: translate deleted IDs by gene symbols
2233 if self._translate_deleted_uniprot:
File /opt/conda/lib/python3.10/site-packages/pypath/utils/mapping.py:2868, in Mapper.trembl_swissprot(self, uniprots, ncbi_tax_id)
2865 for uniprot in uniprots:
2867 swissprot = None
-> 2868 genesymbols = self.map_name(
2869 name = uniprot,
2870 id_type = 'trembl',
2871 target_id_type = 'genesymbol',
2872 ncbi_tax_id = ncbi_tax_id,
2873 )
2875 this_swissprots = self.map_names(
2876 names = genesymbols,
2877 id_type = 'genesymbol',
2878 target_id_type = 'swissprot',
2879 ncbi_tax_id = ncbi_tax_id,
2880 )
2882 if not this_swissprots:
File /opt/conda/lib/python3.10/site-packages/pypath_common/_misc.py:2935, in ignore_unhashable.<locals>.wrapper(*args, **kwargs)
2932 @functools.wraps(func, assigned = attributes)
2933 def wrapper(*args, **kwargs):
2934 try:
-> 2935 return func(*args, **kwargs)
2936 except TypeError as error:
2937 if 'unhashable type' in str(error):
File /opt/conda/lib/python3.10/site-packages/pypath/utils/mapping.py:1978, in Mapper.map_name(self, name, id_type, target_id_type, ncbi_tax_id, strict, expand_complexes, uniprot_cleanup)
1964 mapped_names = self.chain_map(
1965 name = name,
1966 id_type = id_type,
(...)
1972 uniprot_cleanup = uniprot_cleanup,
1973 )
1975 else:
1976
1977 # all the other ID types
-> 1978 mapped_names = self._map_name(
1979 name = name,
1980 id_type = id_type,
1981 target_id_type = target_id_type,
1982 ncbi_tax_id = ncbi_tax_id,
1983 )
1985 # as ID translation tables for PRO IDs are not organism specific
1986 # we need an extra step to limit the results to the target organism
1987 if id_type == 'pro' and target_id_type == 'uniprot':
File /opt/conda/lib/python3.10/site-packages/pypath/utils/mapping.py:2510, in Mapper._map_name(self, name, id_type, target_id_type, ncbi_tax_id)
2503 """
2504 Once we have defined the name type and the target name type,
2505 this function looks it up in the most suitable dictionary.
2506 """
2508 ncbi_tax_id = ncbi_tax_id or self.ncbi_tax_id
-> 2510 tbl = self.which_table(
2511 id_type,
2512 target_id_type,
2513 ncbi_tax_id = ncbi_tax_id,
2514 )
2516 return tbl[name] if tbl else set()
File /opt/conda/lib/python3.10/site-packages/pypath/utils/mapping.py:1562, in Mapper.which_table(self, id_type, target_id_type, load, ncbi_tax_id)
1551 if resource:
1553 self._log(
1554 'Chosen built-in defined ID translation table: '
1555 'resource=%s, id_type_a=%s, id_type_b=%s' % (
(...)
1559 )
1560 )
-> 1562 self.load_mapping(
1563 resource = resource,
1564 load_a_to_b = load_a_to_b,
1565 load_b_to_a = load_b_to_a,
1566 ncbi_tax_id = ncbi_tax_id,
1567 )
1569 tbl = check_loaded()
1571 break
File /opt/conda/lib/python3.10/site-packages/pypath/utils/mapping.py:3208, in Mapper.load_mapping(self, resource, **kwargs)
3195 ncbi_tax_id = kwargs.get('ncbi_tax_id', resource.ncbi_tax_id)
3197 self._log(
3198 'Loading mapping table for organism `%s` '
3199 'with identifiers `%s` and `%s`, '
(...)
3205 )
3206 )
-> 3208 reader = MapReader(param = resource, **kwargs)
3210 a_to_b = reader.mapping_table_a_to_b
3211 b_to_a = reader.mapping_table_b_to_a
File /opt/conda/lib/python3.10/site-packages/pypath/utils/mapping.py:258, in MapReader.__init__(self, param, ncbi_tax_id, entity_type, load_a_to_b, load_b_to_a, uniprots, lifetime, resource_id_types)
255 self.uniprots = uniprots
256 self._resource_id_types = resource_id_types
--> 258 self.load()
File /opt/conda/lib/python3.10/site-packages/pypath/utils/mapping.py:288, in MapReader.load(self)
283 self.read_cache()
285 if not self.tables_loaded():
286
287 # read from the original source
--> 288 self.read()
290 if self.tables_loaded():
291
292 # write cache only at successful loading
293 self.write_cache()
File /opt/conda/lib/python3.10/site-packages/pypath/utils/mapping.py:450, in MapReader.read(self)
446 method = 'read_mapping_%s' % self.source_type
448 if hasattr(self, method):
--> 450 getattr(self, method)()
File /opt/conda/lib/python3.10/site-packages/pypath/utils/mapping.py:893, in MapReader.read_mapping_uniprot(self)
891 protein_name = self.param.field == 'protein names'
892 query.name_process = not protein_name and not trembl
--> 893 data = query.perform()
895 if not query.name_process:
897 def maybe_split(v):
File /opt/conda/lib/python3.10/site-packages/pypath/inputs/uniprot.py:681, in UniprotQuery.perform(self)
669 def perform(self) -> list[str] | dict[str, str] | dict[str, dict[str, str]]:
670 """
671 Perform the query and preprocess the result.
672
(...)
678 kind described in the previous point as values.
679 """
--> 681 _id, *variables = zip(*self)
682 _id = list(map(common.sfirst, _id))
684 if variables:
File /opt/conda/lib/python3.10/site-packages/pypath/inputs/uniprot.py:656, in UniprotQuery.__iter__(self)
648 c = curl.Curl(
649 self._baseurl,
650 get = self._get,
(...)
653 compr = 'gz',
654 )
655 result = c.result if c.result or self.fail_on_empty else [0].__iter__()
--> 656 _ = next(result)
657 _proc0 = functools.partial(self._FIELDEND.sub, '')
658 _proc1 = self._FIELDSEP.split if self.name_process else common.identity
File /opt/conda/lib/python3.10/site-packages/pypath/share/curl.py:766, in FileOpener.iterfile(fileobj)
763 @staticmethod
764 def iterfile(fileobj):
--> 766 for line in fileobj:
768 yield line
File /opt/conda/lib/python3.10/gzip.py:314, in GzipFile.read1(self, size)
312 if size < 0:
313 size = io.DEFAULT_BUFFER_SIZE
--> 314 return self._buffer.read1(size)
File /opt/conda/lib/python3.10/_compression.py:68, in DecompressReader.readinto(self, b)
66 def readinto(self, b):
67 with memoryview(b) as view, view.cast("B") as byte_view:
---> 68 data = self.read(len(byte_view))
69 byte_view[:len(data)] = data
70 return len(data)
File /opt/conda/lib/python3.10/gzip.py:496, in _GzipReader.read(self, size)
493 # Read a chunk of data from the file
494 buf = self._fp.read(io.DEFAULT_BUFFER_SIZE)
--> 496 uncompress = self._decompressor.decompress(buf, size)
497 if self._decompressor.unconsumed_tail != b"":
498 self._fp.prepend(self._decompressor.unconsumed_tail)
error: Error -3 while decompressing data: invalid block type
python version: 3.10
Package Version
----------------------------- -------------
absl-py 1.4.0
adjustText 1.0.4
agate 1.7.1
agate-dbf 0.2.2
agate-excel 0.2.3
agate-sql 0.5.9
aiobotocore 2.4.2
aioeasywebdav 2.4.0
aiohttp 3.8.4
aioitertools 0.11.0
aiosignal 1.3.1
amply 0.1.5
anndata 0.9.2
anndata2ri 1.2
annoy 1.17.2
anyio 3.7.1
appdirs 1.4.4
argcomplete 3.1.1
argon2-cffi 21.3.0
argon2-cffi-bindings 21.2.0
arrow 1.2.3
arviz 0.17.1
asciitree 0.3.3
asttokens 2.2.1
astunparse 1.6.3
async-lru 2.0.4
async-timeout 4.0.2
attmap 0.13.2
attrs 23.2.0
Babel 2.12.1
backcall 0.2.0
backports.functools-lru-cache 1.6.4
batchglm 0.7.4
bcrypt 4.1.3
beautifulsoup4 4.12.3
binaryornot 0.4.4
bioinfokit 2.1.3
biopython 1.81
bioservices 1.11.2
bleach 6.0.0
blinker 1.5
bokeh 3.2.2
boltons 24.0.0
boto3 1.24.59
botocore 1.27.59
Bottleneck 1.3.7
brotlipy 0.7.0
cached-property 1.5.2
cachetools 5.3.0
cellrank 2.0.0
certifi 2024.2.2
cffi 1.16.0
chardet 5.2.0
charset-normalizer 3.3.2
chex 0.1.7
cld2-cffi 0.1.4
click 8.1.3
cloudpickle 2.2.1
colorama 0.4.6
colorlog 6.7.0
comm 0.1.4
compress-pickle 1.1.0
conda 23.3.0
conda-content-trust 0.1.3
conda-package-handling 2.0.2
conda_package_streaming 0.7.0
ConfigArgParse 1.5.3
connection-pool 0.0.3
conorm 1.2.0
contextlib2 21.6.0
contourpy 1.2.1
cookiecutter 2.6.0
cramjam 2.6.2
cryptography 42.0.7
csvkit 1.1.1
custom-inherit 2.4.1
cycler 0.12.1
Cython 3.0.0
cytoolz 0.12.2
dask 2023.8.1
dask-image 2023.3.0
datrie 0.8.2
dbfread 2.0.7
debugpy 1.6.8
decorator 5.1.1
decoupler 1.6.0
defusedxml 0.7.1
Deprecated 1.2.14
diffxpy 0.7.4
dill 0.3.8
distributed 2023.8.1
dm-tree 0.1.8
dnspython 2.4.2
docrep 0.3.2
docutils 0.19
dpath 2.1.5
dropbox 11.36.0
dunamai 1.18.0
easydev 0.12.0
entrypoints 0.4
equinox 0.11.3
et-xmlfile 1.1.0
exceptiongroup 1.1.1
executing 1.2.0
face 20.1.1
fasteners 0.17.3
fastjsonschema 2.16.3
fastparquet 2023.2.0
fbpca 1.0
filechunkio 1.8
filelock 3.10.7
flatbuffers 23.5.26
flax 0.6.1
flit_core 3.9.0
fonttools 4.51.0
fqdn 1.5.1
frozenlist 1.3.3
fsspec 2023.3.0
ftputil 5.0.4
future 1.0.0
gast 0.4.0
gcsfs 2023.3.0
geosketch 1.2
get_version 3.5.4
gevent 22.10.2
gitdb 4.0.10
GitPython 3.1.31
glom 23.5.0
gmpy2 2.1.2
google-api-core 2.11.0
google-api-python-client 2.83.0
google-auth 2.17.0
google-auth-httplib2 0.1.0
google-auth-oauthlib 1.0.0
google-cloud-core 2.3.2
google-cloud-storage 2.8.0
google-crc32c 1.1.2
google-pasta 0.2.0
google-resumable-media 2.4.1
googleapis-common-protos 1.57.1
graphtools 1.5.3
greenlet 2.0.2
grequests 0.6.0
grpcio 1.54.3
gseapy 1.0.6
h11 0.14.0
h2 4.1.0
h5netcdf 1.3.0
h5py 3.9.0
harmonypy 0.0.9
hpack 4.0.0
html5lib 1.1
httpcore 0.17.3
httplib2 0.22.0
humanfriendly 10.0
hyperframe 6.0.1
hyperopt 0.1.2
idna 3.7
igraph 0.10.8
imagecodecs 2023.1.23
imageio 2.31.1
importlib-metadata 6.1.0
importlib-resources 5.12.0
inflect 7.0.0
iniconfig 2.0.0
intervaltree 3.1.0
ipykernel 6.25.1
ipython 8.14.0
ipywidgets 8.1.2
isodate 0.6.1
isoduration 20.11.0
jax 0.4.13
jaxlib 0.4.12
jaxopt 0.8.3
jaxtyping 0.2.28
jedi 0.19.0
Jinja2 3.1.2
jmespath 1.0.1
joblib 1.3.2
json5 0.9.14
jsonpointer 2.0
jsonschema 4.17.3
jupyter_client 8.3.0
jupyter_core 5.3.0
jupyter-events 0.6.3
jupyter-lsp 2.2.0
jupyter_server 2.7.1
jupyter-server-mathjax 0.2.6
jupyter_server_terminals 0.4.4
jupyterlab 4.0.5
jupyterlab-git 0.41.0
jupyterlab-pygments 0.2.2
jupyterlab_server 2.24.0
jupyterlab_widgets 3.0.10
keras 2.12.0
Keras-Preprocessing 1.1.2
kiwisolver 1.4.5
lazy_loader 0.3
leather 0.3.4
leidenalg 0.10.1
libmambapy 1.4.9
lightning-utilities 0.9.0
lineax 0.0.4
llvmlite 0.38.1
locket 1.0.0
logmuse 0.2.6
loompy 3.0.6
louvain 0.8.1
lxml 5.2.2
lz4 4.3.2
mamba 1.4.9
Markdown 3.4.4
markdown-it-py 2.2.0
MarkupSafe 2.1.3
matplotlib 3.9.0
matplotlib-inline 0.1.6
matplotlib-scalebar 0.8.1
matplotlib-venn 0.11.9
mdurl 0.1.0
mistune 3.0.1
mizani 0.9.2
ml-collections 0.1.1
ml-dtypes 0.2.0
mpi4py 3.1.4
mpmath 1.3.0
msgpack 1.0.5
mudata 0.2.3
MulticoreTSNE 0.1
multidict 6.0.4
multipledispatch 0.6.0
munkres 1.1.4
muon 0.1.5
NaiveDE 1.2.0
natsort 8.4.0
nbclient 0.8.0
nbconvert 7.7.4
nbdime 3.2.1
nbformat 5.8.0
ncls 0.0.68
nest-asyncio 1.5.6
networkx 3.1
notebook 7.0.2
notebook_shim 0.2.3
numba 0.55.2
numcodecs 0.11.0
numexpr 2.8.3
numpy 1.26.4
numpy-groupies 0.9.22
numpyro 0.12.1
oauth2client 4.1.3
oauthlib 3.2.2
omnipath 1.0.7
openpyxl 3.1.2
opt-einsum 3.3.0
optax 0.1.7
ott-jax 0.4.5
overrides 7.4.0
packaging 24.0
pandas 2.2.2
pandocfilters 1.5.0
paramiko 3.4.0
parsedatetime 2.4
parso 0.8.3
partd 1.4.0
patsy 0.5.3
peppy 0.35.5
pertpy 0.6.0
petsc4py 3.19.4
pexpect 4.8.0
phate 1.0.11
pickleshare 0.7.5
pillow 10.3.0
PIMS 0.6.1
pip 23.0.1
pkgutil_resolve_name 1.3.10
plac 1.3.5
platformdirs 4.2.2
plotnine 0.10.1
pluggy 1.0.0
ply 3.11
pooch 1.7.0
prettytable 3.6.0
progressbar2 4.2.0
prometheus-client 0.17.1
prompt-toolkit 3.0.36
protobuf 4.21.12
psutil 5.9.8
ptyprocess 0.7.0
PuLP 2.7.0
pure-eval 0.2.2
py-cpuinfo 9.0.0
pyarrow 11.0.0
pyasn1 0.4.8
pyasn1-modules 0.2.7
pycosat 0.6.4
pycparser 2.22
pycurl 7.45.3
pydantic 1.10.13
pygam 0.9.0
Pygments 2.14.0
pygpcca 1.0.4
PyGSP 0.5.1
PyICU 2.8
PyJWT 2.6.0
pymongo 4.4.1
PyNaCl 1.5.0
pynndescent 0.5.10
Pyomo 6.7.1
pyOpenSSL 23.1.1
pypairs 3.2.3
pyparsing 3.1.2
pypath_common 0.2.0
pypath-omnipath 0.16.15
pypi-latest 0.1.2
pypng 0.20220715.0
PyQt5 5.15.7
PyQt5-sip 12.11.0
pyranges 0.0.120
pyreadr 0.5.0
pyreadstat 1.2.1
pyrle 0.0.38
pyro-api 0.1.2
pyro-ppl 1.8.6+4be5c2e
pyroe 0.9.2
pyrsistent 0.19.3
pysftp 0.2.9
PySocks 1.7.1
pytest 7.2.2
python-dateutil 2.9.0.post0
python-igraph 0.10.8
python-irodsclient 1.1.6
python-json-logger 2.0.7
python-slugify 8.0.1
python-utils 3.7.0
pytimeparse 1.1.8
pytorch-lightning 1.9.4
pytz 2024.1
pyu2f 0.1.5
PyWavelets 1.4.1
PyYAML 6.0.1
pyzmq 25.1.1
questionary 2.0.1
rdata 0.11.2
reportlab 4.1.0
requests 2.32.2
requests-cache 0.4.13
requests-oauthlib 1.3.1
reretry 0.11.8
rfc3339-validator 0.1.4
rfc3986-validator 0.1.1
rich 13.3.3
rpy2 3.5.13
rsa 4.9
ruamel.yaml 0.17.21
ruamel.yaml.clib 0.2.7
s-gd2 1.7
s3fs 2023.3.0
s3transfer 0.6.0
sc-toolbox 0.12.3
Scachepy 0.0.3
scanorama 1.7.3
scanpy 1.9.2
scDGD 0.2
scikit-image 0.21.0
scikit-learn 1.3.0
scikit-misc 0.1.4
scipy 1.13.0
scprep 1.2.3
scrublet 0.2.3
scvelo 0.2.5
scvi 0.6.8
scvi-tools 0.20.3
seaborn 0.12.2
Send2Trash 1.8.2
session-info 1.0.0
setuptools 67.6.1
setuptools-scm 7.1.0
simplegeneric 0.8.1
sip 6.7.7
six 1.16.0
skranger 0.8.0
slacker 0.14.0
slepc4py 3.19.1
slicerator 1.1.0
smart-open 6.3.0
smmap 3.0.5
snakemake 7.25.0
sniffio 1.3.0
sorted-nearest 0.0.39
sortedcontainers 2.4.0
soupsieve 2.5
sparse 0.14.0
sparsecca 0.3.1
SpatialDE 1.1.3
SQLAlchemy 1.4.46
sqlparse 0.5.0
squidpy 1.2.3
stack-data 0.6.2
statsmodels 0.14.0
stdlib-list 0.8.0
stone 3.3.1
stopit 1.1.2
suds-community 1.1.2
sympy 1.12
tables 3.8.0
tabulate 0.9.0
tasklogger 1.2.0
tblib 1.7.0
tensorboard 2.12.3
tensorboard-data-server 0.7.0
tensorflow 2.12.1
tensorflow-estimator 2.12.0
tensorflow-probability 0.20.0
termcolor 2.3.0
terminado 0.17.1
text-unidecode 1.3
texttable 1.6.7
textwrap3 0.9.2
threadpoolctl 3.2.0
throttler 1.2.1
tifffile 2023.8.12
timeloop 1.0.2
tinycss2 1.2.1
toml 0.10.2
tomli 2.0.1
tomlkit 0.12.1
toolz 0.12.0
toposort 1.10
torch 2.0.0
torchmetrics 1.0.3
tornado 6.4
toyplot 1.0.3
toytree 2.0.5
tqdm 4.66.4
traitlets 5.9.0
typeguard 2.13.3
typing_extensions 4.11.0
typing-utils 0.1.0
tzdata 2024.1
tzlocal 5.0.1
ubiquerg 0.6.2
umap-learn 0.5.3
unicodedata2 15.0.0
Unidecode 1.3.6
uri-template 1.3.0
uritemplate 4.1.1
urllib3 2.2.1
validators 0.22.0
veracitools 0.1.3
vpolo 0.3.0
wcwidth 0.2.6
webcolors 1.13
webencodings 0.5.1
websocket-client 1.6.1
Werkzeug 2.3.7
wheel 0.40.0
widgetsnbextension 4.0.10
wrapt 1.15.0
xarray 2024.5.0
xarray-einstats 0.7.0
xlrd 2.0.1
xmltodict 0.13.0
xyzservices 2023.7.0
yarl 1.8.2
yq 3.2.2
yte 1.5.1
zappy 0.2.0
zarr 2.16.1
zict 3.0.0
zipp 3.15.0
zope.event 5.0
zope.interface 6.0
zstandard 0.19.0
Thanks for the traceback! Most likely the reason is some recent changes in OMA's website and API. Work is underway to fix it.
Hi, I am getting a similar error
zlib.error: Error -3 while decompressing data: invalid code -- missing end-of-block
when trying to do dc.get_progeny(organism='mouse', top=500)
. While dc.get_progeny(organism='human', top=500)
works fine.
Hi All, Sorry for getting back to this issue so late, especially as the solution seems to be a trivial little thing: some transactions to UniProt are sometimes slow and time out. To increase the timeout, do the following:
import decoupler as dc
from pypath.share import settings
settings.setup(curl_timeout = 1200) # 1200 seconds
net = dc.get_progeny(organism = 'mouse', top = 500)
One more thing: because after the previous attempts a corrupted file remained in the cache, you have to remove it first. To do this, I recommend wiping the complete pypath cache:
from pypath.share import settings
# check where is your cache directory:
settings.get('cachedir')
'/home/denes/.cache/pypath'
rm /home/denes/.cache/pypath/*
TLDR:
A full traceback from this error:
>>> import pypath
>>> pypath.__version__
'0.16.17'
>>> from pypath.share import settings
>>> settings.setup(cachedir = '/tmp/tmpcache2')
>>> import decoupler as dc
>>> pgm = dc.get_progeny(organism = 'mouse', top = 500)
Traceback (most recent call last):
File "<console>", line 1, in <module>
File "/home/denes/contrib/decoupler-py/decoupler/omnip.py", line 250, in get_progeny
p = _annotation_identifiers(p, organism, genesymbol_resource)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/denes/contrib/decoupler-py/decoupler/omnip.py", line 919, in _annotation_identifiers
net = translate_net(
^^^^^^^^^^^^^^
File "/home/denes/contrib/decoupler-py/decoupler/omnip.py", line 758, in translate_net
hom_net = orthology.translate_df(
^^^^^^^^^^^^^^^^^^^^^^^
File "/home/denes/.cache/pypoetry/virtualenvs/decoupler-I1NF36RP-py3.12/lib/python3.12/site-packages/pypath/utils/orthology.py", line 2262, in translate_df
return manager.translate_df(**args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/denes/.cache/pypoetry/virtualenvs/decoupler-I1NF36RP-py3.12/lib/python3.12/site-packages/pypath/utils/orthology.py", line 620, in translate_df
ortho_df = self.get_df(**args)
^^^^^^^^^^^^^^^^^^^
File "/home/denes/.cache/pypoetry/virtualenvs/decoupler-I1NF36RP-py3.12/lib/python3.12/site-packages/pypath/utils/orthology.py", line 515, in get_df
table = self.which_table(
^^^^^^^^^^^^^^^^^
File "/home/denes/.cache/pypoetry/virtualenvs/decoupler-I1NF36RP-py3.12/lib/python3.12/site-packages/pypath/utils/orthology.py", line 233, in which_table
self.load(key)
File "/home/denes/.cache/pypoetry/virtualenvs/decoupler-I1NF36RP-py3.12/lib/python3.12/site-packages/pypath/utils/orthology.py", line 242, in load
self.tables[key] = globals()[f'{key.resource.capitalize()}Orthology'](
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/denes/.cache/pypoetry/virtualenvs/decoupler-I1NF36RP-py3.12/lib/python3.12/site-packages/pypath/utils/orthology.py", line 1492, in __init__
ProteinOrthology.__init__(**locals())
File "/home/denes/.cache/pypoetry/virtualenvs/decoupler-I1NF36RP-py3.12/lib/python3.12/site-packages/pypath/utils/orthology.py", line 869, in __init__
self.load()
File "/home/denes/.cache/pypoetry/virtualenvs/decoupler-I1NF36RP-py3.12/lib/python3.12/site-packages/pypath/utils/orthology.py", line 1506, in load
oma_data = oma_input.oma_orthologs(
^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/denes/.cache/pypoetry/virtualenvs/decoupler-I1NF36RP-py3.12/lib/python3.12/site-packages/pypath/inputs/oma.py", line 125, in oma_orthologs
a, b = (
^^^^
File "/home/denes/.cache/pypoetry/virtualenvs/decoupler-I1NF36RP-py3.12/lib/python3.12/site-packages/pypath/inputs/oma.py", line 139, in <genexpr>
for id_ in _id_translate(
^^^^^^^^^^^^^^
File "/home/denes/.cache/pypoetry/virtualenvs/decoupler-I1NF36RP-py3.12/lib/python3.12/site-packages/pypath/inputs/oma.py", line 244, in _id_translate
uniprots = mapping.map_name(
^^^^^^^^^^^^^^^^^
File "/home/denes/.cache/pypoetry/virtualenvs/decoupler-I1NF36RP-py3.12/lib/python3.12/site-packages/pypath/utils/mapping.py", line 3551, in map_name
return mapper.map_name(
^^^^^^^^^^^^^^^^
File "/home/denes/contrib/decoupler-py/pypath_common/_misc.py", line 2953, in wrapper
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/denes/.cache/pypoetry/virtualenvs/decoupler-I1NF36RP-py3.12/lib/python3.12/site-packages/pypath/utils/mapping.py", line 2193, in map_name
mapped_names = self.uniprot_cleanup(
^^^^^^^^^^^^^^^^^^^^^
File "/home/denes/.cache/pypoetry/virtualenvs/decoupler-I1NF36RP-py3.12/lib/python3.12/site-packages/pypath/utils/mapping.py", line 2227, in uniprot_cleanup
uniprots = self.trembl_swissprot(
^^^^^^^^^^^^^^^^^^^^^^
File "/home/denes/.cache/pypoetry/virtualenvs/decoupler-I1NF36RP-py3.12/lib/python3.12/site-packages/pypath/utils/mapping.py", line 2868, in trembl_swissprot
genesymbols = self.map_name(
^^^^^^^^^^^^^^
File "/home/denes/contrib/decoupler-py/pypath_common/_misc.py", line 2953, in wrapper
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/denes/.cache/pypoetry/virtualenvs/decoupler-I1NF36RP-py3.12/lib/python3.12/site-packages/pypath/utils/mapping.py", line 1978, in map_name
mapped_names = self._map_name(
^^^^^^^^^^^^^^^
File "/home/denes/.cache/pypoetry/virtualenvs/decoupler-I1NF36RP-py3.12/lib/python3.12/site-packages/pypath/utils/mapping.py", line 2510, in _map_name
tbl = self.which_table(
^^^^^^^^^^^^^^^^^
File "/home/denes/.cache/pypoetry/virtualenvs/decoupler-I1NF36RP-py3.12/lib/python3.12/site-packages/pypath/utils/mapping.py", line 1562, in which_table
self.load_mapping(
File "/home/denes/.cache/pypoetry/virtualenvs/decoupler-I1NF36RP-py3.12/lib/python3.12/site-packages/pypath/utils/mapping.py", line 3208, in load_mapping
reader = MapReader(param = resource, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/denes/.cache/pypoetry/virtualenvs/decoupler-I1NF36RP-py3.12/lib/python3.12/site-packages/pypath/utils/mapping.py", line 258, in __init__
self.load()
File "/home/denes/.cache/pypoetry/virtualenvs/decoupler-I1NF36RP-py3.12/lib/python3.12/site-packages/pypath/utils/mapping.py", line 288, in load
self.read()
File "/home/denes/.cache/pypoetry/virtualenvs/decoupler-I1NF36RP-py3.12/lib/python3.12/site-packages/pypath/utils/mapping.py", line 450, in read
getattr(self, method)()
File "/home/denes/.cache/pypoetry/virtualenvs/decoupler-I1NF36RP-py3.12/lib/python3.12/site-packages/pypath/utils/mapping.py", line 893, in read_mapping_uniprot
data = query.perform()
^^^^^^^^^^^^^^^
File "/home/denes/.cache/pypoetry/virtualenvs/decoupler-I1NF36RP-py3.12/lib/python3.12/site-packages/pypath/inputs/uniprot.py", line 681, in perform
_id, *variables = zip(*self)
^^^^^^^^^^
File "/home/denes/.cache/pypoetry/virtualenvs/decoupler-I1NF36RP-py3.12/lib/python3.12/site-packages/pypath/inputs/uniprot.py", line 656, in __iter__
_ = next(result)
^^^^^^^^^^^^
File "/home/denes/.cache/pypoetry/virtualenvs/decoupler-I1NF36RP-py3.12/lib/python3.12/site-packages/pypath/share/curl.py", line 766, in iterfile
for line in fileobj:
^^^^^^^
File "/usr/lib/python3.12/gzip.py", line 337, in read1
return self._buffer.read1(size)
^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/lib/python3.12/_compression.py", line 68, in readinto
data = self.read(len(byte_view))
^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/lib/python3.12/gzip.py", line 535, in read
uncompress = self._decompressor.decompress(buf, size)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
zlib.error: Error -3 while decompressing data: invalid block type
Meanwhile in the log we see:
442325 [2024-10-20 19:44:49] [mapping] Requested to load ID translation table from `trembl` to `genesymbol`, organism: 10090.
442326 [2024-10-20 19:44:49] [mapping] Chosen built-in defined ID translation table: resource=basic, id_type_a=genesymbol, id_type_b=trembl
442327 [2024-10-20 19:44:49] [mapping] Loading mapping table for organism `10090` with identifiers `genesymbol` and `trembl`, input type `uniprot`
442328 [2024-10-20 19:44:49] [mapping] Reader created for ID translation table, parameters: `ncbi_tax_id=10090, id_a=genesymbol, id_b=trembl, load_a_to_b=0, load_b_to_a=1, input_type=uniprot (UniprotMapping)`.
442329 [2024-10-20 19:44:49] [mapping] UniProt REST API call: `https://rest.uniprot.org/uniprotkb/stream?query=(reviewed:true AND organism_id:10090)&format=tsv&fields=accession,gene_primary&compressed=true`.
442330 [2024-10-20 19:44:49] [curl] Creating Curl object to retrieve data from `https://rest.uniprot.org/uniprotkb/stream`
442331 [2024-10-20 19:44:49] [curl] GET parameters added to the URL: `query=%28reviewed%3Atrue+AND+organism_id%3A10090%29&format=tsv&fields=accession%2Cgene_primary&compr`
442332 [2024-10-20 19:44:49] [curl] Cache file path: `/tmp/tmpcache2/35e4aacb930d7cf70ae48c3644c176b3-stream`
442333 [2024-10-20 19:44:49] [curl] Setting up and calling pycurl.
442334 [2024-10-20 19:45:57] [curl] CURL DEBUG INFO: ERROR
442335 [2024-10-20 19:45:57] [curl] PycURL error: (18, 'transfer closed with outstanding read data remaining')
442336 [2024-10-20 19:46:07] [curl] Opening file `/tmp/tmpcache2/35e4aacb930d7cf70ae48c3644c176b3-stream`
442337 [2024-10-20 19:46:07] [curl] Extracting data from file type `gz`
442338 [2024-10-20 19:46:07] [curl] Opening gzip file `/tmp/tmpcache2/35e4aacb930d7cf70ae48c3644c176b3-stream`.
442339 [2024-10-20 19:46:07] [curl] Result is an iterator over the lines of `/tmp/tmpcache2/35e4aacb930d7cf70ae48c3644c176b3-stream`.
442340 [2024-10-20 19:46:07] [curl] File at `https://rest.uniprot.org/uniprotkb/stream?query=%28reviewed%3Atrue+AND+organism_id%3A10090%29&format=tsv&fields=accession%2Cgene_primary&compressed=true` successfully retrieved.
442341 Resulted file type `gz extracted data, file object`. Local file at `/tmp/tmpcache2/35e4aacb930d7cf70ae48c3644c176b3-stream`.
Above we see the relevant curl
error: PycURL error: (18, 'transfer closed with outstanding read data remaining')
. Then the gzip file is opened, but since it's corrupted, gzip raises an exception while reading it:
File "/usr/lib/python3.12/gzip.py", line 535, in read
uncompress = self._decompressor.decompress(buf, size)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
zlib.error: Error -3 while decompressing data: invalid block type
Hi @deeenes
This worked
Thank you for your help
This clearly proved to be a HTTP timeout issue, that many started to experience in the past weeks. I set the timeout higher for UniProt queries in pypath-omnipath
, and this should address the issue in most of the cases. For this you should update pypath-omnipath
from this git repo. Alternatively, you can always set the timeout manually. I'm closing the issue since it seems to be solved.
Dear developer, I get an error when running the following code:
progeny = dc.get_progeny(organism='Mus musculus', top=500)
the error is : error: Error -3 while decompressing data: invalid code lengths set
Desktop (please complete the following information):