saezlab / pypath

Python module for prior knowledge integration. Builds databases of signaling pathways, enzyme-substrate interactions, complexes, annotations and intercellular communication roles.
http://omnipathdb.org/
GNU General Public License v3.0

Unable to download progeny mouse model #218

Closed · jpfeil closed this issue 2 weeks ago

jpfeil commented 1 year ago

Describe the bug Tried to download the PROGENy mouse model, but I get a TypeError: 'NoneType' object is not iterable. The human model works fine.

To Reproduce

import decoupler as dc

model = dc.get_progeny(organism='mouse', top=100)  # fails with TypeError
model = dc.get_progeny(organism='human', top=100)  # works
model

| source | target | weight | p_value |
| -- | -- | -- | -- |
| Androgen | TMPRSS2 | 11.490631 | 0.000000e+00 |
| Androgen | NKX3-1 | 10.622551 | 2.242078e-44 |
| Androgen | MBOAT2 | 10.472733 | 4.624285e-44 |
| Androgen | KLK2 | 10.176186 | 1.944414e-40 |
| Androgen | SARG | 11.386852 | 2.790209e-40 |
| ... | ... | ... | ... |
| p53 | CCDC150 | -3.174527 | 7.396252e-13 |
| p53 | LCE1A | 6.154823 | 8.475458e-13 |
| p53 | TREM2 | 4.101937 | 9.739648e-13 |
| p53 | GDF9 | 3.355741 | 1.087433e-12 |
| p53 | NHLH2 | 2.201638 | 1.651582e-12 |

Expected behavior The mouse model (pathway marker genes and weights) should be downloaded, just as the human one is.

System

Additional context

TypeError                                 Traceback (most recent call last)
Cell In[8], line 1
----> 1 model = dc.get_progeny(organism='mouse', top=100)
      2 model

File ~/miniconda3/envs/scvi-env/lib/python3.9/site-packages/decoupler/omnip.py:139, in get_progeny(organism, top)
    135 p.columns = ['source', 'target', 'weight', 'p_value']
    137 if not _is_human(organism):
--> 139     p = translate_net(
    140         p,
    141         columns='target',
    142         source_organism=9606,
    143         target_organism=organism,
    144     )
    146 return p

File ~/miniconda3/envs/scvi-env/lib/python3.9/site-packages/decoupler/omnip.py:453, in translate_net(net, columns, source_organism, target_organism, id_type, unique_by, **kwargs)
    450 hom_net = net.copy()
    452 # Translate
--> 453 hom_net = homology.translate_df(
    454     df=hom_net,
    455     target=_target_organism,
    456     cols=columns,
    457     source=_source_organism,
    458 )
    460 unique_by = common.to_list(unique_by)
    462 if unique_by and all(c in hom_net.columns for c in unique_by):
    463 
    464     # Remove duplicated based on source and target

File ~/miniconda3/envs/scvi-env/lib/python3.9/site-packages/pypath/utils/homology.py:1901, in translate_df(df, target, cols, source, homologene, ensembl, ensembl_hc, ensembl_types, **kwargs)
   1898 args.pop('manager')
   1899 kwargs = args.pop('kwargs')
-> 1901 return manager.translate_df(**args, **kwargs)

File ~/miniconda3/envs/scvi-env/lib/python3.9/site-packages/pypath/utils/homology.py:480, in HomologyManager.translate_df(self, df, target, cols, source, homologene, ensembl, ensembl_hc, ensembl_types, **kwargs)
    477 source = taxonomy.ensure_ncbi_tax_id(source)
    478 target = taxonomy.ensure_ncbi_tax_id(target)
--> 480 table = self.which_table(
    481     target = target,
    482     source = source,
    483 )
    485 param = self._translation_param(locals())
    487 return table.translate_df(
    488     df = df,
    489     cols = cols,
   (...)
    492     **kwargs
    493 )

File ~/miniconda3/envs/scvi-env/lib/python3.9/site-packages/pypath/utils/homology.py:183, in HomologyManager.which_table(self, target, source, only_swissprot)
    179 self.expiry[key] = time.time()
    181 if key not in self.tables:
--> 183     self.load(key)
    185 if key in self.tables:
    187     return self.tables[key]

File ~/miniconda3/envs/scvi-env/lib/python3.9/site-packages/pypath/utils/homology.py:214, in HomologyManager.load(self, key)
    207     self._log(
    208         'Homology table from taxon %u to %u (only SwissProt: %s) '
    209         'has been loaded from `%s`.' % (key + (cachefile,))
    210     )
    212 else:
--> 214     self.tables[key] = self._load(key)
    215     pickle.dump(self.tables[key], open(cachefile, 'wb'))
    216     self._log(
    217         'Homology table from taxon %u to %u (only SwissProt: %s) '
    218         'has been saved to `%s`.' % (key + (cachefile,))
    219     )

File ~/miniconda3/envs/scvi-env/lib/python3.9/site-packages/pypath/utils/homology.py:224, in HomologyManager._load(self, key)
    222 def _load(self, key):
--> 224     return ProteinHomology(
    225         target = key[1],
    226         source = key[0],
    227         only_swissprot = key[2],
    228         homologene = self.homologene,
    229         ensembl = self.ensembl,
    230         ensembl_hc = self.ensembl_hc,
    231         ensembl_types = self.ensembl_types,
    232     )

File ~/miniconda3/envs/scvi-env/lib/python3.9/site-packages/pypath/utils/homology.py:715, in ProteinHomology.__init__(self, target, source, only_swissprot, homologene, ensembl, ensembl_hc, ensembl_types)
    711 self.load_proteome(self.target, self.only_swissprot)
    713 if source is not None:
--> 715     self.load(source)

File ~/miniconda3/envs/scvi-env/lib/python3.9/site-packages/pypath/utils/homology.py:731, in ProteinHomology.load(self, source)
    727 def load(self, source = None):
    729     if self.homologene:
--> 731         self.load_homologene(source)
    733     if self.ensembl:
    735         self.load_ensembl(source)

File ~/miniconda3/envs/scvi-env/lib/python3.9/site-packages/pypath/utils/homology.py:1206, in ProteinHomology.load_homologene(self, source)
   1200         target_r.update(hgr[r])
   1202 for e in target_e:
   1204     target_u.update(
   1205         set(
-> 1206             mapping.map_name(e, 'entrez', 'uniprot', self.target)
   1207         )
   1208     )
   1210 for r in target_r:
   1212     target_u.update(
   1213         set(
   1214             mapping.map_name(e, 'refseqp', 'uniprot', self.target)
   1215         )
   1216     )

File ~/miniconda3/envs/scvi-env/lib/python3.9/site-packages/pypath/utils/mapping.py:3323, in map_name(name, id_type, target_id_type, ncbi_tax_id, strict, expand_complexes, uniprot_cleanup)
   3270 """
   3271 Translates one instance of one ID type to a different one.
   3272 Returns set of the target ID type.
   (...)
   3318         ID, call the `uniprot_cleanup` function at the end.
   3319 """
   3321 mapper = get_mapper()
-> 3323 return mapper.map_name(
   3324     name = name,
   3325     id_type = id_type,
   3326     target_id_type = target_id_type,
   3327     ncbi_tax_id = ncbi_tax_id,
   3328     strict = strict,
   3329     expand_complexes = expand_complexes,
   3330     uniprot_cleanup = uniprot_cleanup,
   3331 )

File ~/miniconda3/envs/scvi-env/lib/python3.9/site-packages/pypath/utils/mapping.py:2000, in Mapper.map_name(self, name, id_type, target_id_type, ncbi_tax_id, strict, expand_complexes, uniprot_cleanup)
   1996 # for UniProt IDs we do a few more steps to
   1997 # try to find out the primary SwissProt ID
   1998 if uniprot_cleanup and target_id_type == 'uniprot':
-> 2000     mapped_names = self.uniprot_cleanup(
   2001         uniprots = mapped_names,
   2002         ncbi_tax_id = ncbi_tax_id,
   2003     )
   2005 return mapped_names

File ~/miniconda3/envs/scvi-env/lib/python3.9/site-packages/pypath/utils/mapping.py:2029, in Mapper.uniprot_cleanup(self, uniprots, ncbi_tax_id)
   2026 uniprots = common.to_set(uniprots)
   2028 # step 1: translate secondary IDs to primary
-> 2029 uniprots = self.primary_uniprot(uniprots)
   2031 # step 2: translate TrEMBL to SwissProt by gene symbols
   2032 if self._trembl_swissprot_by_genesymbol:

File ~/miniconda3/envs/scvi-env/lib/python3.9/site-packages/pypath/utils/mapping.py:2599, in Mapper.primary_uniprot(self, uniprots, ncbi_tax_id)
   2595 ncbi_tax_id = ncbi_tax_id or self.ncbi_tax_id
   2597 for uniprot in uniprots:
-> 2599     primary = self.map_name(
   2600         name = uniprot,
   2601         id_type = 'uniprot-sec',
   2602         target_id_type = 'uniprot-pri',
   2603         ncbi_tax_id = ncbi_tax_id,
   2604     )
   2606     if primary:
   2608         primaries.update(primary)

File ~/miniconda3/envs/scvi-env/lib/python3.9/site-packages/pypath/utils/mapping.py:1785, in Mapper.map_name(self, name, id_type, target_id_type, ncbi_tax_id, strict, expand_complexes, uniprot_cleanup)
   1771     mapped_names = self.chain_map(
   1772         name = name,
   1773         id_type = id_type,
   (...)
   1779         uniprot_cleanup = uniprot_cleanup,
   1780     )
   1782 else:
   1783 
   1784     # all the other ID types
-> 1785     mapped_names = self._map_name(
   1786         name = name,
   1787         id_type = id_type,
   1788         target_id_type = target_id_type,
   1789         ncbi_tax_id = ncbi_tax_id,
   1790     )
   1792 # as ID translation tables for PRO IDs are not organism specific
   1793 # we need an extra step to limit the results to the target organism
   1794 if id_type == 'pro' and target_id_type == 'uniprot':

File ~/miniconda3/envs/scvi-env/lib/python3.9/site-packages/pypath/utils/mapping.py:2317, in Mapper._map_name(self, name, id_type, target_id_type, ncbi_tax_id)
   2310 """
   2311 Once we have defined the name type and the target name type,
   2312 this function looks it up in the most suitable dictionary.
   2313 """
   2315 ncbi_tax_id = ncbi_tax_id or self.ncbi_tax_id
-> 2317 tbl = self.which_table(
   2318     id_type,
   2319     target_id_type,
   2320     ncbi_tax_id = ncbi_tax_id,
   2321 )
   2323 return tbl[name] if tbl else set()

File ~/miniconda3/envs/scvi-env/lib/python3.9/site-packages/pypath/utils/mapping.py:1383, in Mapper.which_table(self, id_type, target_id_type, load, ncbi_tax_id)
   1372 if resource:
   1374     self._log(
   1375         'Chosen built-in defined ID translation table: '
   1376         'resource=%s, id_type_a=%s, id_type_b=%s' % (
   (...)
   1380         )
   1381     )
-> 1383     self.load_mapping(
   1384         resource = resource,
   1385         load_a_to_b = load_a_to_b,
   1386         load_b_to_a = load_b_to_a,
   1387         ncbi_tax_id = ncbi_tax_id,
   1388     )
   1390     tbl = check_loaded()
   1392     break

File ~/miniconda3/envs/scvi-env/lib/python3.9/site-packages/pypath/utils/mapping.py:2973, in Mapper.load_mapping(self, resource, **kwargs)
   2958     return
   2960 self._log(
   2961     'Loading mapping table for organism `%s` '
   2962     'with identifiers `%s` and `%s`, '
   (...)
   2970     )
   2971 )
-> 2973 reader = MapReader(param = resource, **kwargs)
   2975 a_to_b = reader.mapping_table_a_to_b
   2976 b_to_a = reader.mapping_table_b_to_a

File ~/miniconda3/envs/scvi-env/lib/python3.9/site-packages/pypath/utils/mapping.py:235, in MapReader.__init__(self, param, ncbi_tax_id, entity_type, load_a_to_b, load_b_to_a, uniprots, lifetime)
    232 self.b_to_a = None
    233 self.uniprots = uniprots
--> 235 self.load()

File ~/miniconda3/envs/scvi-env/lib/python3.9/site-packages/pypath/utils/mapping.py:265, in MapReader.load(self)
    260     self.read_cache()
    262 if not self.tables_loaded():
    263 
    264     # read from the original source
--> 265     self.read()
    267     if self.tables_loaded():
    268 
    269         # write cache only at successful loading
    270         self.write_cache()

File ~/miniconda3/envs/scvi-env/lib/python3.9/site-packages/pypath/utils/mapping.py:423, in MapReader.read(self)
    419 method = 'read_mapping_%s' % self.source_type
    421 if hasattr(self, method):
--> 423     getattr(self, method)()

File ~/miniconda3/envs/scvi-env/lib/python3.9/site-packages/pypath/utils/mapping.py:534, in MapReader.read_mapping_file(self)
    531 a_to_b = collections.defaultdict(set)
    532 b_to_a = collections.defaultdict(set)
--> 534 for i, line in enumerate(infile):
    536     if self.param.header and i < self.param.header:
    538         continue

File ~/miniconda3/envs/scvi-env/lib/python3.9/site-packages/pypath/inputs/uniprot.py:481, in get_uniprot_sec(organism)
    470 url = urls.urls['uniprot_sec']['url']
    471 c = curl.Curl(url, silent = False, large = True, timeout = 2400)
    473 for line in filter(
    474     lambda line:
    475         len(line) == 2 and (organism is None or line[1] in proteome),
    476         map(
    477             lambda i:
    478                 i[1].split(),
    479             filter(
    480                 lambda i: i[0] >= 30,
--> 481                 enumerate(c.result)
    482             )
    483         )
    484     ):
    486     yield line

TypeError: 'NoneType' object is not iterable
SZ-qing commented 1 year ago

Yes, I also encounter this error when I run the mouse model; it seems to come from the step that converts the human genes to their mouse orthologs.

PauBadiaM commented 1 year ago

Hi @jpfeil and @nierq,

Indeed there seems to be a server side issue with the conversion. @deeenes could you take a look please?
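
For reference, a minimal sketch of the conversion step in isolation, mirroring the arguments decoupler passes to pypath in the traceback above. Here `p` is assumed to be the human PROGENy table returned by dc.get_progeny(organism='human', top=100), and the taxonomy IDs are the standard NCBI ones:

from pypath.utils import homology

# Translate the human gene symbols in the 'target' column to mouse,
# the same call that fails inside decoupler's translate_net above.
mouse_p = homology.translate_df(
    df = p,            # the human PROGENy net
    target = 10090,    # mouse NCBI Taxonomy ID
    cols = 'target',
    source = 9606,     # human NCBI Taxonomy ID
)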

xiangrong7 commented 1 year ago

Hello, I also encountered the same error.

>>> import pycurl
>>> import decoupler as dc

>>>
>>>
>>> dc.get_progeny(organism = 'Mus musculus',top =100)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/public/home/niuxy/Software/miniconda3/envs/py39/lib/python3.9/site-packages/decoupler/omnip.py", line 139, in get_progeny
    p = translate_net(
  File "/public/home/niuxy/Software/miniconda3/envs/py39/lib/python3.9/site-packages/decoupler/omnip.py", line 432, in translate_net
    _target_organism = taxonomy.ensure_ncbi_tax_id(target_organism)
  File "/public/home/niuxy/Software/miniconda3/envs/py39/lib/python3.9/site-packages/pypath/utils/taxonomy.py", line 367, in ensure_ncbi_tax_id
    taxid_from_common_name(taxon_id) or
  File "/public/home/niuxy/Software/miniconda3/envs/py39/lib/python3.9/site-packages/pypath/utils/taxonomy.py", line 283, in taxid_from_common_name
    common_to_ncbi = get_db('common')
  File "/public/home/niuxy/Software/miniconda3/envs/py39/lib/python3.9/site-packages/pypath/utils/taxonomy.py", line 435, in get_db
    init_db(key)
  File "/public/home/niuxy/Software/miniconda3/envs/py39/lib/python3.9/site-packages/pypath/utils/taxonomy.py", line 450, in init_db
    ncbi_data = uniprot_input.uniprot_ncbi_taxids_2()
  File "/public/home/niuxy/Software/miniconda3/envs/py39/lib/python3.9/site-packages/pypath/inputs/uniprot.py", line 1301, in uniprot_ncbi_taxids_2
    for line in c.result:
TypeError: 'NoneType' object is not iterable
SZ-qing commented 1 year ago

I now have an alternative solution to this problem.

Step 1: Use the R package progeny to obtain the model data that decoupler uses:

library(progeny)

model_data <- progeny::model_mouse_full
# write a tab-separated file, to match the sep='\t' used when reading it in Python
write.table(model_data, './progeny_mouse_geneset.txt', sep = '\t', quote = FALSE, row.names = FALSE)

Step 2: In Python, load the table and rename the columns to decoupler's convention:

import pandas as pd
import decoupler as dc

gene_set = pd.read_csv('./progeny_mouse_geneset.txt', sep='\t')
gene_set.rename(columns={'gene': 'target', 'pathway': 'source', 'p.value': 'p_value'}, inplace=True)
gene_set = gene_set.reindex(columns=['source', 'target', 'weight', 'p_value'])

Select the top n genes per pathway:

gene_set = gene_set.sort_values('p_value').groupby('source').head(200).sort_values(['source', 'p_value']).reset_index()

dc.run_mlm(mat=raw_data, net=gene_set, source='source', target='target', weight='weight', verbose=True, use_raw=False)

Or you can do the PROGENy analysis directly in R:

library(Seurat)
library(anndata)
library(progeny)

data <- read_h5ad('./all_adata_MC38.integrated_final_rawdata.h5ad')
data_seurat <- CreateSeuratObject(counts = t(as.matrix(data$X)), meta.data = data$obs)
# The data needs to be normalized; for the specific steps, please refer to the progeny tutorial:
# https://saezlab.github.io/progeny/articles/ProgenySingleCell.html

deeenes commented 1 year ago

@xiangrong7, yours is not the same error; it happens on a completely different line. The error you encountered is most likely due to a random network failure: your computer failed to download a file. You can get rid of it by trying again on a good-quality network:

from pypath.share import curl
import decoupler as dc

with curl.cache_off():
    dc.get_progeny(organism = 'Mus musculus', top = 100)
deeenes commented 1 year ago

Hello @jpfeil,

Apologies for the slow answer. I have to correct my previous comment: your issue is actually very similar to @xiangrong7's, a transient networking error. The traceback is long, but the most downstream element we can identify is the call at pypath/utils/mapping.py:2599, which translates secondary UniProt IDs to primary ones. Normally a download then happens, which looks like this in the log:

    116 [2023-01-18 15:12:04] [mapping] Requested to load ID translation table from `uniprot-sec` to `uniprot-pri`, organism: 9606.
    117 [2023-01-18 15:12:04] [mapping] Chosen built-in defined ID translation table: resource=basic, id_type_a=uniprot-sec, id_type_b=uniprot-pri
    118 [2023-01-18 15:12:04] [inputs] Selecting input method (step 1): module `uniprot.get_uniprot_sec`, method `None`.
    119 [2023-01-18 15:12:04] [inputs] Selecting input method (step 2): module `pypath.inputs.uniprot`, method `get_uniprot_sec`.
    120 [2023-01-18 15:12:04] [inputs] Importing module `pypath.inputs.uniprot`.
    121 [2023-01-18 15:12:04] [mapping] Loading mapping table for organism `9606` with identifiers `uniprot-sec` and `uniprot-pri`, input type `file`
    122 [2023-01-18 15:12:04] [mapping] Reader created for ID translation table, parameters: `ncbi_tax_id=9606, id_a=uniprot-sec, id_b=uniprot-pri, load_a_to_b=1, load_b_to_a=0, input_type=file (FileMapping)`.
    123 [2023-01-18 15:12:04] [inputs] Selecting input method (step 1): module `uniprot.get_uniprot_sec`, method `None`.
    124 [2023-01-18 15:12:04] [inputs] Selecting input method (step 2): module `pypath.inputs.uniprot`, method `get_uniprot_sec`.
    125 [2023-01-18 15:12:04] [inputs] Importing module `pypath.inputs.uniprot`.
    126 [2023-01-18 15:12:04] [uniprot_input] Loading list of all UniProt IDs for organism `9606` (only SwissProt: None).
    127 [2023-01-18 15:12:04] [curl] Creating Curl object to retrieve data from `https://legacy.uniprot.org/uniprot/`
    128 [2023-01-18 15:12:04] [curl] GET parameters added to the URL: `query=organism%3A9606&format=tab&columns=id`
    129 [2023-01-18 15:12:04] [curl] Cache file path: `/home/denes/pypath/nocache/cache/590b20597cb4f25fe88d504e4dc430df-`
    130 [2023-01-18 15:12:04] [curl] Setting up and calling pycurl.
    131 [2023-01-18 15:12:25] [curl] Opening file `/home/denes/pypath/nocache/cache/590b20597cb4f25fe88d504e4dc430df-`
    132 [2023-01-18 15:12:25] [curl] Extracting data from file type `plain`
    133 [2023-01-18 15:12:25] [curl] Opening plain text file `/home/denes/pypath/nocache/cache/590b20597cb4f25fe88d504e4dc430df-`.
    134 [2023-01-18 15:12:25] [curl] Contents of `/home/denes/pypath/nocache/cache/590b20597cb4f25fe88d504e4dc430df-` has been read and the file has been closed.
    135 [2023-01-18 15:12:25] [curl] File at `https://legacy.uniprot.org/uniprot/?query=organism%3A9606&format=tab&columns=id` successfully retrieved. Resulted file type `plain text, unicode string`. Local file at
    136                       `/home/denes/pypath/nocache/cache/590b20597cb4f25fe88d504e4dc430df-`.
    137 [2023-01-18 15:12:25] [curl] Creating Curl object to retrieve data from `ftp://ftp.expasy.org/databases/uniprot/current_release/knowledgebase/complete/docs/sec_ac.txt`
    138 [2023-01-18 15:12:25] [curl] Cache file path: `/home/denes/pypath/nocache/cache/7814fe9dc734379a8c28d4b1478d2f85-sec_ac.txt`
    139 [2023-01-18 15:12:25] [curl] Setting up and calling pycurl.
    140 [2023-01-18 15:12:27] [curl] Opening file `/home/denes/pypath/nocache/cache/7814fe9dc734379a8c28d4b1478d2f85-sec_ac.txt`
    141 [2023-01-18 15:12:27] [curl] Extracting data from file type `plain`
    142 [2023-01-18 15:12:27] [curl] Opening plain text file `/home/denes/pypath/nocache/cache/7814fe9dc734379a8c28d4b1478d2f85-sec_ac.txt`.
    143 [2023-01-18 15:12:27] [curl] File at `ftp://ftp.expasy.org/databases/uniprot/current_release/knowledgebase/complete/docs/sec_ac.txt` successfully retrieved. Resulted file type `plain text, file object`. Local file at
    144                       `/home/denes/pypath/nocache/cache/7814fe9dc734379a8c28d4b1478d2f85-sec_ac.txt`.

The solution is similar to the one suggested above: try again this particular download with the cache turned off:

from pypath.utils import mapping
from pypath.share import curl

with curl.cache_off():
    mapping.get_mapper().primary_uniprot(['P00533'])
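
If the error comes back, the specific download that fails in the traceback can also be retried in isolation. This is a sketch based on the input function named in the traceback (pypath.inputs.uniprot.get_uniprot_sec); when the download succeeds, it yields pairs of secondary and primary UniProt accessions:

from pypath.share import curl
from pypath.inputs import uniprot

# Force a fresh download of the secondary-to-primary accession table
# (sec_ac.txt) by disabling the cache for this call.
with curl.cache_off():
    sec_to_pri = list(uniprot.get_uniprot_sec(organism = 9606))

print(len(sec_to_pri))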

I hope this helps, please let me know if you're still experiencing any issue.

Best,

Denes

SZ-qing commented 1 year ago

I used the method you mentioned above. I don't know whether other people's problems have been solved, but mine still persists.

<class 'TypeError'>, 'NoneType' object is not iterable, <traceback object at 0x2b65030c4040>
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/work/home/nierq01/anaconda3/envs/progeny/lib/python3.10/site-packages/decoupler/omnip.py", line 139, in get_progeny
    p = translate_net(
  File "/work/home/nierq01/anaconda3/envs/progeny/lib/python3.10/site-packages/decoupler/omnip.py", line 432, in translate_net
    _target_organism = taxonomy.ensure_ncbi_tax_id(target_organism)
  File "/work/home/nierq01/anaconda3/envs/progeny/lib/python3.10/site-packages/pypath/utils/taxonomy.py", line 367, in ensure_ncbi_tax_id
    taxid_from_common_name(taxon_id) or
  File "/work/home/nierq01/anaconda3/envs/progeny/lib/python3.10/site-packages/pypath/utils/taxonomy.py", line 283, in taxid_from_common_name
    common_to_ncbi = get_db('common')
  File "/work/home/nierq01/anaconda3/envs/progeny/lib/python3.10/site-packages/pypath/utils/taxonomy.py", line 435, in get_db
    init_db(key)
  File "/work/home/nierq01/anaconda3/envs/progeny/lib/python3.10/site-packages/pypath/utils/taxonomy.py", line 450, in init_db
    ncbi_data = uniprot_input.uniprot_ncbi_taxids_2()
  File "/work/home/nierq01/anaconda3/envs/progeny/lib/python3.10/site-packages/pypath/inputs/uniprot.py", line 1301, in uniprot_ncbi_taxids_2
    for line in c.result:
TypeError: 'NoneType' object is not iterable

deeenes commented 1 year ago

Hi @nierq, I'm sorry to hear this error is still with you. I've just run the above function successfully, which suggests that this is a download issue similar to the others above. These kinds of errors are not uncommon, but running into them so often points to a really bad connection or some other issue. To know more, we need curl debug logs:

from pypath.share import curl
from pypath.inputs import uniprot
import pypath

with curl.cache_off(), curl.debug_on():
    taxons = uniprot.uniprot_ncbi_taxids_2()

pypath.log()

If the download is not successful, the above log should give useful information. The file is not too large (2.3 MB); repeated failures to download it suggest some more fundamental issue. Can you download it from the CLI?

curl -LOvvv  https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/docs/speclist.txt

Can you open the URL above in your browser?
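
If it is easier to test from within Python, here is a quick reachability check using only the standard library (just a connectivity probe, not pypath's own download machinery):

from urllib.request import urlopen

url = (
    'https://ftp.uniprot.org/pub/databases/uniprot/current_release/'
    'knowledgebase/complete/docs/speclist.txt'
)

# Read only the beginning of the file; a timeout or an exception here points
# to a network problem on this machine rather than to pypath.
with urlopen(url, timeout = 60) as resp:
    head = resp.read(1024)

print(head.decode('utf-8', errors = 'replace')[:200])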

jchenpku commented 1 month ago

I also encountered a problem:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[32], line 1
----> 1 msigdb = dc.get_resource('MSigDB', organism = "mouse")
      2 msigdb

File ~/miniconda3/envs/decoupler/lib/python3.12/site-packages/decoupler/omnip.py:360, in get_resource(name, organism, genesymbol_resource, **kwargs)
    358 df.columns = list(df.columns)
    359 df = df.reset_index()
--> 360 df = _annotation_identifiers(df, organism, genesymbol_resource)
    361 df = df.drop(columns=['record_id', 'uniprot', 'entity_type', 'source'])
    362 df = op._misc.dtypes.auto_dtype(df)

File ~/miniconda3/envs/decoupler/lib/python3.12/site-packages/decoupler/omnip.py:919, in _annotation_identifiers(net, organism, genesymbol_resource)
    906 def _annotation_identifiers(
    907         net: pd.DataFrame,
    908         organism: str | int,
   (...)
    914         ) = None,
    915         ) -> pd.DataFrame:
    917     if not _is_human(organism):
--> 919         net = translate_net(
    920             net,
    921             columns='uniprot',
    922             id_type='uniprot',
    923             source_organism=9606,
    924             target_organism=organism,
    925         )
    927     if genesymbol_resource is False:
    929         net['genesymbol'] = net['uniprot']

File ~/miniconda3/envs/decoupler/lib/python3.12/site-packages/decoupler/omnip.py:732, in translate_net(net, columns, source_organism, target_organism, id_type, unique_by, **kwargs)
    729     return net
    731 _check_if_pypath()
--> 732 from pypath.utils import orthology
    733 from pypath.share import common
    734 from pypath.utils import taxonomy

File ~/miniconda3/envs/decoupler/lib/python3.12/site-packages/pypath/utils/__init__.py:20
      1 #!/usr/bin/env python
      2 # -*- coding: utf-8 -*-
      3 
   (...)
     17 #  Website: https://pypath.omnipathdb.org/
     18 #
---> 20 from . import orthology as homology

File ~/miniconda3/envs/decoupler/lib/python3.12/site-packages/pypath/utils/orthology.py:45
     42 import timeloop
     43 import pandas as pd
---> 45 import pypath.utils.mapping as mapping
     46 import pypath.share.common as common
     47 import pypath_common._constants as _const

File ~/miniconda3/envs/decoupler/lib/python3.12/site-packages/pypath/utils/mapping.py:69
     67 import pypath.share.curl as curl
     68 import pypath.inputs as inputs
---> 69 import pypath.inputs.uniprot as uniprot_input
     70 import pypath.inputs.uniprot_db as uniprot_db
     71 import pypath.inputs.pro as pro_input

File ~/miniconda3/envs/decoupler/lib/python3.12/site-packages/pypath/inputs/uniprot.py:41
     39 import pypath.share.common as common
     40 import pypath_common._constants as _const
---> 41 import pypath.utils.taxonomy as taxonomy
     42 from pypath.inputs.uniprot_idmapping import idtypes as idmapping_idtypes
     44 _logger = session_mod.Logger(name = 'uniprot_input')

File ~/miniconda3/envs/decoupler/lib/python3.12/site-packages/pypath/utils/taxonomy.py:85
     46 # XXX: Shouldn't we keep all functions and variables separated
     47 #      (together among them)?
     48 taxids = {
     49     9606: 'human',
     50     10090: 'mouse',
   (...)
     77     9544: 'rhesus macaque',
     78 }
     80 taxids2 = dict(
     81     (
     82         t.taxon_id,
     83         t.common_name.lower()
     84     )
---> 85     for t in ensembl_input.ensembl_organisms()
     86 )
     88 taxa = common.swap_dict_simple(taxids)
     89 taxa2 = common.swap_dict_simple(taxids2)

File ~/miniconda3/envs/decoupler/lib/python3.12/site-packages/pypath/inputs/ensembl.py:52, in ensembl_organisms()
     49 with warnings.catch_warnings():
     51     warnings.simplefilter('ignore', bs4.XMLParsedAsHTMLWarning)
---> 52     soup = bs4.BeautifulSoup(c.result, 'html.parser')
     54 for r in soup.find('table').find_all('tr'):
     56     if not record:

File ~/miniconda3/envs/decoupler/lib/python3.12/site-packages/bs4/__init__.py:315, in BeautifulSoup.__init__(self, markup, features, builder, parse_only, from_encoding, exclude_encodings, element_classes, **kwargs)
    313 if hasattr(markup, 'read'):        # It's a file-type object.
    314     markup = markup.read()
--> 315 elif len(markup) <= 256 and (
    316         (isinstance(markup, bytes) and not b'<' in markup)
    317         or (isinstance(markup, str) and not '<' in markup)
    318 ):
    319     # Issue warnings for a couple beginner problems
    320     # involving passing non-markup to Beautiful Soup.
    321     # Beautiful Soup will still parse the input as markup,
    322     # since that is sometimes the intended behavior.
    323     if not self._markup_is_url(markup):
    324         self._markup_resembles_filename(markup)                

TypeError: object of type 'NoneType' has no len()
deeenes commented 2 weeks ago

This latter error is also a download failure, which is either accidental or specific to your network, system, or environment.
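
A sketch of the retry suggested earlier in this thread, adapted to this call (the get_resource arguments are taken from the traceback above):

from pypath.share import curl
import decoupler as dc
import pypath

# Retry with pypath's cache disabled and curl debugging enabled, so a fresh
# download is attempted; if it still fails, pypath.log() gives access to the log.
with curl.cache_off(), curl.debug_on():
    msigdb = dc.get_resource('MSigDB', organism = 'mouse')

pypath.log()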