perslab / CELLEX

CELLEX (CELL-type EXpression-specificity)
GNU General Public License v3.0
36 stars 9 forks source link

.txt.gz files for "mapping"-functions are not found #17

Closed bengnielsen closed 4 years ago

bengnielsen commented 4 years ago

User DaianeH experienced the following error when attempting to use the cellex.utils.mapping.mgi_mouse_to_ens_mouse() function:

eso_mapped = cellex.utils.mapping.mgi_mouse_to_ens_mouse(eso.results["esmu"])
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/hpc/packages/minerva-centos7/py_packages/3.7/lib/python3.7/site-packages/cellex-1.0.1-py3.7.egg/cellex/utils/mapping/mgi_mouse_to_ens_mouse.py", line 30, in mgi_mouse_to_ens_mouse
File "/hpc/packages/minerva-centos7/python/3.7.3/lib/python3.7/site-packages/pkg_resources/__init__.py", line 1151, in resource_stream
self, resource_name
File "/hpc/packages/minerva-centos7/python/3.7.3/lib/python3.7/site-packages/pkg_resources/__init__.py", line 1398, in get_resource_stream
return io.BytesIO(self.get_resource_string(manager, resource_name))
File "/hpc/packages/minerva-centos7/python/3.7.3/lib/python3.7/site-packages/pkg_resources/__init__.py", line 1401, in get_resource_string
return self._get(self._fn(self.module_path, resource_name))
File "/hpc/packages/minerva-centos7/python/3.7.3/lib/python3.7/site-packages/pkg_resources/__init__.py", line 1540, in _get
return self.loader.get_data(path)
OSError: [Errno 0] Error: 'cellex/utils/mapping/maps/Mus_musculus.GRCm38.90.gene_name_version2ensembl.txt.gz' 

This indicates that Mus_musculus.GRCm38.90.gene_name_version2ensembl.txt.gz was not found. I replicated the error and also got a

FileNotFoundError: [Errno 2] No such file or directory: 'CELLEX/cellex/utils/mapping/maps/Mus_musculus.GRCm38.90.gene_name_version2ensembl.txt.gz'

at the end.

I tried the other mapping functions, and they raised the same error for their corresponding mapping files, e.g. cellex.utils.mapping.ens_mouse_to_ens_human():

---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
<ipython-input-8-74444136e9e6> in <module>
----> 1 cellex.utils.mapping.ens_mouse_to_ens_human(df.iloc[:,0])

~/miniconda3/envs/cellex/lib/python3.6/site-packages/cellex/utils/mapping/ens_mouse_to_ens_human.py in ens_mouse_to_ens_human(df_unmapped, drop_unmapped, verbose)
     35     fp_mapping_file = "CELLEX/cellex/utils/mapping/maps/hsapiens_mmusculus_unique_orthologs.GRCh37.ens_v91.txt.gz"
     36 
---> 37     df_map = pd.read_csv(fp_mapping_file, delim_whitespace=True)
     38 
     39     # create dictionary for mapping mouse ensemble gene id's to human ensembl gene id's

~/miniconda3/envs/cellex/lib/python3.6/site-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision)
    683         )
    684 
--> 685         return _read(filepath_or_buffer, kwds)
    686 
    687     parser_f.__name__ = name

~/miniconda3/envs/cellex/lib/python3.6/site-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
    455 
    456     # Create the parser.
--> 457     parser = TextFileReader(fp_or_buf, **kwds)
    458 
    459     if chunksize or iterator:

~/miniconda3/envs/cellex/lib/python3.6/site-packages/pandas/io/parsers.py in __init__(self, f, engine, **kwds)
    893             self.options["has_index_names"] = kwds["has_index_names"]
    894 
--> 895         self._make_engine(self.engine)
    896 
    897     def close(self):

~/miniconda3/envs/cellex/lib/python3.6/site-packages/pandas/io/parsers.py in _make_engine(self, engine)
   1133     def _make_engine(self, engine="c"):
   1134         if engine == "c":
-> 1135             self._engine = CParserWrapper(self.f, **self.options)
   1136         else:
   1137             if engine == "python":

~/miniconda3/envs/cellex/lib/python3.6/site-packages/pandas/io/parsers.py in __init__(self, src, **kwds)
   1904         kwds["usecols"] = self.usecols
   1905 
-> 1906         self._reader = parsers.TextReader(src, **kwds)
   1907         self.unnamed_cols = self._reader.unnamed_cols
   1908 

pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader.__cinit__()

pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._setup_parser_source()

~/miniconda3/envs/cellex/lib/python3.6/gzip.py in __init__(self, filename, mode, compresslevel, fileobj, mtime)
    161             mode += 'b'
    162         if fileobj is None:
--> 163             fileobj = self.myfileobj = builtins.open(filename, mode or 'rb')
    164         if filename is None:
    165             filename = getattr(fileobj, 'name', '')

FileNotFoundError: [Errno 2] No such file or directory: 'CELLEX/cellex/utils/mapping/maps/hsapiens_mmusculus_unique_orthologs.GRCh37.ens_v91.txt.gz' 

cellex.utils.mapping.ens_human_to_symbol()

---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
<ipython-input-9-6808aa31ce1a> in <module>
----> 1 cellex.utils.mapping.ens_human_to_symbol(df.iloc[:,0])

~/miniconda3/envs/cellex/lib/python3.6/site-packages/cellex/utils/mapping/ens_human_to_symbol.py in ens_human_to_symbol(df_unmapped, drop_unmapped, verbose)
     36     fp_mapping_file = "CELLEX/cellex/utils/mapping/maps/GRCh38.ens_v90.ensembl2gene_name_version.txt.gz"
     37 
---> 38     df_map = pd.read_csv(fp_mapping_file, delim_whitespace=True, compression="gzip")
     39 
     40     # create dictionary for mapping human ensemble gene id's to gene names

~/miniconda3/envs/cellex/lib/python3.6/site-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision)
    683         )
    684 
--> 685         return _read(filepath_or_buffer, kwds)
    686 
    687     parser_f.__name__ = name

~/miniconda3/envs/cellex/lib/python3.6/site-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
    455 
    456     # Create the parser.
--> 457     parser = TextFileReader(fp_or_buf, **kwds)
    458 
    459     if chunksize or iterator:

~/miniconda3/envs/cellex/lib/python3.6/site-packages/pandas/io/parsers.py in __init__(self, f, engine, **kwds)
    893             self.options["has_index_names"] = kwds["has_index_names"]
    894 
--> 895         self._make_engine(self.engine)
    896 
    897     def close(self):

~/miniconda3/envs/cellex/lib/python3.6/site-packages/pandas/io/parsers.py in _make_engine(self, engine)
   1133     def _make_engine(self, engine="c"):
   1134         if engine == "c":
-> 1135             self._engine = CParserWrapper(self.f, **self.options)
   1136         else:
   1137             if engine == "python":

~/miniconda3/envs/cellex/lib/python3.6/site-packages/pandas/io/parsers.py in __init__(self, src, **kwds)
   1904         kwds["usecols"] = self.usecols
   1905 
-> 1906         self._reader = parsers.TextReader(src, **kwds)
   1907         self.unnamed_cols = self._reader.unnamed_cols
   1908 

pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader.__cinit__()

pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._setup_parser_source()

~/miniconda3/envs/cellex/lib/python3.6/gzip.py in __init__(self, filename, mode, compresslevel, fileobj, mtime)
    161             mode += 'b'
    162         if fileobj is None:
--> 163             fileobj = self.myfileobj = builtins.open(filename, mode or 'rb')
    164         if filename is None:
    165             filename = getattr(fileobj, 'name', '')

FileNotFoundError: [Errno 2] No such file or directory: 'CELLEX/cellex/utils/mapping/maps/GRCh38.ens_v90.ensembl2gene_name_version.txt.gz'
tstannius commented 4 years ago

Hotfixed in v1.1.1.