cmap / cmapPy

Assorted tools for interacting with .gct, .gctx files and other Connectivity Map (Broad Institute) data/tools
https://clue.io/cmapPy/index.html
BSD 3-Clause "New" or "Revised" License
126 stars 76 forks source link

Cannot write back GCT file that has been parsed #78

Open Munchic opened 2 years ago

Munchic commented 2 years ago

I am trying to run a simple test: reading in an existing GCT file and then writing it back out, but the write step fails. The same GCT file works in cmapR. Thank you for your help!

I'm using Python 3.8.9.

Here's the code:

from cmapPy.pandasGEXpress.parse import parse as parse_gct
from cmapPy.pandasGEXpress.write_gct import write as write_gct

plate34_dda = parse_gct("LINCS_P100_DIA_Plate34_annotated_minimized_2018-05-02_19-56-02.processed.gct")
write_gct(plate34_dda, "test_lvl4")  # this fails

Here's the error:

---------------------------------------------------------------------------
InvalidIndexError                         Traceback (most recent call last)
Input In [16], in <cell line: 1>()
----> 1 write_gct(plate34_dda, "test_lvl4")

File ~/.local/share/virtualenvs/alphamap-6YdFO0zX/lib/python3.8/site-packages/cmapPy/pandasGEXpress/write_gct.py:43, in write(gctoo, out_fname, data_null, metadata_null, filler_null, data_float_format)
     40 write_version_and_dims(VERSION, dims, f)
     42 # Write top half of the gct
---> 43 write_top_half(f, gctoo.row_metadata_df, gctoo.col_metadata_df,
     44                metadata_null, filler_null)
     46 # Write bottom half of the gct
     47 write_bottom_half(f, gctoo.row_metadata_df, gctoo.data_df,
     48                   data_null, data_float_format, metadata_null)

File ~/.local/share/virtualenvs/alphamap-6YdFO0zX/lib/python3.8/site-packages/cmapPy/pandasGEXpress/write_gct.py:98, in write_top_half(f, row_metadata_df, col_metadata_df, metadata_null, filler_null)
     95 col_metadata_indices = (range(1, top_half_df.shape[0]),
     96                         range(1 + row_metadata_df.shape[1], top_half_df.shape[1]))
     97 # pd.DataFrame.at to insert into dataframe(python3)
---> 98 top_half_df.at[col_metadata_indices[0], col_metadata_indices[1]] = (
     99     col_metadata_df.astype(str).replace("nan", value=metadata_null).T.values)
    101 # Write top_half_df to file
    102 top_half_df.to_csv(f, header=False, index=False, sep="\t")

File ~/.local/share/virtualenvs/alphamap-6YdFO0zX/lib/python3.8/site-packages/pandas/core/indexing.py:2273, in _AtIndexer.__setitem__(self, key, value)
   2270     self.obj.loc[key] = value
   2271     return
-> 2273 return super().__setitem__(key, value)

File ~/.local/share/virtualenvs/alphamap-6YdFO0zX/lib/python3.8/site-packages/pandas/core/indexing.py:2228, in _ScalarAccessIndexer.__setitem__(self, key, value)
   2225 if len(key) != self.ndim:
   2226     raise ValueError("Not enough indexers for scalar access (setting)!")
-> 2228 self.obj._set_value(*key, value=value, takeable=self._takeable)

File ~/.local/share/virtualenvs/alphamap-6YdFO0zX/lib/python3.8/site-packages/pandas/core/frame.py:3870, in DataFrame._set_value(self, index, col, value, takeable)
   3867     series._set_value(index, value, takeable=True)
   3868     return
-> 3870 series = self._get_item_cache(col)
   3871 loc = self.index.get_loc(index)
   3872 dtype = series.dtype

File ~/.local/share/virtualenvs/alphamap-6YdFO0zX/lib/python3.8/site-packages/pandas/core/frame.py:3939, in DataFrame._get_item_cache(self, item)
   3934 res = cache.get(item)
   3935 if res is None:
   3936     # All places that call _get_item_cache have unique columns,
   3937     #  pending resolution of GH#33047
-> 3939     loc = self.columns.get_loc(item)
   3940     res = self._ixs(loc, axis=1)
   3942     cache[item] = res

File ~/.local/share/virtualenvs/alphamap-6YdFO0zX/lib/python3.8/site-packages/pandas/core/indexes/range.py:388, in RangeIndex.get_loc(self, key, method, tolerance)
    386         except ValueError as err:
    387             raise KeyError(key) from err
--> 388     self._check_indexing_error(key)
    389     raise KeyError(key)
    390 return super().get_loc(key, method=method, tolerance=tolerance)

File ~/.local/share/virtualenvs/alphamap-6YdFO0zX/lib/python3.8/site-packages/pandas/core/indexes/base.py:5637, in Index._check_indexing_error(self, key)
   5633 def _check_indexing_error(self, key):
   5634     if not is_scalar(key):
   5635         # if key is not a scalar, directly raise an error (the code below
   5636         # would convert to numpy arrays and raise later any way) - GH29926
-> 5637         raise InvalidIndexError(key)

InvalidIndexError: range(7, 103)
levlitichev commented 2 years ago

Your code worked for me using Python 2. Here is how I created my conda env:

conda create -n cmappy_py2 -c bioconda cmappy python=2