angelolab / ark-analysis

Integrated pipeline for multiplexed image analysis
https://ark-analysis.readthedocs.io/en/latest/
MIT License
70 stars 25 forks source link

`IntCastingNaNError` in 3_Pixie_Cluster_Cells notebook #1094

Open sarafiller opened 8 months ago

sarafiller commented 8 months ago

Hello all, I am trying to run Pixie cell clustering and get the following error. Note that I was able to run the entire pipeline on a small sample image. On my real image, I get an error in the 3_Pixie_Cluster_Cells notebook; 2_Pixie_Cluster_Pixels completed without any errors. The step that produces the error is "Generate the input data for the cell SOM", which computes the counts of each pixel cluster per cell, both raw and normalized by cell size. Here is the error that I get:

IntCastingNaNError: Cannot convert non-finite values (NA or inf) to integer: Error while type casting for column 'pixel_som_cluster_100'

Seems like it is happening at this step:

-->162     cell_table = cell_table.combine_first(num_cluster_per_seg_label)
    164 # NaN means the cluster wasn't found in the specified fov-cell pair

Can you provide me with a bit more information on the comment that you have left in the code? Below is the full error stack:

---------------------------------------------------------------------------
IntCastingNaNError                        Traceback (most recent call last)
Cell In[8], line 11
      8     cluster_counts_size_norm = feather.read_dataframe(os.path.join(base_dir, cluster_counts_size_norm_name))
      9 else:
     10     # generate the preprocessed data 
---> 11     cluster_counts, cluster_counts_size_norm = cell_cluster_utils.create_c2pc_data(
     12         fovs, os.path.join(base_dir, pixel_data_dir), cell_table_path, pixel_cluster_col
     13     )
     15     # write both unnormalized and normalized input data for reference
     16     feather.write_dataframe(
     17         cluster_counts,
     18         os.path.join(base_dir, cluster_counts_name),
     19         compression='uncompressed'
     20     )

File ~\.conda\envs\pixie\lib\site-packages\ark\phenotyping\cell_cluster_utils.py:162, in create_c2pc_data(fovs, pixel_data_path, cell_table_path, pixel_cluster_col)
    160     # combine the data of num_cluster_per_seg_label into cell_table_indices
    161     num_cluster_per_seg_label = num_cluster_per_seg_label.set_index(cell_table_indices)
--> 162     cell_table = cell_table.combine_first(num_cluster_per_seg_label)
    164 # NaN means the cluster wasn't found in the specified fov-cell pair
    165 cell_table = cell_table.fillna(0)

File ~\.conda\envs\pixie\lib\site-packages\pandas\core\frame.py:8549, in DataFrame.combine_first(self, other)
   8545 if len(other) == 0:
   8546     combined = self.reindex(
   8547         self.columns.append(other.columns.difference(self.columns)), axis=1
   8548     )
-> 8549     combined = combined.astype(other.dtypes)
   8550 else:
   8551     combined = self.combine(other, combiner, overwrite=False)

File ~\.conda\envs\pixie\lib\site-packages\pandas\core\generic.py:6513, in NDFrame.astype(self, dtype, copy, errors)
   6511 else:
   6512     try:
-> 6513         res_col = col.astype(dtype=cdt, copy=copy, errors=errors)
   6514     except ValueError as ex:
   6515         ex.args = (
   6516             f"{ex}: Error while type casting for column '{col_name}'",
   6517         )

File ~\.conda\envs\pixie\lib\site-packages\pandas\core\generic.py:6534, in NDFrame.astype(self, dtype, copy, errors)
   6530     results = [ser.astype(dtype, copy=copy) for _, ser in self.items()]
   6532 else:
   6533     # else, only a single dtype is given
-> 6534     new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors)
   6535     res = self._constructor_from_mgr(new_data, axes=new_data.axes)
   6536     return res.__finalize__(self, method="astype")

File ~\.conda\envs\pixie\lib\site-packages\pandas\core\internals\managers.py:414, in BaseBlockManager.astype(self, dtype, copy, errors)
    411 elif using_copy_on_write():
    412     copy = False
--> 414 return self.apply(
    415     "astype",
    416     dtype=dtype,
    417     copy=copy,
    418     errors=errors,
    419     using_cow=using_copy_on_write(),
    420 )

File ~\.conda\envs\pixie\lib\site-packages\pandas\core\internals\managers.py:354, in BaseBlockManager.apply(self, f, align_keys, **kwargs)
    352         applied = b.apply(f, **kwargs)
    353     else:
--> 354         applied = getattr(b, f)(**kwargs)
    355     result_blocks = extend_blocks(applied, result_blocks)
    357 out = type(self).from_blocks(result_blocks, self.axes)

File ~\.conda\envs\pixie\lib\site-packages\pandas\core\internals\blocks.py:616, in Block.astype(self, dtype, copy, errors, using_cow)
    596 """
    597 Coerce to the new dtype.
    598 
   (...)
    612 Block
    613 """
    614 values = self.values
--> 616 new_values = astype_array_safe(values, dtype, copy=copy, errors=errors)
    618 new_values = maybe_coerce_values(new_values)
    620 refs = None

File ~\.conda\envs\pixie\lib\site-packages\pandas\core\dtypes\astype.py:238, in astype_array_safe(values, dtype, copy, errors)
    235     dtype = dtype.numpy_dtype
    237 try:
--> 238     new_values = astype_array(values, dtype, copy=copy)
    239 except (ValueError, TypeError):
    240     # e.g. _astype_nansafe can fail on object-dtype of strings
    241     #  trying to convert to float
    242     if errors == "ignore":

File ~\.conda\envs\pixie\lib\site-packages\pandas\core\dtypes\astype.py:183, in astype_array(values, dtype, copy)
    180     values = values.astype(dtype, copy=copy)
    182 else:
--> 183     values = _astype_nansafe(values, dtype, copy=copy)
    185 # in pandas we don't store numpy str dtypes, so convert to object
    186 if isinstance(dtype, np.dtype) and issubclass(values.dtype.type, str):

File ~\.conda\envs\pixie\lib\site-packages\pandas\core\dtypes\astype.py:101, in _astype_nansafe(arr, dtype, copy, skipna)
     96     return lib.ensure_string_array(
     97         arr, skipna=skipna, convert_na_value=False
     98     ).reshape(shape)
    100 elif np.issubdtype(arr.dtype, np.floating) and dtype.kind in "iu":
--> 101     return _astype_float_to_int_nansafe(arr, dtype, copy)
    103 elif arr.dtype == object:
    104     # if we have a datetime/timedelta array of objects
    105     # then coerce to datetime64[ns] and use DatetimeArray.astype
    107     if lib.is_np_dtype(dtype, "M"):

File ~\.conda\envs\pixie\lib\site-packages\pandas\core\dtypes\astype.py:146, in _astype_float_to_int_nansafe(values, dtype, copy)
    142 """
    143 astype with a check preventing converting NaN to an meaningless integer value.
    144 """
    145 if not np.isfinite(values).all():
--> 146     raise IntCastingNaNError(
    147         "Cannot convert non-finite values (NA or inf) to integer"
    148     )
    149 if dtype.kind == "u":
    150     # GH#45151
    151     if not (values >= 0).all():

IntCastingNaNError: Cannot convert non-finite values (NA or inf) to integer: Error while type casting for column 'pixel_som_cluster_100'

Your timely help would be greatly appreciated. Thank you so much.

alex-l-kong commented 8 months ago

@sarafiller that is definitely a weird error. Can you send me the version of Pandas that you have? In your Jupyter notebook, you can do that by creating a new cell and running:

import pandas as pd
print(pd.__version__)

You should be on the latest version at 2.X.X.