TypeError: unhashable type: 'numpy.ndarray' when running perturbation_space #669

Open ernesto-iacucci opened 6 days ago

ernesto-iacucci commented 6 days ago


Hi, when I run the code in "perturbation_space.ipynb" and get to the section on the logistic regression classifier space:

ps = pt.tl.LRClassifierSpace()
psadata = ps.compute(adata, embedding_key="X_pca", target_col="perturbation_name")
psadata

I get the following error:

TypeError                                 Traceback (most recent call last)
Cell In[24], line 2
      1 ps = pt.tl.LRClassifierSpace()
----> 2 psadata = ps.compute(adata, embedding_key="X_pca", target_col="perturbation_name")
      3 psadata

File ~/anaconda3/lib/python3.10/site-packages/pertpy/tools/_perturbation_space/, in LRClassifierSpace.compute(self, adata, target_col, layer_key, embedding_key, test_split_size, max_iter) 81 # Save adata observations for embedding annotations in get_embeddings 82 adata_obs = adata.obs.reset_index(drop=True) ---> 83 adata_obs = adata_obs.groupby(target_col).agg( 84 lambda pert_group: np.nan if len(set(pert_group)) != 1 else list(set(pert_group))[0] 85 ) 87 # Fit a logistic regression model for each perturbation 88 regression_model = LogisticRegression(max_iter=max_iter, class_weight="balanced")

File ~/anaconda3/lib/python3.10/site-packages/pandas/core/groupby/, in DataFrameGroupBy.aggregate(self, func, engine, engine_kwargs, *args, **kwargs) 1480 gba = GroupByApply(self, [func], args=(), kwargs={}) 1481 try: -> 1482 result = gba.agg() 1484 except ValueError as err: 1485 if "No objects to concatenate" not in str(err):

File ~/anaconda3/lib/python3.10/site-packages/pandas/core/, in Apply.agg(self) 190 return self.agg_dict_like() 191 elif is_list_like(func): 192 # we require a list, but not a 'str' --> 193 return self.agg_list_like() 195 if callable(func): 196 f = com.get_cython_func(func)

File ~/anaconda3/lib/python3.10/site-packages/pandas/core/, in Apply.agg_list_like(self) 318 def agg_list_like(self) -> DataFrame | Series: 319 """ 320 Compute aggregation in the case of a list-like argument. 321 (...) 324 Result of aggregation. 325 """ --> 326 return self.agg_or_apply_list_like(op_name="agg")

File ~/anaconda3/lib/python3.10/site-packages/pandas/core/, in GroupByApply.agg_or_apply_list_like(self, op_name) 1566 # Only set as_index=True on groupby objects, not Window or Resample 1567 # that inherit from this class. 1568 with com.temp_setattr( 1569 obj, "as_index", True, condition=hasattr(obj, "as_index") 1570 ): -> 1571 keys, results = self.compute_list_like(op_name, selected_obj, kwargs) 1572 result = self.wrap_results_list_like(keys, results) 1573 return result

File ~/anaconda3/lib/python3.10/site-packages/pandas/core/, in Apply.compute_list_like(self, op_name, selected_obj, kwargs) 379 colg = obj._gotitem(col, ndim=1, subset=selected_obj.iloc[:, index]) 380 args = ( 381 [self.axis, self.args] 382 if include_axis(op_name, colg) 383 else self.args 384 ) --> 385 new_res = getattr(colg, op_name)(func, args, **kwargs) 386 results.append(new_res) 387 indices.append(index)

File ~/anaconda3/lib/python3.10/site-packages/pandas/core/groupby/, in SeriesGroupBy.aggregate(self, func, engine, engine_kwargs, *args, *kwargs) 255 kwargs["engine"] = engine 256 kwargs["engine_kwargs"] = engine_kwargs --> 257 ret = self._aggregate_multiple_funcs(func, args, **kwargs) 258 if relabeling: 259 # columns is not narrowed by mypy from relabeling flag 260 assert columns is not None # for mypy

File ~/anaconda3/lib/python3.10/site-packages/pandas/core/groupby/, in SeriesGroupBy._aggregate_multiple_funcs(self, arg, *args, *kwargs) 360 for idx, (name, func) in enumerate(arg): 361 key = base.OutputKey(label=name, position=idx) --> 362 results[key] = self.aggregate(func, args, **kwargs) 364 if any(isinstance(x, DataFrame) for x in results.values()): 365 from pandas import concat

File ~/anaconda3/lib/python3.10/site-packages/pandas/core/groupby/, in SeriesGroupBy.aggregate(self, func, engine, engine_kwargs, *args, kwargs) 291 return self._python_agg_general(func, *args, *kwargs) 293 try: --> 294 return self._python_agg_general(func, args, kwargs) 295 except KeyError: 296 # KeyError raised in test_groupby.test_basic is bc the func does 297 # a dictionary lookup on, but group name is not 298 # pinned in _python_agg_general, only in _aggregate_named 299 result = self._aggregate_named(func, *args, **kwargs)

File ~/anaconda3/lib/python3.10/site-packages/pandas/core/groupby/, in SeriesGroupBy._python_agg_general(self, func, *args, *kwargs) 324 f = lambda x: func(x, args, **kwargs) 326 obj = self._obj_with_exclusions --> 327 result = self._grouper.agg_series(obj, f) 328 res = obj._constructor(result, 329 return self._wrap_aggregated_output(res)

File ~/anaconda3/lib/python3.10/site-packages/pandas/core/groupby/, in BaseGrouper.agg_series(self, obj, func, preserve_dtype) 857 if not isinstance(obj._values, np.ndarray): 858 # we can preserve a little bit more aggressively with EA dtype 859 # because maybe_cast_pointwise_result will do a try/except 860 # with _from_sequence. NB we are assuming here that _from_sequence 861 # is sufficiently strict that it casts appropriately. 862 preserve_dtype = True --> 864 result = self._aggregate_series_pure_python(obj, func) 866 npvalues = lib.maybe_convert_objects(result, try_float=False) 867 if preserve_dtype:

File ~/anaconda3/lib/python3.10/site-packages/pandas/core/groupby/, in BaseGrouper._aggregate_series_pure_python(self, obj, func) 882 splitter = self._get_splitter(obj, axis=0) 884 for i, group in enumerate(splitter): --> 885 res = func(group) 886 res = extract_result(res) 888 if not initialized: 889 # We only do this validation on the first iteration

File ~/anaconda3/lib/python3.10/site-packages/pandas/core/groupby/, in SeriesGroupBy._python_agg_general..(x) 322 alias = com._builtin_table_alias[func] 323 warn_alias_replacement(self, orig_func, alias) --> 324 f = lambda x: func(x, *args, **kwargs) 326 obj = self._obj_with_exclusions 327 result = self._grouper.agg_series(obj, f)

File ~/anaconda3/lib/python3.10/site-packages/pertpy/tools/_perturbation_space/, in LRClassifierSpace.compute..(pert_group) 81 # Save adata observations for embedding annotations in get_embeddings 82 adata_obs = adata.obs.reset_index(drop=True) 83 adata_obs = adata_obs.groupby(target_col).agg( ---> 84 lambda pert_group: np.nan if len(set(pert_group)) != 1 else list(set(pert_group))[0] 85 ) 87 # Fit a logistic regression model for each perturbation 88 regression_model = LogisticRegression(max_iter=max_iter, class_weight="balanced")

TypeError: unhashable type: 'numpy.ndarray'

Version information

anndata 0.10.8 pandas 2.2.3 pertpy 0.9.4 scanpy 1.10.3 session_info 1.0.0

IPython 8.26.0 jupyter_client 8.6.2 jupyter_core 5.7.2 jupyterlab 4.2.4 notebook 7.2.1

Python 3.10.12 | packaged by conda-forge | (main, Jun 23 2023, 22:40:32) [GCC 12.3.0] Linux-5.10.226-214.879.amzn2.x86_64-x86_64-with-glibc2.26

Session information updated at 2024-10-14 22:51

Lilly-May commented 4 days ago

Hi @ernesto-iacucci! Thanks for reporting this issue. I just tried, but I couldn’t reproduce your error. Are you using the dataset from the tutorial (pt.dt.norman_2019()) or your own data?

ernesto-iacucci commented 3 days ago

Hi, I am using: adata = pt.dt.norman_2019()