alevax / pyviper

Porting of Protein Activity and Pathway Inference to single cell and Python.
MIT License
10 stars 0 forks source link

Issue with NaRnEA and cell names with same ID #75

Open LucaZanella15 opened 4 months ago

LucaZanella15 commented 4 months ago

When running NaRnEA on data in which multiple cells share the same name, the following error arises:


ValueError Traceback (most recent call last) Cell In[187], line 4 1 #adata_PA = pyviper.viper(gex_data=adata_combined, interactome=list(a3_nets.values()), enrichment="narnea", eset_filter=False, store_input_data=True) 2 #adata_PA = pyviper.viper(gex_data=adata_combined, interactome=glioma_net_integrated, enrichment="narnea", eset_filter=False, store_input_data=True) 3 #adata_PA = pyviper.viper(gex_data=adata_combined, interactome=list(a3_nets.values()), enrichment="area", eset_filter=False, store_input_data=True) ----> 4 adata_PA = pyviper.viper(gex_data=adata_combined, interactome=a3_nets["AP2574"], enrichment="narnea", eset_filter=False, store_input_data=True)

File ~/Desktop/ColumbiaProjects/pyviper/pyviper/_viper.py:240, in viper(gex_data, interactome, layer, eset_filter, method, enrichment, mvws, min_targets, njobs, batch_size, verbose, output_as_anndata, transfer_obs, store_input_data) 237 if verbose: print("Computing regulons enrichment with NaRnEa") 239 if njobs==1: --> 240 preOp = NaRnEA( 241 gex_df, 242 interactome, layer, eset_filter, 243 min_targets, verbose 244 ) 245 else: 246 results = Parallel(njobs)( 247 delayed(NaRnEA)( 248 gex_df.iloc[batch_i*batch_size:batch_i*batch_size+batch_size], (...) 251 ) for batch_i in range(n_batches) 252 )

File ~/Desktop/ColumbiaProjects/pyviper/pyviper/NaRnEA/NaRnEA_meta.py:182, in NaRnEA(gex_data, interactome, layer, eset_filter, min_targets, verbose) 179 pd.options.mode.chained_assignment = None 181 if isinstance(interactome, Interactome): --> 182 return NaRnEA_classic(gex_data, interactome, layer, eset_filter, min_targets, verbose) 183 elif len(interactome) == 1: 184 return NaRnEA_classic(gex_data, interactome[0], layer, eset_filter, min_targets, verbose)

File ~/Desktop/ColumbiaProjects/pyviper/pyviper/NaRnEA/NaRnEA_classic.py:243, in NaRnEA_classic(gex_data, interactome, layer, eset_filter, min_targets, verbose, return_as_df) 240 dat2 = dat1.merge(filtered_table, on='target', how = 'left') 241 # should I use left or inner? #no. have to use left join 242 #dat2.dropna(inplace= True) --> 243 AM_mat = dat2.pivot(index='target',columns='regulator',values = 'mor').fillna(0) 244 AM_mat = AM_mat[AM_mat.columns.dropna()] 247 AW_mat = dat2.pivot(index='target',columns='regulator',values = 'likelihood').fillna(0)

File ~/mambaforge/lib/python3.10/site-packages/pandas/util/_decorators.py:331, in deprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper(*args, **kwargs) 325 if len(args) > num_allow_args: 326 warnings.warn( 327 msg.format(arguments=_format_argument_list(allow_args)), 328 FutureWarning, 329 stacklevel=find_stack_level(), 330 ) --> 331 return func(*args, **kwargs)

File ~/mambaforge/lib/python3.10/site-packages/pandas/core/frame.py:8567, in DataFrame.pivot(self, index, columns, values) 8561 @Substitution("") 8562 @Appender(_shared_docs["pivot"]) 8563 @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) 8564 def pivot(self, index=None, columns=None, values=None) -> DataFrame: 8565 from pandas.core.reshape.pivot import pivot -> 8567 return pivot(self, index=index, columns=columns, values=values)

File ~/mambaforge/lib/python3.10/site-packages/pandas/util/_decorators.py:331, in deprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper(*args, **kwargs) 325 if len(args) > num_allow_args: 326 warnings.warn( 327 msg.format(arguments=_format_argument_list(allow_args)), 328 FutureWarning, 329 stacklevel=find_stack_level(), 330 ) --> 331 return func(*args, **kwargs)

File ~/mambaforge/lib/python3.10/site-packages/pandas/core/reshape/pivot.py:540, in pivot(data, index, columns, values) 536 indexed = data._constructor_sliced(data[values]._values, index=multiindex) 537 # error: Argument 1 to "unstack" of "DataFrame" has incompatible type "Union 538 # [List[Any], ExtensionArray, ndarray[Any, Any], Index, Series]"; expected 539 # "Hashable" --> 540 return indexed.unstack(columns_listlike)

File ~/mambaforge/lib/python3.10/site-packages/pandas/core/series.py:4455, in Series.unstack(self, level, fill_value) 4412 """ 4413 Unstack, also known as pivot, Series with MultiIndex to produce DataFrame. 4414 (...) 4451 b 2 4 4452 """ 4453 from pandas.core.reshape.reshape import unstack -> 4455 return unstack(self, level, fill_value)

File ~/mambaforge/lib/python3.10/site-packages/pandas/core/reshape/reshape.py:489, in unstack(obj, level, fill_value) 487 if is_1d_only_ea_dtype(obj.dtype): 488 return _unstack_extension_series(obj, level, fill_value) --> 489 unstacker = _Unstacker( 490 obj.index, level=level, constructor=obj._constructor_expanddim 491 ) 492 return unstacker.get_result( 493 obj._values, value_columns=None, fill_value=fill_value 494 )

File ~/mambaforge/lib/python3.10/site-packages/pandas/core/reshape/reshape.py:137, in _Unstacker.__init__(self, index, level, constructor) 129 if num_cells > np.iinfo(np.int32).max: 130 warnings.warn( 131 f"The following operation may generate {num_cells} cells " 132 f"in the resulting pandas object.", 133 PerformanceWarning, 134 stacklevel=find_stack_level(), 135 ) --> 137 self._make_selectors()

File ~/mambaforge/lib/python3.10/site-packages/pandas/core/reshape/reshape.py:189, in _Unstacker._make_selectors(self) 186 mask.put(selector, True) 188 if mask.sum() < len(self.index): --> 189 raise ValueError("Index contains duplicate entries, cannot reshape") 191 self.group_index = comp_index 192 self.mask = mask

ValueError: Index contains duplicate entries, cannot reshape