Closed dschrein closed 6 years ago
Dear,
Could you tell me the version of pandas you are using? It should be 0.20.1 or later. If not, this would explain the error message.
Using the latest version of pandas, I tried to reproduce this error myself on the example dataset but did not succeed.
Thanks, Bram
pandas==0.19.2
Thanks - I will try after upgrading! ... Fixed - thank you!
Note that you have to use pd.read_pickle instead of pickle.load if you dumped the df using an older version of pandas.
@bramvds Hi, I am getting the same error, but my pandas version is '0.25.3'. Could you please help me?
>>> pd.__version__
'0.25.3'
The code and the resulting error:
>>>modules = list(modules_from_adjacencies(adjacencies, exprMat))
KeyError Traceback (most recent call last)
~/anaconda2/envs/Grim3.6.8/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2896 try:
-> 2897 return self._engine.get_loc(key)
2898 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'TF'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-45-b1bf361ac831> in <module>
----> 1 modules = list(modules_from_adjacencies(adjacencies, exprMat))
~/anaconda2/envs/Grim3.6.8/lib/python3.6/site-packages/pyscenic/utils.py in modules_from_adjacencies(adjacencies, ex_mtx, thresholds, top_n_targets, top_n_regulators, min_genes, absolute_thresholds, rho_dichotomize, keep_only_activating, rho_threshold, rho_mask_dropouts)
263 LOGGER.warn(f"Note on correlation calculation: the default behaviour for calculating the correlations has changed after pySCENIC verion 0.9.16. Previously, the default was to calculate the correlation between a TF and target gene using only cells with non-zero expression values (mask_dropouts=True). The current default is now to use all cells to match the behavior of the R verision of SCENIC. The original settings can be retained by setting 'rho_mask_dropouts=True' in the modules_from_adjacencies function, or '--mask_dropouts' from the CLI.\n\tDropout masking is currently set to [{rho_mask_dropouts}].")
264 adjacencies = add_correlation(adjacencies, ex_mtx,
--> 265 rho_threshold=rho_threshold, mask_dropouts=rho_mask_dropouts)
266 activating_modules = adjacencies[adjacencies[COLUMN_NAME_REGULATION] > 0.0]
267 if keep_only_activating:
~/anaconda2/envs/Grim3.6.8/lib/python3.6/site-packages/pyscenic/utils.py in add_correlation(adjacencies, ex_mtx, rho_threshold, mask_dropouts)
130 rhos = masked_rho4pairs(ex_mtx.values, col_idx_pairs, 0.0)
131 else:
--> 132 genes = list(set(adjacencies[COLUMN_NAME_TF]).union(set(adjacencies[COLUMN_NAME_TARGET])))
133 ex_mtx = ex_mtx[ex_mtx.columns[ex_mtx.columns.isin(genes)]]
134 corr_mtx = pd.DataFrame(index=ex_mtx.columns, columns=ex_mtx.columns,
~/anaconda2/envs/Grim3.6.8/lib/python3.6/site-packages/pandas/core/frame.py in __getitem__(self, key)
2993 if self.columns.nlevels > 1:
2994 return self._getitem_multilevel(key)
-> 2995 indexer = self.columns.get_loc(key)
2996 if is_integer(indexer):
2997 indexer = [indexer]
~/anaconda2/envs/Grim3.6.8/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2897 return self._engine.get_loc(key)
2898 except KeyError:
-> 2899 return self._engine.get_loc(self._maybe_cast_indexer(key))
2900 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2901 if indexer.ndim > 1 or indexer.size > 1:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'TF'
The code after I changed it as suggested by the warning message (setting rho_mask_dropouts=True), and the resulting error:
>>>modules = list(modules_from_adjacencies(adjacencies, exprMat, rho_mask_dropouts=True))
AttributeError Traceback (most recent call last)
<ipython-input-44-b6a5e2c5e1cf> in <module>
----> 1 modules = list(modules_from_adjacencies(adjacencies, exprMat, rho_mask_dropouts=True))
~/anaconda2/envs/Grim3.6.8/lib/python3.6/site-packages/pyscenic/utils.py in modules_from_adjacencies(adjacencies, ex_mtx, thresholds, top_n_targets, top_n_regulators, min_genes, absolute_thresholds, rho_dichotomize, keep_only_activating, rho_threshold, rho_mask_dropouts)
263 LOGGER.warn(f"Note on correlation calculation: the default behaviour for calculating the correlations has changed after pySCENIC verion 0.9.16. Previously, the default was to calculate the correlation between a TF and target gene using only cells with non-zero expression values (mask_dropouts=True). The current default is now to use all cells to match the behavior of the R verision of SCENIC. The original settings can be retained by setting 'rho_mask_dropouts=True' in the modules_from_adjacencies function, or '--mask_dropouts' from the CLI.\n\tDropout masking is currently set to [{rho_mask_dropouts}].")
264 adjacencies = add_correlation(adjacencies, ex_mtx,
--> 265 rho_threshold=rho_threshold, mask_dropouts=rho_mask_dropouts)
266 activating_modules = adjacencies[adjacencies[COLUMN_NAME_REGULATION] > 0.0]
267 if keep_only_activating:
~/anaconda2/envs/Grim3.6.8/lib/python3.6/site-packages/pyscenic/utils.py in add_correlation(adjacencies, ex_mtx, rho_threshold, mask_dropouts)
127 if mask_dropouts:
128 ex_mtx = ex_mtx.sort_index(axis=1)
--> 129 col_idx_pairs = _create_idx_pairs(adjacencies, ex_mtx)
130 rhos = masked_rho4pairs(ex_mtx.values, col_idx_pairs, 0.0)
131 else:
~/anaconda2/envs/Grim3.6.8/lib/python3.6/site-packages/pyscenic/utils.py in _create_idx_pairs(adjacencies, exp_mtx)
68
69 # Create sorted list of genes that take part in a TF-target link.
---> 70 genes = set(adjacencies.TF).union(set(adjacencies.target))
71 sorted_genes = sorted(genes)
72
~/anaconda2/envs/Grim3.6.8/lib/python3.6/site-packages/pandas/core/generic.py in __getattr__(self, name)
5177 if self._info_axis._can_hold_identifiers_and_holds_name(name):
5178 return self[name]
-> 5179 return object.__getattribute__(self, name)
5180
5181 def __setattr__(self, name, value):
AttributeError: 'DataFrame' object has no attribute 'TF'
I have completed everything up through the creation of the df object via:
then I get this KeyError:
Here's what the df object looks like:
Any ideas? Thanks in advance!