holoviz / hvplot

A high-level plotting API for pandas, dask, xarray, and networkx built on HoloViews
https://hvplot.holoviz.org
BSD 3-Clause "New" or "Revised" License
1.15k stars 108 forks source link

unpack interactive dataframe `.shape` got KeyError #1116

Open davycloud opened 1 year ago

davycloud commented 1 year ago

Version

python: 3.11
jupyterlab: 4.0.3

pandas: 2.0.2
numpy: 1.24.3

hvplot: 0.8.4

Problem

I was trying this demo and found that the interactive DataFrame raises errors in two places:

np.nanmean and _, cols = dfi.shape

Code

I can reproduce the issue with the following code:

import pandas as pd
import numpy as np
import hvplot.pandas

df = pd.DataFrame(np.random.randn(5, 5))
dfi = df.interactive()

Problem 1

np.nanmean(df, axis=0)   # OK
# array([ 0.09263235, -0.61521997,  0.87237865, -0.65929222, -0.61866116])

np.nanmean(dfi, axis=0)  # result is nan with warning: RuntimeWarning: Mean of empty slice
# nan

Problem 2

rows, cols = dfi.shape
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
File D:\Code\.Envs\work\Lib\site-packages\pandas\core\indexes\base.py:3652, in Index.get_loc(self, key)
   3651 try:
-> 3652     return self._engine.get_loc(casted_key)
   3653 except KeyError as err:

File D:\Code\.Envs\work\Lib\site-packages\pandas\_libs\index.pyx:147, in pandas._libs.index.IndexEngine.get_loc()

File D:\Code\.Envs\work\Lib\site-packages\pandas\_libs\index.pyx:176, in pandas._libs.index.IndexEngine.get_loc()

File pandas\_libs\hashtable_class_helper.pxi:7080, in pandas._libs.hashtable.PyObjectHashTable.get_item()

File pandas\_libs\hashtable_class_helper.pxi:7088, in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 1

The above exception was the direct cause of the following exception:

KeyError                                  Traceback (most recent call last)
Cell In[7], line 1
----> 1 rows, cols = dfi.shape

File D:\Code\.Envs\work\Lib\site-packages\hvplot\interactive.py:653, in Interactive.__getitem__(self, other)
    651 def __getitem__(self, other):
    652     other = other._transform if isinstance(other, Interactive) else other
--> 653     return self._apply_operator(operator.getitem, other)

File D:\Code\.Envs\work\Lib\site-packages\hvplot\interactive.py:539, in Interactive._apply_operator(self, operator, reverse, *args, **kwargs)
    537 transform = new._transform
    538 transform = type(transform)(transform, operator, *args, reverse=reverse)
--> 539 return new._clone(transform)

File D:\Code\.Envs\work\Lib\site-packages\hvplot\interactive.py:388, in Interactive._clone(self, transform, plot, loc, center, dmap, copy, max_rows, **kwargs)
    386 else:
    387     kwargs = dict(self._inherit_kwargs, **dict(self._kwargs, **kwargs))
--> 388 return type(self)(self._obj, fn=self._fn, transform=transform, plot=plot, depth=depth,
    389                  loc=loc, center=center, dmap=dmap, _shared_obj=self._shared_obj,
    390                  max_rows=max_rows, **kwargs)

File D:\Code\.Envs\work\Lib\site-packages\hvplot\interactive.py:282, in Interactive.__init__(self, obj, transform, fn, plot, depth, loc, center, dmap, inherit_kwargs, max_rows, method, _shared_obj, _current, **kwargs)
    280     self._current_ = _current
    281 else:
--> 282     self._current_ = self._transform.apply(ds, keep_index=True, compute=False)
    283 self._init = True
    284 self._dirty = False

File D:\Code\.Envs\work\Lib\site-packages\holoviews\util\transform.py:774, in dim.apply(self, dataset, flat, expanded, ranges, all_values, keep_index, compute, strict)
    772     drange = ranges.get(eldim, {})
    773     drange = drange.get('combined', drange)
--> 774     data = self._apply_fn(dataset, data, fn, fn_name, args,
    775                           kwargs, accessor, drange)
    776 drop_index = keep_index_for_compute and not keep_index
    777 compute = not compute_for_compute and compute

File D:\Code\.Envs\work\Lib\site-packages\holoviews\util\transform.py:674, in dim._apply_fn(self, dataset, data, fn, fn_name, args, kwargs, accessor, drange)
    672                 raise e
    673 else:
--> 674     data = fn(*args, **kwargs)
    676 return data

File D:\Code\.Envs\work\Lib\site-packages\pandas\core\frame.py:3761, in DataFrame.__getitem__(self, key)
   3759 if self.columns.nlevels > 1:
   3760     return self._getitem_multilevel(key)
-> 3761 indexer = self.columns.get_loc(key)
   3762 if is_integer(indexer):
   3763     indexer = [indexer]

File D:\Code\.Envs\work\Lib\site-packages\pandas\core\indexes\base.py:3654, in Index.get_loc(self, key)
   3652     return self._engine.get_loc(casted_key)
   3653 except KeyError as err:
-> 3654     raise KeyError(key) from err
   3655 except TypeError:
   3656     # If we have a listlike key, _check_indexing_error will raise
   3657     #  InvalidIndexError. Otherwise we fall through and re-raise
   3658     #  the TypeError.
   3659     self._check_indexing_error(key)

KeyError: 1
hoxbro commented 1 year ago

Are you getting this error with the code in the example, or are you playing around with the code yourself?

For the first example, `np.nanmean` does not know how to handle an interactive object. This can be solved with `.apply`, like this: `dfi.apply(lambda x: np.nanmean(x, axis=0))`

For the second example, the output of `dfi.shape` is also an interactive object. To get the right output, you can run `.eval()` on it: `dfi.shape.eval()`.