pandas-dev / pandas

Flexible and powerful data analysis / manipulation library for Python, providing labeled data structures similar to R data.frame objects, statistical functions, and much more
https://pandas.pydata.org
BSD 3-Clause "New" or "Revised" License
43.33k stars 17.81k forks source link

BUG: seaborn `heatmap` not happy with pyarrow types #56270

Closed mattharrison closed 9 months ago

mattharrison commented 9 months ago

Pandas version checks

Reproducible Example

import seaborn as sns
import pandas as pd
import io

data = '''name,x,y
matt,10,20
fred,20,1
jeff,2,3
lisa,5,7
adam,23,1'''

df = pd.read_csv(io.StringIO(data), dtype_backend='pyarrow')  # request pyarrow-backed dtypes
sns.heatmap(df[['x', 'y']])  # this call raises the TypeError reported in this issue

Issue Description

This raises a `TypeError` when the DataFrame uses pyarrow-backed dtypes, but works with the default NumPy-backed (pandas 1.x-style) dtypes:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[33], line 13
      5 data = '''name,x,y
      6 matt,10,20
      7 fred,20,1
      8 jeff,2,3
      9 lisa,5,7
     10 adam,23,1'''
     12 df = pd.read_csv(io.StringIO(data), dtype_backend='pyarrow')
---> 13 sns.heatmap(df[['x', 'y']])

File ~/.envs/menv/lib/python3.10/site-packages/seaborn/matrix.py:459, in heatmap(data, vmin, vmax, cmap, center, robust, annot, fmt, annot_kws, linewidths, linecolor, cbar, cbar_kws, cbar_ax, square, xticklabels, yticklabels, mask, ax, **kwargs)
    457 if square:
    458     ax.set_aspect("equal")
--> 459 plotter.plot(ax, cbar_ax, kwargs)
    460 return ax

File ~/.envs/menv/lib/python3.10/site-packages/seaborn/matrix.py:306, in _HeatMapper.plot(self, ax, cax, kws)
    303     kws.setdefault("vmax", self.vmax)
    305 # Draw the heatmap
--> 306 mesh = ax.pcolormesh(self.plot_data, cmap=self.cmap, **kws)
    308 # Set the axis limits
    309 ax.set(xlim=(0, self.data.shape[1]), ylim=(0, self.data.shape[0]))

File ~/.envs/menv/lib/python3.10/site-packages/matplotlib/__init__.py:1423, in _preprocess_data.<locals>.inner(ax, data, *args, **kwargs)
   1420 @functools.wraps(func)
   1421 def inner(ax, *args, data=None, **kwargs):
   1422     if data is None:
-> 1423         return func(ax, *map(sanitize_sequence, args), **kwargs)
   1425     bound = new_sig.bind(ax, *args, **kwargs)
   1426     auto_label = (bound.arguments.get(label_namer)
   1427                   or bound.kwargs.get(label_namer))

File ~/.envs/menv/lib/python3.10/site-packages/matplotlib/axes/_axes.py:6171, in Axes.pcolormesh(self, alpha, norm, cmap, vmin, vmax, shading, antialiased, *args, **kwargs)
   6167 C = C.ravel()
   6169 kwargs.setdefault('snap', mpl.rcParams['pcolormesh.snap'])
-> 6171 collection = mcoll.QuadMesh(
   6172     coords, antialiased=antialiased, shading=shading,
   6173     array=C, cmap=cmap, norm=norm, alpha=alpha, **kwargs)
   6174 collection._scale_norm(norm, vmin, vmax)
   6175 self._pcolor_grid_deprecation_helper()

File ~/.envs/menv/lib/python3.10/site-packages/matplotlib/collections.py:1988, in QuadMesh.__init__(self, *args, **kwargs)
   1985 self._bbox.update_from_data_xy(self._coordinates.reshape(-1, 2))
   1986 # super init delayed after own init because array kwarg requires
   1987 # self._coordinates and self._shading
-> 1988 super().__init__(**kwargs)
   1989 self.set_mouseover(False)

File ~/.envs/menv/lib/python3.10/site-packages/matplotlib/_api/deprecation.py:454, in make_keyword_only.<locals>.wrapper(*args, **kwargs)
    448 if len(args) > name_idx:
    449     warn_deprecated(
    450         since, message="Passing the %(name)s %(obj_type)s "
    451         "positionally is deprecated since Matplotlib %(since)s; the "
    452         "parameter will become keyword-only %(removal)s.",
    453         name=name, obj_type=f"parameter of {func.__name__}()")
--> 454 return func(*args, **kwargs)

File ~/.envs/menv/lib/python3.10/site-packages/matplotlib/collections.py:202, in Collection.__init__(self, edgecolors, facecolors, linewidths, linestyles, capstyle, joinstyle, antialiaseds, offsets, offset_transform, norm, cmap, pickradius, hatch, urls, zorder, **kwargs)
    199 self._offset_transform = offset_transform
    201 self._path_effects = None
--> 202 self._internal_update(kwargs)
    203 self._paths = None

File ~/.envs/menv/lib/python3.10/site-packages/matplotlib/artist.py:1186, in Artist._internal_update(self, kwargs)
   1179 def _internal_update(self, kwargs):
   1180     """
   1181     Update artist properties without prenormalizing them, but generating
   1182     errors as if calling `set`.
   1183 
   1184     The lack of prenormalization is to maintain backcompatibility.
   1185     """
-> 1186     return self._update_props(
   1187         kwargs, "{cls.__name__}.set() got an unexpected keyword argument "
   1188         "{prop_name!r}")

File ~/.envs/menv/lib/python3.10/site-packages/matplotlib/artist.py:1162, in Artist._update_props(self, props, errfmt)
   1159             if not callable(func):
   1160                 raise AttributeError(
   1161                     errfmt.format(cls=type(self), prop_name=k))
-> 1162             ret.append(func(v))
   1163 if ret:
   1164     self.pchanged()

File ~/.envs/menv/lib/python3.10/site-packages/matplotlib/collections.py:2050, in QuadMesh.set_array(self, A)
   2045     if faulty_data:
   2046         raise TypeError(
   2047             f"Dimensions of A {A.shape} are incompatible with "
   2048             f"X ({width}) and/or Y ({height})")
-> 2050 return super().set_array(A)

File ~/.envs/menv/lib/python3.10/site-packages/matplotlib/cm.py:533, in ScalarMappable.set_array(self, A)
    531 A = cbook.safe_masked_invalid(A, copy=True)
    532 if not np.can_cast(A.dtype, float, "same_kind"):
--> 533     raise TypeError(f"Image data of dtype {A.dtype} cannot be "
    534                     "converted to float")
    536 self._A = A

TypeError: Image data of dtype object cannot be converted to float

Expected Behavior

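I expect `sns.heatmap` to render pyarrow-backed DataFrames in pandas 2 just as it does NumPy-backed ones. This might be a seaborn issue or a coordination issue between the libraries; I am reporting it here to make people aware of it.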

Installed Versions

INSTALLED VERSIONS
------------------
commit : e86ed377639948c64c429059127bcf5b359ab6be
python : 3.10.13.final.0
python-bits : 64
OS : Darwin
OS-release : 21.6.0
Version : Darwin Kernel Version 21.6.0: Wed Aug 10 14:28:23 PDT 2022; root:xnu-8020.141.5~2/RELEASE_ARM64_T6000
machine : arm64
processor : arm
byteorder : little
LC_ALL : en_US.UTF-8
LANG : None
LOCALE : en_US.UTF-8
pandas : 2.1.1
numpy : 1.23.5
pytz : 2023.3
dateutil : 2.8.2
setuptools : 67.6.1
pip : 23.2.1
Cython : 3.0.4
pytest : 7.2.0
hypothesis : 6.81.2
sphinx : None
blosc : None
feather : None
xlsxwriter : 3.1.2
lxml.etree : 4.9.2
html5lib : 1.1
pymysql : None
psycopg2 : None
jinja2 : 3.1.2
IPython : 8.8.0
pandas_datareader : None
bs4 : 4.11.1
bottleneck : None
dataframe-api-compat: None
fastparquet : None
fsspec : 2023.3.0
gcsfs : None
matplotlib : 3.6.2
numba : 0.56.4
numexpr : 2.8.4
odfpy : None
openpyxl : 3.0.10
pandas_gbq : None
pyarrow : 13.0.0
pyreadstat : None
pyxlsb : None
s3fs : None
scipy : 1.10.0
sqlalchemy : 2.0.21
tables : None
tabulate : 0.9.0
xarray : None
xlrd : 2.0.1
zstandard : None
tzdata : 2023.3
qtpy : None
pyqt5 : None

mroeschke commented 9 months ago

Thanks for the report, but based on the traceback this appears to be a matplotlib/seaborn compatibility issue with these types, so I would recommend raising this issue in seaborn. Closing as an upstream issue.