shakedzy / dython

A set of data tools in Python
http://shakedzy.xyz/dython/
MIT License
492 stars 102 forks source link

Issue with plotting heatmap using Dython associations #144

Closed chimtingshing closed 1 year ago

chimtingshing commented 1 year ago

Version check:

Run and copy the output:

import sys, dython
print(sys.version_info)
print(dython.__version__)

Output:
sys.version_info(major=3, minor=8, micro=5, releaselevel='final', serial=0)
0.7.2

Describe the bug:

I am unable to plot the heat-map of feature associations for the Iris dataset when following the Dython example.

Code to reproduce:

import pandas as pd
from sklearn import datasets
from dython.nominal import associations

# Load data 
iris = datasets.load_iris()

# Convert int classes to strings to allow associations 
# method to automatically recognize categorical columns
target = ['C{}'.format(i) for i in iris.target]

# Prepare data
X = pd.DataFrame(data=iris.data, columns=iris.feature_names)
y = pd.DataFrame(data=target, columns=['target'])
df = pd.concat([X, y], axis=1)

# Plot features associations
associations(df)

Error message:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-6-11dcec0cf513> in <module>
     15 
     16 # Plot features associations
---> 17 associations(df)

D:\Python Anaconda\lib\site-packages\dython\nominal.py in associations(dataset, nominal_columns, numerical_columns, mark_columns, nom_nom_assoc, num_num_assoc, nom_num_assoc, symmetric_nom_nom, symmetric_num_num, display_rows, display_columns, hide_rows, hide_columns, cramers_v_bias_correction, nan_strategy, nan_replace_value, ax, figsize, annot, fmt, cmap, sv_color, cbar, vmax, vmin, plot, compute_only, clustering, title, filename, multiprocessing, max_cpu_cores)
    775             else 0.0
    776         )
--> 777         ax = sns.heatmap(
    778             corr,
    779             cmap=cmap,

D:\Python Anaconda\lib\site-packages\seaborn\_decorators.py in inner_f(*args, **kwargs)
     44             )
     45         kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 46         return f(**kwargs)
     47     return inner_f
     48 

D:\Python Anaconda\lib\site-packages\seaborn\matrix.py in heatmap(data, vmin, vmax, cmap, center, robust, annot, fmt, annot_kws, linewidths, linecolor, cbar, cbar_kws, cbar_ax, square, xticklabels, yticklabels, mask, ax, **kwargs)
    556     if square:
    557         ax.set_aspect("equal")
--> 558     plotter.plot(ax, cbar_ax, kwargs)
    559     return ax
    560 

D:\Python Anaconda\lib\site-packages\seaborn\matrix.py in plot(self, ax, cax, kws)
    306 
    307         # Draw the heatmap
--> 308         mesh = ax.pcolormesh(self.plot_data, cmap=self.cmap, **kws)
    309 
    310         # Set the axis limits

~\AppData\Roaming\Python\Python38\site-packages\matplotlib\__init__.py in inner(ax, data, *args, **kwargs)
   1421     def inner(ax, *args, data=None, **kwargs):
   1422         if data is None:
-> 1423             return func(ax, *map(sanitize_sequence, args), **kwargs)
   1424 
   1425         bound = new_sig.bind(ax, *args, **kwargs)

~\AppData\Roaming\Python\Python38\site-packages\matplotlib\axes\_axes.py in pcolormesh(self, alpha, norm, cmap, vmin, vmax, shading, antialiased, *args, **kwargs)
   6169         kwargs.setdefault('snap', mpl.rcParams['pcolormesh.snap'])
   6170 
-> 6171         collection = mcoll.QuadMesh(
   6172             coords, antialiased=antialiased, shading=shading,
   6173             array=C, cmap=cmap, norm=norm, alpha=alpha, **kwargs)

~\AppData\Roaming\Python\Python38\site-packages\matplotlib\collections.py in __init__(self, *args, **kwargs)
   1986         # super init delayed after own init because array kwarg requires
   1987         # self._coordinates and self._shading
-> 1988         super().__init__(**kwargs)
   1989         self.set_mouseover(False)
   1990 

~\AppData\Roaming\Python\Python38\site-packages\matplotlib\_api\deprecation.py in wrapper(*args, **kwargs)
    452                 "parameter will become keyword-only %(removal)s.",
    453                 name=name, obj_type=f"parameter of {func.__name__}()")
--> 454         return func(*args, **kwargs)
    455 
    456     # Don't modify *func*'s signature, as boilerplate.py needs it.

~\AppData\Roaming\Python\Python38\site-packages\matplotlib\collections.py in __init__(self, edgecolors, facecolors, linewidths, linestyles, capstyle, joinstyle, antialiaseds, offsets, offset_transform, norm, cmap, pickradius, hatch, urls, zorder, **kwargs)
    200 
    201         self._path_effects = None
--> 202         self._internal_update(kwargs)
    203         self._paths = None
    204 

~\AppData\Roaming\Python\Python38\site-packages\matplotlib\artist.py in _internal_update(self, kwargs)
   1184         The lack of prenormalization is to maintain backcompatibility.
   1185         """
-> 1186         return self._update_props(
   1187             kwargs, "{cls.__name__}.set() got an unexpected keyword argument "
   1188             "{prop_name!r}")

~\AppData\Roaming\Python\Python38\site-packages\matplotlib\artist.py in _update_props(self, props, errfmt)
   1160                         raise AttributeError(
   1161                             errfmt.format(cls=type(self), prop_name=k))
-> 1162                     ret.append(func(v))
   1163         if ret:
   1164             self.pchanged()

~\AppData\Roaming\Python\Python38\site-packages\matplotlib\collections.py in set_array(self, A)
   2048                     f"X ({width}) and/or Y ({height})")
   2049 
-> 2050         return super().set_array(A)
   2051 
   2052     def get_datalim(self, transData):

~\AppData\Roaming\Python\Python38\site-packages\matplotlib\cm.py in set_array(self, A)
    531         A = cbook.safe_masked_invalid(A, copy=True)
    532         if not np.can_cast(A.dtype, float, "same_kind"):
--> 533             raise TypeError(f"Image data of dtype {A.dtype} cannot be "
    534                             "converted to float")
    535 

TypeError: Image data of dtype object cannot be converted to float

Input data:

The input data I used was the Iris dataset, loaded with `sklearn.datasets.load_iris()` as shown in the code above.

chimtingshing commented 1 year ago

After reading about a dependency issue faced by another user, I downgraded pandas from version 1.5.2 to version 1.4.4. After that, the function works.

shakedzy commented 1 year ago

Yes, the current stable version has an issue with Pandas 1.5 and above. The next version will fix it. In the meantime, you can install Dython from source to get the dev version, which already includes the fix.