has2k1 / plotnine

A Grammar of Graphics for Python
MIT License
4.03k stars 217 forks source link

stat_smooth reports a strange error when called with method='loess' #355

Open tlkahn opened 4 years ago

tlkahn commented 4 years ago

The code below will cause the issue:

ggplot(mtcars, aes('wt', 'mpg', color='gear')) + geom_point() + stat_smooth(aes('wt', 'mpg'), method='loess') + facet_wrap(('gear', 'am'))

I installed scikit-misc because plotnine complained about a missing module.

Below is the traceback:

ValueError                                Traceback (most recent call last)
/usr/local/lib/python3.7/site-packages/IPython/core/formatters.py in __call__(self, obj)
    700                 type_pprinters=self.type_printers,
    701                 deferred_pprinters=self.deferred_printers)
--> 702             printer.pretty(obj)
    703             printer.flush()
    704             return stream.getvalue()

/usr/local/lib/python3.7/site-packages/IPython/lib/pretty.py in pretty(self, obj)
    400                         if cls is not object \
    401                                 and callable(cls.__dict__.get('__repr__')):
--> 402                             return _repr_pprint(obj, self, cycle)
    404             return _default_pprint(obj, self, cycle)

/usr/local/lib/python3.7/site-packages/IPython/lib/pretty.py in _repr_pprint(obj, p, cycle)
    695     """A pprint that just redirects to the normal repr function."""
    696     # Find newlines and replace them with p.break_()
--> 697     output = repr(obj)
    698     for idx,output_line in enumerate(output.splitlines()):
    699         if idx:

/usr/local/lib/python3.7/site-packages/plotnine/ggplot.py in __repr__(self)
     86         # in the jupyter notebook.
     87         if not self.figure:
---> 88             self.draw()
     89         plt.show()
     90         return '<ggplot: (%d)>' % self.__hash__()

/usr/local/lib/python3.7/site-packages/plotnine/ggplot.py in draw(self, return_ggplot)
    179         # new frames knowing that they are separate from the original.
    180         with pd.option_context('mode.chained_assignment', None):
--> 181             return self._draw(return_ggplot)
    183     def _draw(self, return_ggplot=False):

/usr/local/lib/python3.7/site-packages/plotnine/ggplot.py in _draw(self, return_ggplot)
    186         # assign a default theme
    187         self = deepcopy(self)
--> 188         self._build()
    190         # If no theme we use the default

/usr/local/lib/python3.7/site-packages/plotnine/ggplot.py in _build(self)
    298         # Apply and map statistics
--> 299         layers.compute_statistic(layout)
    300         layers.map_statistic(self)

/usr/local/lib/python3.7/site-packages/plotnine/layer.py in compute_statistic(self, layout)
     84     def compute_statistic(self, layout):
     85         for l in self:
---> 86             l.compute_statistic(layout)
     88     def map_statistic(self, plot):

/usr/local/lib/python3.7/site-packages/plotnine/layer.py in compute_statistic(self, layout)
    358         data = self.stat.use_defaults(data)
    359         data = self.stat.setup_data(data)
--> 360         data = self.stat.compute_layer(data, params, layout)
    361         self.data = data

/usr/local/lib/python3.7/site-packages/plotnine/stats/stat.py in compute_layer(cls, data, params, layout)
    266             return cls.compute_panel(pdata, pscales, **params)
--> 268         return groupby_apply(data, 'PANEL', fn)
    270     @classmethod

/usr/local/lib/python3.7/site-packages/plotnine/utils.py in groupby_apply(df, cols, func, *args, **kwargs)
    631         # function fn should be free to modify dataframe d, therefore
    632         # do not mark d as a slice of df i.e no SettingWithCopyWarning
--> 633         lst.append(func(d, *args, **kwargs))
    634     return pd.concat(lst, axis=axis, ignore_index=True)

/usr/local/lib/python3.7/site-packages/plotnine/stats/stat.py in fn(pdata)
    264                 return pdata
    265             pscales = layout.get_scales(pdata['PANEL'].iat[0])
--> 266             return cls.compute_panel(pdata, pscales, **params)
    268         return groupby_apply(data, 'PANEL', fn)

/usr/local/lib/python3.7/site-packages/plotnine/stats/stat.py in compute_panel(cls, data, scales, **params)
    297         stats = []
    298         for _, old in data.groupby('group'):
--> 299             new = cls.compute_group(old, scales, **params)
    300             unique = uniquecols(old)
    301             missing = unique.columns.difference(new.columns)

/usr/local/lib/python3.7/site-packages/plotnine/stats/stat_smooth.py in compute_group(cls, data, scales, **params)
    194             xseq = np.linspace(rangee[0], rangee[1], n)
--> 196         return predictdf(data, xseq, **params)

/usr/local/lib/python3.7/site-packages/plotnine/stats/smoothers.py in predictdf(data, xseq, **params)
     41         raise PlotnineError()
---> 43     return method(data, xseq, **params)

/usr/local/lib/python3.7/site-packages/plotnine/stats/smoothers.py in loess(data, xseq, **params)
    201     lo = loess_klass(data['x'], data['y'], weights, **kwargs)
--> 202     lo.fit()
    204     data = pd.DataFrame({'x': xseq})

_loess.pyx in _loess.loess.fit()

ValueError: b'There are other near singularities as well. 0.063227\n'
has2k1 commented 4 years ago

It looks like there are not enough distinct data points in some or all of the local groups to run a loess regression. Try increasing the value of the span.