has2k1 / plotnine

A Grammar of Graphics for Python
https://plotnine.org
MIT License
4k stars 213 forks source link

Bug when facetting, discrete position and multiple layers #647

Closed has2k1 closed 1 year ago

has2k1 commented 1 year ago

The bug occurs with the combination of multiple layers, a discrete position aesthetic, and faceting, when at least one facet has no data in one of the layers.

# Minimal reproduction: two point layers share a discrete "x" aesthetic and
# are facetted by "g" with free x scales.
import pandas as pd
from plotnine import *

# Data for the first layer: every row has g == "A".
df1 = pd.DataFrame({
    'x': list("abc"),
    'y': [1, 2, 3],
    'g': list("AAA") # This layer has no rows in the "B" facet,
                     # which triggers the bug
})

# Data for the second layer: rows for both the "A" and the "B" facet.
df2 = pd.DataFrame({
    'x': list("abc"),
    'y': [4, 5, 6],
    'g': list("AAB")
})

# Displaying this plot raises the IndexError shown in the traceback.
(ggplot(aes("x", "y"))
 + geom_point(df1)
 + geom_point(df2)
 + facet_wrap("g", scales="free_x")
)
```python-traceback
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
File ~/env/plotnine/scales/scale_xy.py:80, in scale_position_discrete.map(self, series, limits)
     79 try:
---> 80 seq = seq[idx]
     81 except IndexError:
     82 # Deal with missing data
     83 # - Insert NaN where there is no match

IndexError: arrays used as indices must be of integer (or boolean) type

During handling of the above exception, another exception occurred:

IndexError                                Traceback (most recent call last)
File ~/.pyenv/versions/3.10.4/envs/plotnine/lib/python3.10/site-packages/IPython/core/formatters.py:707, in PlainTextFormatter.__call__(self, obj)
    700 stream = StringIO()
    701 printer = pretty.RepresentationPrinter(stream, self.verbose,
    702 self.max_width, self.newline,
    703 max_seq_length=self.max_seq_length,
    704 singleton_pprinters=self.singleton_printers,
    705 type_pprinters=self.type_printers,
    706 deferred_pprinters=self.deferred_printers)
--> 707 printer.pretty(obj)
    708 printer.flush()
    709 return stream.getvalue()

File ~/.pyenv/versions/3.10.4/envs/plotnine/lib/python3.10/site-packages/IPython/lib/pretty.py:410, in RepresentationPrinter.pretty(self, obj)
    407 return meth(obj, self, cycle)
    408 if cls is not object \
    409 and callable(cls.__dict__.get('__repr__')):
--> 410 return _repr_pprint(obj, self, cycle)
    412 return _default_pprint(obj, self, cycle)
    413 finally:

File ~/.pyenv/versions/3.10.4/envs/plotnine/lib/python3.10/site-packages/IPython/lib/pretty.py:778, in _repr_pprint(obj, p, cycle)
    776 """A pprint that just redirects to the normal repr function."""
    777 # Find newlines and replace them with p.break_()
--> 778 output = repr(obj)
    779 lines = output.splitlines()
    780 with p.group():

File ~/env/plotnine/ggplot.py:101, in ggplot.__repr__(self)
     97 def __repr__(self):
     98 """
     99 Print/show the plot
    100 """
--> 101 self.__str__()
    102 return '' % self.__hash__()

File ~/env/plotnine/ggplot.py:92, in ggplot.__str__(self)
     88 def __str__(self):
     89 """
     90 Print/show the plot
     91 """
---> 92 self.draw(show=True)
     94 # Return and empty string so that print(p) is "pretty"
     95 return ''

File ~/env/plotnine/ggplot.py:210, in ggplot.draw(self, return_ggplot, show)
    208 self = deepcopy(self)
    209 with plot_context(self, show=show):
--> 210 self._build()
    212 # setup
    213 figure, axs = self._create_figure()

File ~/env/plotnine/ggplot.py:307, in ggplot._build(self)
    304 # Map and train positions so that statistics have access
    305 # to ranges and all positions are numeric
    306 layout.train_position(layers, scales)
--> 307 layout.map_position(layers)
    309 # Apply and map statistics
    310 layers.compute_statistic(layout)

File ~/env/plotnine/facets/layout.py:108, in Layout.map_position(self, layers)
    105 x_vars = list(set(self.panel_scales_x[0].aesthetics) &
    106 set(data.columns))
    107 SCALE_X = _layout['SCALE_X'].iloc[match_id].tolist()
--> 108 self.panel_scales_x.map(data, x_vars, SCALE_X)
    110 if self.panel_scales_y:
    111 y_vars = list(set(self.panel_scales_y[0].aesthetics) &
    112 set(data.columns))

File ~/env/plotnine/scales/scales.py:165, in Scales.map(self, data, vars, idx)
    163 for i, sc in enumerate(self, start=1):
    164 bool_idx = (i == idx)
--> 165 results = sc.map(data.loc[bool_idx, col])
    166 if use_df:
    167 df.loc[bool_idx, col] = results

File ~/env/plotnine/scales/scale_xy.py:86, in scale_position_discrete.map(self, series, limits)
     84 seq = np.hstack((seq.astype(float), np.nan))
     85 idx = np.clip(idx, 0, len(seq)-1)
---> 86 seq = seq[idx]
     87 return seq
     88 return series

IndexError: arrays used as indices must be of integer (or boolean) type
```

Ref: https://stackoverflow.com/q/74369454/832573