Open stevedodson opened 4 years ago
Resolved by https://github.com/elastic/eland/pull/117
Still an issue with histograms and categorical columns:
feature='international plan'
df[df.churn==0][feature].hist(density=True, alpha=0.5)
df[df.churn==1][feature].hist(density=True, alpha=0.5)
plt.show()
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-13-7bbc4512e82d> in <module>
1 feature='international plan'
2
----> 3 df[df.churn==0][feature].hist(density=True, alpha=0.5)
4 df[df.churn==1][feature].hist(density=True, alpha=0.5)
5 plt.show()
~/PycharmProjects/eland/eland/plotting/_core.py in ed_hist_series(self, by, ax, grid, xlabelsize, xrot, ylabelsize, yrot, figsize, bins, **kwds)
57 figsize=figsize,
58 bins=bins,
---> 59 **kwds
60 )
61
~/PycharmProjects/eland/eland/plotting/_matplotlib/hist.py in hist_series(self, by, ax, grid, xlabelsize, xrot, ylabelsize, yrot, figsize, bins, **kwds)
56 self_weights = self_weights.squeeze()
57
---> 58 ax.hist(self_bins[:-1], bins=self_bins, weights=self_weights, **kwds)
59 ax.grid(grid)
60 axes = np.array([ax])
~/anaconda3/envs/eland/lib/python3.7/site-packages/matplotlib/__init__.py in inner(ax, data, *args, **kwargs)
1599 def inner(ax, *args, data=None, **kwargs):
1600 if data is None:
-> 1601 return func(ax, *map(sanitize_sequence, args), **kwargs)
1602
1603 bound = new_sig.bind(ax, *args, **kwargs)
~/anaconda3/envs/eland/lib/python3.7/site-packages/matplotlib/axes/_axes.py in hist(self, x, bins, range, density, weights, cumulative, bottom, histtype, align, orientation, rwidth, log, color, label, stacked, normed, **kwargs)
6765 # this will automatically overwrite bins,
6766 # so that each histogram uses the same bins
-> 6767 m, bins = np.histogram(x[i], bins, weights=w[i], **hist_kwargs)
6768 m = m.astype(float) # causes problems later if it's an int
6769 if mlast is None:
<__array_function__ internals> in histogram(*args, **kwargs)
~/anaconda3/envs/eland/lib/python3.7/site-packages/numpy/lib/histograms.py in histogram(a, bins, range, normed, weights, density)
793 a, weights = _ravel_and_check_weights(a, weights)
794
--> 795 bin_edges, uniform_bins = _get_bin_edges(a, bins, range, weights)
796
797 # Histogram is an integer or a float array depending on the weights.
~/anaconda3/envs/eland/lib/python3.7/site-packages/numpy/lib/histograms.py in _get_bin_edges(a, bins, range, weights)
436
437 else:
--> 438 raise ValueError('`bins` must be 1d, when an array')
439
440 if n_equal_bins is not None:
ValueError: `bins` must be 1d, when an array
@sethmlarson / @stevedodson Can you please summarize what exactly the issue is? 😃
See the issue in cells 13-17 of the attached notebook (Churn Results EDA.ipynb.gz).
churn.csv.gz
Churn Results EDA.ipynb.gz