choshin84 / learning_memo

personal learning memo
0 stars 0 forks source link

Useful visualization #54

Open choshin84 opened 4 years ago

choshin84 commented 4 years ago

Useful link

https://www.kaggle.com/robikscube/m5-forecasting-starter-data-exploration

choshin84 commented 4 years ago

Heatmap calendar (one figure per category, one panel per year for 2013–2015)

# Calendar heatmaps of scaled mean daily sales: one figure per category, with
# one panel per year (2013, 2014, 2015).
#
# Relies on names defined elsewhere: `stv` (has a 'cat_id' column),
# `past_sales` (date-indexed, one column per item), `sscale` (presumably a
# scikit-learn style scaler exposing fit_transform — confirm) and `calmap`
# (helper called as calmap(ax, year, 7x53 array)).

# Each panel covers 53 whole weeks (53 * 7 = 371 days) from its start date.
# The three start dates are the Mondays on or just before 1 January.
DAYS_PER_PANEL = 371
calendar_starts = [
    (2013, '31-Dec-2012'),
    (2014, '30-Dec-2013'),
    (2015, '29-Dec-2014'),
]

# The row masks depend only on past_sales.index, so build them once here
# instead of once per category and per year inside the loop.
calendar_masks = [
    (year, past_sales.index.isin(pd.date_range(start, periods=DAYS_PER_PANEL)))
    for year, start in calendar_starts
]

for cat_id in stv['cat_id'].unique():
    fig, axes = plt.subplots(3, 1, figsize=(20, 8))
    # Columns are selected by substring match on the category id.
    items_col = [c for c in past_sales.columns if cat_id in c]
    for ax, (year, in_window) in zip(axes, calendar_masks):
        # Mean across the category's items for every day in the window.
        daily_mean = past_sales.loc[in_window, items_col].mean(axis=1)
        # Scale each year independently; np.hstack flattens the (n, 1)
        # result back to a 1-D vector.
        vals = np.hstack(sscale.fit_transform(daily_mean.values.reshape(-1, 1)))
        # reshape(53, 7) needs exactly 371 values, i.e. every date of the
        # window must be present in past_sales.index. After the transpose the
        # rows are the 7 positions within a week and the columns the 53 weeks.
        calmap(ax, year, vals.reshape(53, 7).T)
    plt.suptitle(cat_id, fontsize=30, x=0.4, y=1.01)
    plt.tight_layout()
    plt.show()
choshin84 commented 4 years ago

Tiled line charts (one panel per store, 7-period rolling mean of total sales)

# Small-multiples view: a 5x2 grid of panels sharing the x axis, one panel per
# entry of store_list. Each panel plots the 7-row rolling mean of that store's
# summed sales. `past_sales`, `store_list` and `color_cycle` come from
# elsewhere in the notebook.
fig, grid = plt.subplots(5, 2, figsize=(15, 10), sharex=True)
axes = grid.flatten()  # 1-D array so panels can be addressed by position

for panel_no, store_id in enumerate(store_list):
    # Columns belonging to this store are found by substring match.
    store_columns = [col for col in past_sales.columns if store_id in col]
    store_total = past_sales[store_columns].sum(axis=1)
    smoothed = store_total.rolling(7).mean()
    smoothed.plot(
        alpha=1,
        ax=axes[panel_no],
        title=store_id,
        lw=3,
        color=next(color_cycle),  # advance the shared colour iterator
    )

plt.suptitle('Weekly Sale Trends by Store ID')
plt.tight_layout()
plt.show()
choshin84 commented 4 years ago

Stacked bar chart with a line showing the row totals

import pandas as pd
import matplotlib.pyplot as plt

# Toy data: five rows labelled with date strings (kept as plain strings, not
# parsed into datetimes) and four columns of mixed-sign values.
row_labels = ['06-11-2018', '06-12-2018', '06-13-2018',
              '06-14-2018', '06-15-2018']
columns = {
    'A': [-378, 2347, 509, 987, 513],
    'B': [527, 2599, -765, 533, 670],
    'C': [2343, -2273, 2093, -2197, 1990],
    'D': [1845, 1853, -3325, 1306, 2160],
}
df = pd.DataFrame(columns, index=row_labels)

# Draw the stacked bars first, then overlay the per-row total as a black line
# on the same axes.
ax = df.plot(kind="bar", stacked=True)
row_totals = df.sum(axis=1)
row_totals.plot(ax=ax, color="k")

plt.show()