Closed leopensaa closed 2 years ago
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# ---------------------------------------------------------------------------
# Data preparation: mean review sentiment per category for the Olist reviews
# and for the external reviews, the per-category difference between the two,
# and the categories at both ends of that ranking.
# ---------------------------------------------------------------------------

# Load the review tables; both are expected to carry a 'category' and a
# 'sentiment' column (those are the only columns read below).
olist_revs = pd.read_csv('datasets/processed/nlp_to_olist.csv')
external_revs = pd.read_csv('datasets/processed/nlp_to_external_data.csv')

# Olist side: keep only categories that have at least 400 sentiment values.
filt = olist_revs.groupby(['category'])['sentiment'].count() >= 400
mean_olist = olist_revs.groupby(['category'])[['sentiment']].mean()
# Reassign instead of mutating the filtered frame in place, which avoids
# pandas' SettingWithCopyWarning on the boolean-indexed result.
filt_olist = mean_olist[filt].reset_index()
# NOTE(review): row label 5 is dropped by position in the filtered table;
# presumably a category with no external counterpart - confirm, and consider
# dropping it by name so the script survives a change in the data.
# The second reset_index() keeps the old row labels in an 'index' column,
# exactly as the original in-place calls did.
filt_olist = filt_olist.drop(5).reset_index()

# External side: mean sentiment per category.
gped_external = external_revs.groupby(['category'])[['sentiment']].mean().reset_index()

# Attach the English category names to the external means.
translated_cats = pd.read_csv('datasets/product_category_name_translation.csv')
translated_cats = translated_cats.rename(columns={'product_category_name': 'category'})
translated_revs = pd.merge(gped_external, translated_cats, on='category', how='left')
# Categories without a translation are labelled 'sports_leisure'.  Only the
# English-name column is filled: a frame-wide fillna would also write that
# string into a missing numeric 'sentiment' value.
translated_revs['product_category_name_english'] = (
    translated_revs['product_category_name_english'].fillna('sports_leisure')
)
# As above, the pre-sort row labels end up in an 'index' column.
translated_revs = translated_revs.sort_values(by='product_category_name_english').reset_index()

# External minus Olist sentiment.  The subtraction aligns on the fresh
# 0..n-1 row labels, so it is only meaningful if both tables list the same
# categories in the same order.
# NOTE(review): nothing enforces that here - confirm against the data.
diff_btew_categories = translated_revs['sentiment'] - filt_olist['sentiment']

# `categories` was referenced but never defined, so the script stopped with
# a NameError.  The lookups below match df_diff['category'] against both
# filt_olist['category'] and the English names, so the Olist category column
# is used.
# NOTE(review): confirm this is the intended source.
categories = filt_olist['category']
df_diff = pd.concat([diff_btew_categories, categories], axis=1)
df_diff.sort_values(by='sentiment', inplace=True)

# First five rows of the ascending ranking (smallest external-minus-Olist
# difference) versus everything from position 15 onwards.
# NOTE(review): the 15 presumably assumes 20 categories, i.e. "last five".
best_categories = df_diff['category'].iloc[:5]
worst_categories = df_diff['category'].iloc[15:]

best_mlibre = translated_revs[translated_revs['product_category_name_english'].isin(best_categories)]
worst_mlibre = translated_revs[translated_revs['product_category_name_english'].isin(worst_categories)]
best_olist = filt_olist[filt_olist['category'].isin(best_categories)]
worst_olist = filt_olist[filt_olist['category'].isin(worst_categories)]
# ---------------------------------------------------------------------------
# Figure: two polar subplots side by side, titled "Best" and "Worst".  Each
# one overlays the Olist frame and the external frame for that group.
# ---------------------------------------------------------------------------
fig = make_subplots(
    rows=1,
    cols=2,
    specs=[[{'type':'polar'}, {'type':'polar'}]],
    subplot_titles=("Best", "Worst"),
)

# One entry per trace: (frame, column used for the angular axis,
# legend label, subplot column).  Every trace lives in row 1.
trace_plan = [
    (best_olist, 'category', 'Original Dataset', 1),
    (best_mlibre, 'product_category_name_english', 'External Data', 1),
    (worst_olist, 'category', 'Original Dataset', 2),
    (worst_mlibre, 'product_category_name_english', 'External Data', 2),
]

polar_traces = [
    go.Scatterpolar(r=frame['sentiment'], theta=frame[theta_col], name=label)
    for frame, theta_col, label, _ in trace_plan
]

fig.add_traces(
    polar_traces,
    rows=[1 for _ in trace_plan],
    cols=[subplot_col for *_, subplot_col in trace_plan],
)

fig.update_layout(title_text="We vs Competitors (Reviews)")
fig.show()
Graphs corresponding to the NLP model and the scraper are ready!
💡 Goal
Show one graph with insights from the NLP model and the scraper.
🤝 Acceptance Criteria