aayush97 / semeval2023-afrisenti

A low-resource sentiment analysis project for African Languages
MIT License
0 stars 0 forks source link

Class distribution for each language #6

Closed Pha03 closed 2 years ago

Pha03 commented 2 years ago

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Language codes of the training files; each one is read from
# '<code>_train.txt' in the current working directory.
LANGUAGES = ('am', 'ha', 'dz', 'ig', 'ma', 'pcm', 'pt', 'sw', 'yo')

# Sentiment classes, in the order their counts are printed and plotted.
LABELS = ('negative', 'positive', 'neutral')


def class_counts(df):
    """Return the number of rows per sentiment class in ``df['label']``.

    Args:
        df: DataFrame with a ``label`` column holding the class names.

    Returns:
        A tuple of ints ordered as ``LABELS`` (negative, positive, neutral).
        A class that does not occur in ``df`` is reported as 0; label values
        outside ``LABELS`` are ignored.

    Raises:
        KeyError: if ``df`` has no ``label`` column.
    """
    # One pass over the column instead of one boolean-mask scan per class.
    tallies = df['label'].value_counts().reindex(list(LABELS), fill_value=0)
    return tuple(int(n) for n in tallies)


def plot_class_distribution(lang, counts):
    """Draw a bar chart of ``counts`` (ordered as ``LABELS``) for ``lang``.

    Each call opens its own figure so the distributions of different
    languages are not drawn on top of each other. A bar chart is used rather
    than a binned histogram because the labels are categorical.
    """
    plt.figure()
    plt.bar(LABELS, counts)
    plt.title(lang)


def main():
    """Print and plot the class distribution of every language's train set.

    For each code in ``LANGUAGES`` the tab-separated file
    '<code>_train.txt' is loaded and one line with the negative, positive
    and neutral counts (space-separated) is printed.
    """
    for lang in LANGUAGES:
        df = pd.read_csv(f'{lang}_train.txt', sep='\t')
        counts = class_counts(df)
        plot_class_distribution(lang, counts)
        print(*counts)


if __name__ == '__main__':
    main()

aayush97 commented 2 years ago

image image