I'm trying to learn how to use the package, and BERT in general. I'm following the tutorial in the README, and I'm having trouble understanding how to print the accuracy results.
My code:
class FastBert(BertSentimentAnalysis):
    """Sentiment classifier fine-tuned through fast-bert's ``BertLearner``.

    The class loads a labelled tweet CSV, writes a 70/30 train/validation
    split to disk, fine-tunes a pretrained model on that split, and prints
    batch predictions for the texts it is given.
    """

    def __init__(self):
        super().__init__()

    @property
    def load_data_set(self):
        """Return the labelled tweets reduced to the text and label columns.

        Rows whose ``label`` is not in ``SENTIMENT_LIST`` are discarded.
        """
        df = pd.read_csv("/content/drive/My Drive/BERT/data/Labled_tweets.csv")
        df = df[df.label.isin(SENTIMENT_LIST)]
        # NOTE(review): axis=1 drops every *column* that contains a missing
        # value, not the affected rows. If the text or label column has a
        # single NaN, the selection below raises KeyError -- confirm that
        # axis=0 was not intended.
        df = df.dropna(axis=1)
        df = df[[TEXT, LABEL.lower()]]
        return df

    def load_test_data(self):
        """Split ``self.df`` 70/30 and write ``train.csv`` / ``valid.csv``.

        ``self.df`` is not assigned anywhere in this class; presumably the
        base class populates it -- verify against ``BertSentimentAnalysis``.
        """
        # Split the DataSet into Train and Validation sets 70/30
        train, valid = train_test_split(self.df, test_size=0.3)
        # Save back to .csv format
        train.to_csv('/content/drive/My Drive/BERT/Resources/train.csv', index=False)
        valid.to_csv('/content/drive/My Drive/BERT/Resources/valid.csv', index=False)

    def train(self, texts):
        """Fine-tune the model, print predictions for ``texts``, and save it.

        Args:
            texts: Batch of input texts passed to ``learner.predict_batch``
                once training has finished.

        Returns:
            The trained ``BertLearner``.
        """
        marker_wrapper_printer("Starting training")
        # Fall back to the CPU instead of failing later on machines
        # without a CUDA device.
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        # check if multiple GPUs are available
        multi_gpu = torch.cuda.device_count() > 1

        # BertDataBunch contains the training, validation, and tests sets, alongside
        # arguments and the tokenizer used in training
        databunch = BertDataBunch(DATA_PATH, LABEL_DATA,
                                  tokenizer=self.tokenizer,
                                  train_file='train.csv',
                                  val_file='valid.csv',
                                  label_file='labels.csv',
                                  text_col='text',
                                  label_col='label',
                                  max_seq_length=CONFIG['max_seq_length'],
                                  multi_gpu=multi_gpu, multi_label=False)

        metrics = [{'name': 'accuracy', 'function': accuracy}]
        pprint(f"METRICS \n {metrics}")

        # The learner contains the logic for training loop, validation loop,
        # optimiser strategies and key metrics calculation
        learner = BertLearner.from_pretrained_model(
            dataBunch=databunch,
            pretrained_path='/content/drive/My Drive/BERT/Output/model_out',
            metrics=metrics, device=device, logger=logger,
            finetuned_wgts_path=None, is_fp16=CONFIG['fp16'],
            loss_scale=CONFIG['loss_scale'], multi_gpu=multi_gpu,
            multi_label=False,
            # The accumulation-step count belongs in grad_accumulation_steps;
            # it was previously passed as max_grad_norm (the gradient
            # clipping threshold), which now keeps its library default.
            grad_accumulation_steps=CONFIG["gradient_accumulation_steps"],
            output_dir="/content/drive/My Drive/BERT/Output")

        # Train the model
        marker_wrapper_printer("Training Fast Bert Model")
        # NOTE(review): with validate=True the per-epoch metric values are
        # presumably reported through `logger`; if no accuracy shows up,
        # confirm that `logger` is configured to emit INFO records.
        learner.fit(epochs=2,
                    lr=6e-5,
                    validate=True,  # Evaluate the model after each epoch
                    schedule_type="warmup_cosine",
                    optimizer_type="lamb")

        predictions = learner.predict_batch(texts)
        pprint(predictions)
        learner.save_model()
        return learner

    def run(self, texts):
        """Write the data split, train on it, and return the trained learner.

        Args:
            texts: Batch of input texts forwarded to ``train``.

        Returns:
            The trained ``BertLearner``.
        """
        # NOTE(review): this builds a second FastBert instead of using
        # `self`, so the base-class initialisation runs again -- confirm
        # whether that is intentional.
        fbsa = FastBert()
        fbsa.load_test_data()
        learner = fbsa.train(texts)
        marker_wrapper_printer("training completed, trained model saved successfully")
        return learner
I'm just confused about how to print the model's accuracy scores. I defined the method as stated in the README,
but printing/saving those scores has eluded me :)
I'm trying to learn how to use the package, and BERT in general. I'm following the tutorial in the README, and I'm having trouble understanding how to print the accuracy results.
My code:
I'm just confused about how to print the model's accuracy scores. I defined the method as stated in the README, but printing/saving those scores has eluded me :)