utterworks / fast-bert

Super easy library for BERT based NLP models

printing / saving out the accuracy results of the learner object #257

Open yovelcohen opened 3 years ago

yovelcohen commented 3 years ago

I'm trying to learn how to use the package (and BERT in general). I'm following the tutorial in the README, but I'm having trouble figuring out how to print the accuracy results.

My code:

from pprint import pprint

import pandas as pd
import torch
from sklearn.model_selection import train_test_split

from fast_bert.data_cls import BertDataBunch
from fast_bert.learner_cls import BertLearner
from fast_bert.metrics import accuracy

# BertSentimentAnalysis, SENTIMENT_LIST, TEXT, LABEL, CONFIG, DATA_PATH,
# LABEL_DATA, marker_wrapper_printer and logger come from my own project code.


class FastBert(BertSentimentAnalysis):
    def __init__(self):
        super(FastBert, self).__init__()

    @property
    def load_data_set(self):
        df = pd.read_csv("/content/drive/My Drive/BERT/data/Labled_tweets.csv")
        df = df[df.label.isin(SENTIMENT_LIST)]
        df = df.dropna(axis=1)
        df = df[[TEXT, LABEL.lower()]]
        return df

    def load_test_data(self):
        # Split the DataSet into Train and Validation sets 70/30
        train, valid = train_test_split(self.df, test_size=0.3)
        # Save back to .csv format
        train.to_csv('/content/drive/My Drive/BERT/Resources/train.csv', index=False)
        valid.to_csv('/content/drive/My Drive/BERT/Resources/valid.csv', index=False)

    def train(self, texts):
        marker_wrapper_printer("Starting training")
        device = torch.device('cuda')
        # check if multiple GPUs are available
        if torch.cuda.device_count() > 1:
            multi_gpu = True
        else:
            multi_gpu = False

        # BertDataBunch holds the training, validation, and test sets, along with
        # the arguments and the tokenizer used in training

        databunch = BertDataBunch(DATA_PATH, LABEL_DATA,
                                  tokenizer=self.tokenizer,
                                  train_file='train.csv',
                                  val_file='valid.csv',
                                  label_file='labels.csv',
                                  text_col='text',
                                  label_col='label',
                                  max_seq_length=CONFIG['max_seq_length'],
                                  multi_gpu=multi_gpu, multi_label=False)
        # metrics computed on the validation set after each epoch
        metrics = [{'name': 'accuracy', 'function': accuracy}]
        pprint(f"METRICS \n {metrics}")
        # The learner contains the logic for training loop, validation loop,
        # optimiser strategies and key metrics calculation
        learner = BertLearner.from_pretrained_model(dataBunch=databunch, pretrained_path='/content/drive/My Drive/BERT/Output/model_out',
                                                    metrics=metrics, device=device, logger=logger,
                                                    finetuned_wgts_path=None, is_fp16=CONFIG['fp16'],
                                                    loss_scale=CONFIG['loss_scale'], multi_gpu=multi_gpu,
                                                    multi_label=False,
                                                    max_grad_norm=CONFIG["gradient_accumulation_steps"],
                                                    output_dir="/content/drive/My Drive/BERT/Output")

        # Train the model
        marker_wrapper_printer("Training Fast Bert Model")
        learner.fit(epochs=2,
                    lr=6e-5,
                    validate=True,  # Evaluate the model after each epoch
                    schedule_type="warmup_cosine",
                    optimizer_type="lamb")
        predictions = learner.predict_batch(texts)
        pprint(predictions)
        learner.save_model()
        return learner

    def run(self, texts):
        fbsa = FastBert()
        fbsa.load_test_data()
        learner = fbsa.train(texts)
        marker_wrapper_printer("training completed, trained model saved successfully")
        return learner


FBSA = FastBert()
learner = FBSA.run(texts=texts_to_analyse)

I'm just confused about how to print the model's accuracy scores. I defined the metric as described in the README, but actually printing or saving those scores has eluded me :)
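For reference, this is roughly what I was hoping to end up with. It's only a sketch based on my guess that learner.validate() returns a dict of the metrics configured above (loss plus accuracy), so please correct me if that's not the right call. (metrics.json is just a filename I made up.)

# rough sketch -- assuming learner.validate() returns something like
# {'loss': ..., 'accuracy': ...}; I'm not certain that it does
import json

results = learner.validate()
pprint(results)

# persist the scores next to the saved model (metrics.json is just a name I picked)
with open('/content/drive/My Drive/BERT/Output/metrics.json', 'w') as f:
    json.dump({name: float(value) for name, value in results.items()}, f, indent=2)

If the intended way is instead to read the eval metrics that fit(validate=True) reports through the logger, a pointer to that would also be great.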