kuldeep7688 / multi_task_text_classification


Help with multi-task classification with DistilBERT with 4 labels, based on your repo #1

Open joaopedrosdmm opened 2 months ago

joaopedrosdmm commented 2 months ago

I'm working on a multi-task classification with DistilBERT with 4 labels, based on your repo, and I was wondering if maybe you could help me, since I'm having a hard time trying to reach the Hugging Face team.

I started training the model; it finished the first epoch, started evaluation, and threw the error below at the end of the evaluation. If I take `load_best_model_at_end` out of the trainer args, the eval runs, but I get no eval loss. I also ran predict and found that I got label_ids=None.

I ran:

```python
for batch in trainer.get_eval_dataloader(data['test']):
    print(batch)
    break
```

and got the following:

```
{'input_ids': tensor([[  101, 67618, 10671,  ...,     0,     0,     0],
        [  101, 67618, 10671,  ...,   169, 12211,   102],
        [  101, 27746, 13386,  ...,     0,     0,     0],
        [  101, 73219, 14002,  ...,     0,     0,     0]], device='cuda:0'),
 'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 1, 1, 1],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0]], device='cuda:0'),
 'line_labels': tensor([3, 1, 1, 1], device='cuda:0'),
 'cat_labels': tensor([ 9, 16, 16, 16], device='cuda:0'),
 'sub_cat_labels': tensor([77, 48, 48, 48], device='cuda:0'),
 'motive_labels': tensor([ 2, 34, 34, 34], device='cuda:0')}
```
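So the batch does contain all four label tensors. If it helps, my understanding (paraphrasing the Trainer source from memory, so treat the exact logic as an approximation) is that labels are only gathered during evaluation when every name in `label_names` appears in the batch:

```python
# Rough paraphrase of the Trainer's label check (from memory, v4.x): labels
# are gathered only if EVERY name in args.label_names is found in the batch;
# otherwise the Trainer evaluates with labels=None, so no eval_loss is
# computed and predict() reports label_ids=None.
batch_keys = {'input_ids', 'attention_mask', 'line_labels', 'cat_labels',
              'sub_cat_labels', 'motive_labels'}  # the keys printed above
has_labels = all(name in batch_keys for name in label_names)
print(has_labels)
```

If that comes back False, it would at least explain the missing eval loss and the label_ids=None from predict.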

I really need help figuring out what is going on here; I'm out of options and can't understand it. If you could shed some light, I would appreciate it. I'm at the point where I feel I need to write a custom Trainer, because I have no other solutions left to try.

Code:

```python
import evaluate
import numpy as np

# Defining the metrics
LINE_METRIC = evaluate.load("f1")
CAT_METRIC = evaluate.load("f1")
SUB_CAT_METRIC = evaluate.load("f1")
MOTIVE_METRIC = evaluate.load("f1")

def compute_metrics(eval_pred):
    print(eval_pred)
    all_logits, all_labels = eval_pred
    logits_line, logits_cat, logits_sub_cat, logits_motive = all_logits
    line_labels, cat_labels, sub_cat_labels, motive_labels = all_labels

    line_predictions = np.argmax(logits_line, axis=-1)
    cat_predictions = np.argmax(logits_cat, axis=-1)
    sub_cat_predictions = np.argmax(logits_sub_cat, axis=-1)
    motive_predictions = np.argmax(logits_motive, axis=-1)

    print("PRED")
    print(line_predictions, cat_predictions, sub_cat_predictions, motive_predictions)

    line_computed_metrics = LINE_METRIC.compute(predictions=line_predictions, references=line_labels, average='weighted')
    cat_computed_metrics = CAT_METRIC.compute(predictions=cat_predictions, references=cat_labels, average='weighted')
    sub_cat_computed_metrics = SUB_CAT_METRIC.compute(predictions=sub_cat_predictions, references=sub_cat_labels, average='weighted')
    motive_computed_metrics = MOTIVE_METRIC.compute(predictions=motive_predictions, references=motive_labels, average='weighted')

    print("SCORE")
    print(line_computed_metrics, cat_computed_metrics, sub_cat_computed_metrics, motive_computed_metrics)

    return {
        'f1_line': line_computed_metrics['f1'],
        'f1_cat': cat_computed_metrics['f1'],
        'f1_sub_cat': sub_cat_computed_metrics['f1'],
        'f1_motive': motive_computed_metrics['f1'],
    }
```
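For reference, compute_metrics can be smoke-tested on its own with synthetic inputs shaped like the (all_logits, all_labels) tuples the Trainer passes for multi-output models (the class counts below are placeholders I made up):

```python
import numpy as np

# Smoke test with made-up shapes: four logits arrays and four label arrays,
# mimicking the nested tuples the Trainer hands to compute_metrics.
rng = np.random.default_rng(0)
num_classes = (4, 20, 80, 40)  # placeholder class counts per task
fake_logits = tuple(rng.normal(size=(8, n)) for n in num_classes)
fake_labels = tuple(rng.integers(0, n, size=8) for n in num_classes)
print(compute_metrics((fake_logits, fake_labels)))
# expect keys: f1_line, f1_cat, f1_sub_cat, f1_motive
```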

```python
output_directory = RESULTS_DIRECTORY
evaluation_strategy = 'epoch'
per_device_train_batch_size = 4
per_device_eval_batch_size = 4
gradient_accumulation_steps = 2
learning_rate = 2e-5
weight_decay = 0.01
max_grad_norm = 1
num_train_epochs = NUM_TRAIN_EPOCHS
lr_scheduler_type = 'linear'
warmup_ratio = 0.05
logging_dir = LOGGING_DIRECTORY
logging_strategy = 'epoch'
save_strategy = 'epoch'
save_total_limit = 1
label_names = ['line_labels', 'cat_labels', 'sub_cal_label', 'motive_labels']
load_best_model_at_end = True
metric_for_best_model = 'eval_f1_cat'
greater_is_better = True
label_smoothing_factor = 0
report_to = 'tensorboard'
gradient_checkpointing = False
```
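Since `label_names` is what the Trainer uses to pull the label columns out of each batch, here is a quick check (my own addition, assuming data is a datasets.DatasetDict) that every entry is a real column:

```python
# Sanity check (my own addition): every entry in label_names should be an
# actual column of the dataset, otherwise the Trainer never sees those labels.
missing = [name for name in label_names if name not in data['test'].column_names]
print("missing label columns:", missing)
```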

```python
# Setup training arguments
training_args = TrainingArguments(
    output_dir=output_directory,
    evaluation_strategy=evaluation_strategy,
    learning_rate=learning_rate,
    per_device_train_batch_size=per_device_train_batch_size,
    per_device_eval_batch_size=per_device_eval_batch_size,
    num_train_epochs=num_train_epochs,
    weight_decay=weight_decay,
    logging_dir=logging_dir,
    label_names=label_names,
    max_grad_norm=max_grad_norm,
    lr_scheduler_type=lr_scheduler_type,
    warmup_ratio=warmup_ratio,
    logging_strategy=logging_strategy,
    save_strategy=save_strategy,
    save_total_limit=save_total_limit,
    load_best_model_at_end=load_best_model_at_end,
    metric_for_best_model=metric_for_best_model,
    # greater_is_better=greater_is_better,
    label_smoothing_factor=label_smoothing_factor,
    # report_to=report_to,
    gradient_checkpointing=gradient_checkpointing,
)

early_stop_callback = EarlyStoppingCallback(3)

# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=data['train'],
    eval_dataset=data['test'],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    data_collator=data_collator,
    # callbacks=[early_stop_callback],
)
```
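To see quickly whether labels survive the Trainer's plumbing, I can run predict on a small slice (assuming data['test'] is a datasets.Dataset, so select is available):

```python
# Quick probe: predict on four examples and check whether label_ids is
# populated; in my runs it comes back as None.
probe = trainer.predict(data['test'].select(range(4)))
print(type(probe.predictions), probe.label_ids)
```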

Error:

```
KeyError                                  Traceback (most recent call last)
Cell In[36], line 1
----> 1 trainer.train()

File /opt/conda/lib/python3.10/site-packages/transformers/trainer.py:1859, in Trainer.train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
   1857     hf_hub_utils.enable_progress_bars()
   1858 else:
-> 1859     return inner_training_loop(
   1860         args=args,
   1861         resume_from_checkpoint=resume_from_checkpoint,
   1862         trial=trial,
   1863         ignore_keys_for_eval=ignore_keys_for_eval,
   1864     )

File /opt/conda/lib/python3.10/site-packages/transformers/trainer.py:2298, in Trainer._inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
   2295     self.control.should_training_stop = True
   2297 self.control = self.callback_handler.on_epoch_end(args, self.state, self.control)
-> 2298 self._maybe_log_save_evaluate(tr_loss, grad_norm, model, trial, epoch, ignore_keys_for_eval)
   2300 if DebugOption.TPU_METRICS_DEBUG in self.args.debug:
   2301     if is_torch_xla_available():
   2302         # tpu-comment: Logging debug metrics for PyTorch/XLA (compile, execute times, ops, etc.)

File /opt/conda/lib/python3.10/site-packages/transformers/trainer.py:2673, in Trainer._maybe_log_save_evaluate(self, tr_loss, grad_norm, model, trial, epoch, ignore_keys_for_eval)
   2670     self.lr_scheduler.step(metrics[metric_to_check])
   2672 if self.control.should_save:
-> 2673     self._save_checkpoint(model, trial, metrics=metrics)
   2674     self.control = self.callback_handler.on_save(self.args, self.state, self.control)

File /opt/conda/lib/python3.10/site-packages/transformers/trainer.py:2765, in Trainer._save_checkpoint(self, model, trial, metrics)
   2763 if not metric_to_check.startswith("eval_"):
   2764     metric_to_check = f"eval_{metric_to_check}"
-> 2765 metric_value = metrics[metric_to_check]
   2767 operator = np.greater if self.args.greater_is_better else np.less
   2768 if (
   2769     self.state.best_metric is None
   2770     or self.state.best_model_checkpoint is None
   2771     or operator(metric_value, self.state.best_metric)
   2772 ):

KeyError: 'eval_loss'
```
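Reading the traceback, the failing step seems to boil down to roughly this lookup (my paraphrase, not the exact transformers source):

```python
# My paraphrase of _save_checkpoint's best-model bookkeeping: the metric name
# falls back to "loss", gets the "eval_" prefix, and is then looked up in the
# metrics dict returned by evaluation. Since my evaluation produced no
# eval_loss, the lookup raises KeyError.
metric_to_check = training_args.metric_for_best_model or "loss"
if not metric_to_check.startswith("eval_"):
    metric_to_check = f"eval_{metric_to_check}"
metric_value = metrics[metric_to_check]  # KeyError: 'eval_loss'
```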

kuldeep7688 commented 2 months ago

Can you provide me with the output from your model when you just pass a single batch (batch_size=1) through it, without using the Trainer? My hunch is that it is related to the metrics you are using. Also, you will have to make changes in the DistilBert model class to handle the multiple types of input labels. I hope you have already done that.
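Something like this is what I mean; a minimal sketch, assuming model, data, and data_collator are the objects from your snippet above:

```python
import torch
from torch.utils.data import DataLoader

# Push one collated batch through the model directly, bypassing the Trainer,
# and inspect the raw outputs.
loader = DataLoader(data['test'], batch_size=1, collate_fn=data_collator)
batch = next(iter(loader))
device = next(model.parameters()).device
batch = {k: v.to(device) for k, v in batch.items()}

model.eval()
with torch.no_grad():
    outputs = model(**batch)
print(outputs)  # expect one logits tensor per task (line, cat, sub_cat, motive)
```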