Open whymusticode opened 1 week ago
class CustomTrainer(SFTTrainer):
    """SFTTrainer subclass that mirrors evaluation and training logs to text files.

    Every evaluation appends a one-line summary to ``eval_log.txt`` and every
    ``log`` call appends the raw log dict to ``train_log.txt``. Both files are
    written under the module-level ``logging_dir``.
    """

    def __init__(self, label_weights, **kwargs):
        """Store ``label_weights`` and forward everything else to the parent.

        Args:
            label_weights: Kept on the instance as ``self.label_weights``;
                nothing in this class reads it.
            **kwargs: Passed unchanged to ``SFTTrainer.__init__``.
        """
        super().__init__(**kwargs)
        self.label_weights = label_weights

    def _append_log_line(self, filename, line):
        """Append ``line`` plus a newline to ``filename`` inside ``logging_dir``."""
        # Local import so this block does not depend on the file's import section.
        import os

        # Opening in append mode fails if the directory is missing, so create it.
        os.makedirs(logging_dir, exist_ok=True)
        with open(os.path.join(logging_dir, filename), 'a', encoding='utf-8') as log_file:
            log_file.write(line + '\n')

    def evaluate(self, eval_dataset=None, ignore_keys=None, metric_key_prefix="eval"):
        """Run the parent evaluation and append a summary line to ``eval_log.txt``.

        Args:
            eval_dataset: Forwarded to the parent ``evaluate``.
            ignore_keys: Forwarded to the parent ``evaluate``.
            metric_key_prefix: Forwarded to the parent; also used to look up
                ``"<prefix>_loss"`` in the returned metrics.

        Returns:
            The metrics dict returned by the parent ``evaluate``.

        Raises:
            KeyError: If the parent's metrics lack ``"<prefix>_loss"``.
        """
        results = super().evaluate(eval_dataset, ignore_keys, metric_key_prefix)
        validation_loss = results[f"{metric_key_prefix}_loss"]

        # The optimizer may not exist yet (e.g. evaluate() called before any
        # training step); report "n/a" instead of raising AttributeError.
        optimizer = getattr(self, "optimizer", None)
        if optimizer is not None and optimizer.param_groups:
            lr_text = f"{optimizer.param_groups[0]['lr']:.6f}"
        else:
            lr_text = "n/a"

        out = f'Step: {self.state.global_step}, Validation Loss: {validation_loss:.4f}, Learning Rate: {lr_text}'
        self._append_log_line('eval_log.txt', out)
        return results

    def log(self, logs, *args, **kwargs):
        """Forward ``logs`` to the parent logger and append them to ``train_log.txt``.

        Args:
            logs: The log dict; written to the file via ``str(logs)``.
            *args, **kwargs: Forwarded untouched to the parent ``log``.
                NOTE(review): newer transformers releases pass an extra
                ``start_time`` argument here - confirm against the installed
                version.
        """
        super().log(logs, *args, **kwargs)
        self._append_log_line('train_log.txt', str(logs))
# Build the trainer: model and data first, then the extra trainer options,
# then the TrainingArguments grouped by concern.
trainer = CustomTrainer(
    # Model, tokenizer and datasets.
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataSubSplit['train'],
    eval_dataset=dataSubSplit['test'],
    dataset_text_field="transcript",
    max_seq_length=max_seq_length,
    # CustomTrainer-specific argument and callbacks.
    label_weights=None,
    callbacks=[early_stopping_callback],
    args=TrainingArguments(
        output_dir=saveFolder,
        seed=3407,
        # Optimisation schedule.
        optim="adamw_8bit",
        learning_rate=1e-4,  # same value as 10e-5
        warmup_steps=50,
        num_train_epochs=5,
        # Batching.
        per_device_train_batch_size=batchSize,
        per_device_eval_batch_size=batchSize,
        gradient_accumulation_steps=gradAccum,
        # Mixed precision: bf16 when supported, otherwise fp16.
        bf16=is_bfloat16_supported(),
        fp16=not is_bfloat16_supported(),
        # Evaluate and checkpoint on the same step interval (saveSteps).
        evaluation_strategy="steps",
        eval_steps=saveSteps,
        save_strategy='steps',
        save_steps=saveSteps,
        # Best-model selection by evaluation loss.
        load_best_model_at_end=True,
        metric_for_best_model="eval_loss",
        # Logging on every step.
        logging_steps=1,
        logging_dir=logging_dir,
        log_level="info",
    ),
)
^ this works just fine, but I figure it's not necessary, and I just want to see what the right way to do it is
Many apologies for the delay - my bro and I relocated to the US, hence the slowness.
TBH I never bothered to check where the logs are going lol - they should be in logging_dir.
Is logging_dir an absolute or a relative path? Maybe try passing an absolute path.
I cannot get logging to work with unsloth. What am I doing wrong?
I'm doing the following:
https://github.com/huggingface/transformers/blob/main/src/transformers/training_args.py is the correct documentation, I believe. I've spent a few hours trying different combinations of arguments.
Nothing shows up in saveFolder+'/log', and I can't find any log files being generated anywhere.