Since the latest version of trl (0.12.0) now takes `processing_class` instead of `tokenizer`, the `DPOTrainer` call needs updating. If you are unsure which version you have installed, you can check it as sketched below.
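A quick check of the installed version (a minimal sketch; only `trl`'s standard `__version__` attribute is assumed):

```python
import trl
print(trl.__version__)  # 0.12.0 or newer expects processing_class
```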
So, we need to change the trainer setup to:
```python
from transformers import TrainingArguments
from trl import DPOTrainer, DPOConfig
from unsloth import is_bfloat16_supported

# the newest version of trl now uses processing_class instead of tokenizer
dpo_trainer = DPOTrainer(
    model=model,
    ref_model=None,
    args=DPOConfig(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        warmup_ratio=0.1,
        num_train_epochs=3,
        learning_rate=5e-6,
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        logging_steps=1,
        optim="adamw_8bit",
        weight_decay=0.0,
        lr_scheduler_type="linear",
        seed=42,
        output_dir="outputs",
        report_to="none",  # Use this for WandB etc.
    ),
    beta=0.1,
    train_dataset=raw_datasets["train"],
    # tokenizer=tokenizer,
    processing_class=tokenizer,
    max_length=1024,
    max_prompt_length=512,
)
```
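Once the trainer is built this way it is used exactly as before (a minimal sketch, assuming the `dpo_trainer` object constructed above):

```python
dpo_trainer.train()
```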
And for some reason the tokenizer returned by unsloth's `FastLanguageModel.from_pretrained` does not work well as the `processing_class`, so we need to load the original tokenizer:
```python
## for the DPO colab
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("unsloth/zephyr-sft-bnb-4bit")

## For the ORPO colab notebook
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("unsloth/llama-3-8b-bnb-4bit")
```
instead of the tokenizer returned by:
```python
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = model_name,
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)
```
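Putting the two pieces together for the DPO notebook: the model is still loaded with unsloth, but its tokenizer is discarded and the original one is loaded with `AutoTokenizer` and passed to `DPOTrainer` as `processing_class` (a minimal sketch; the `max_seq_length`, `dtype`, and `load_in_4bit` values here are illustrative assumptions, not taken from the notebook):

```python
from unsloth import FastLanguageModel
from transformers import AutoTokenizer

model_name = "unsloth/zephyr-sft-bnb-4bit"

# Load the 4-bit model with unsloth as before, but ignore its tokenizer
model, _ = FastLanguageModel.from_pretrained(
    model_name = model_name,
    max_seq_length = 1024,
    dtype = None,          # auto-detect
    load_in_4bit = True,
)

# Load the original tokenizer and pass this one as processing_class
tokenizer = AutoTokenizer.from_pretrained(model_name)
```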
This draft is a temporary fix for issue #1285.
@danielhanchen