CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
    --stage dpo \
    --model_name_or_path meta-llama/Llama-2-7b-chat-hf \
    --do_train \
    --dataset comparison_gpt4_en \
    --template default \
    --finetuning_type lora \
    --lora_target q_proj,v_proj \
    --resume_lora_training False \
    --output_dir models \
    --per_device_train_batch_size 2 \
    --gradient_accumulation_steps 4 \
    --lr_scheduler_type cosine \
    --logging_steps 10 \
    --save_steps 1000 \
    --learning_rate 1e-5 \
    --num_train_epochs 1.0 \
    --plot_loss \
    --fp16
08/25/2023 11:55:43 - WARNING - llmtuner.tuner.core.parser - `ddp_find_unused_parameters` needs to be set as False for LoRA in DDP training.
[INFO|training_args.py:1327] 2023-08-25 11:55:43,329 >> Found safetensors installation, but --save_safetensors=False. Safetensors should be a preferred weights saving format due to security and performance reasons. If your model cannot be saved by safetensors please feel free to open an issue at https://github.com/huggingface/safetensors!
[INFO|training_args.py:1769] 2023-08-25 11:55:43,329 >> PyTorch: setting up devices
/home/ti2877/miniconda3/envs/llama_etuning/lib/python3.10/site-packages/transformers/training_args.py:1672: FutureWarning: `--push_to_hub_token` is deprecated and will be removed in version 5 of 🤗 Transformers. Use `--hub_token` instead.
warnings.warn(
08/25/2023 11:55:43 - INFO - llmtuner.tuner.core.parser - Process rank: 0, device: cuda:0, n_gpu: 1
distributed training: True, compute dtype: torch.float16
08/25/2023 11:55:43 - INFO - llmtuner.tuner.core.parser - Training/evaluation parameters Seq2SeqTrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_backend=None,
ddp_broadcast_buffers=None,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=False,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
dispatch_batches=None,
do_eval=False,
do_predict=False,
do_train=True,
eval_accumulation_steps=None,
eval_delay=0,
eval_steps=None,
evaluation_strategy=no,
fp16=True,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
fsdp=[],
fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False},
fsdp_min_num_params=0,
fsdp_transformer_layer_cls_to_wrap=None,
full_determinism=False,
generation_config=None,
generation_max_length=None,
generation_num_beams=None,
gradient_accumulation_steps=4,
gradient_checkpointing=False,
greater_is_better=None,
group_by_length=False,
half_precision_backend=auto,
hub_always_push=False,
hub_model_id=None,
hub_private_repo=False,
hub_strategy=every_save,
hub_token=<HUB_TOKEN>,
ignore_data_skip=False,
include_inputs_for_metrics=False,
jit_mode_eval=False,
label_names=None,
label_smoothing_factor=0.0,
learning_rate=1e-05,
length_column_name=length,
load_best_model_at_end=False,
local_rank=0,
log_level=passive,
log_level_replica=warning,
log_on_each_node=True,
logging_dir=models/runs/Aug25_11-55-43_ece-895238.austin.utexas.edu,
logging_first_step=False,
logging_nan_inf_filter=True,
logging_steps=10,
logging_strategy=steps,
lr_scheduler_type=cosine,
max_grad_norm=1.0,
max_steps=-1,
metric_for_best_model=None,
mp_parameters=,
no_cuda=False,
num_train_epochs=1.0,
optim=adamw_torch,
optim_args=None,
output_dir=models,
overwrite_output_dir=False,
past_index=-1,
per_device_eval_batch_size=8,
per_device_train_batch_size=2,
predict_with_generate=False,
prediction_loss_only=False,
push_to_hub=False,
push_to_hub_model_id=None,
push_to_hub_organization=None,
push_to_hub_token=<PUSH_TO_HUB_TOKEN>,
ray_scope=last,
remove_unused_columns=True,
report_to=[],
resume_from_checkpoint=None,
run_name=models,
save_on_each_node=False,
save_safetensors=False,
save_steps=1000,
save_strategy=steps,
save_total_limit=None,
seed=42,
sharded_ddp=[],
skip_memory_metrics=True,
sortish_sampler=False,
tf32=None,
torch_compile=False,
torch_compile_backend=None,
torch_compile_mode=None,
torchdynamo=None,
tpu_metrics_debug=False,
tpu_num_cores=None,
use_cpu=False,
use_ipex=False,
use_legacy_prediction_loop=False,
use_mps_device=False,
warmup_ratio=0.0,
warmup_steps=0,
weight_decay=0.0,
)
08/25/2023 11:55:43 - INFO - llmtuner.dsets.loader - Loading dataset comparison_gpt4_data_en.json...
/home/ti2877/miniconda3/envs/llama_etuning/lib/python3.10/site-packages/datasets/load.py:2072: FutureWarning: 'use_auth_token' was deprecated in favor of 'token' in version 2.14.0 and will be removed in 3.0.0.
You can remove this warning by passing 'token=None' instead.
warnings.warn(
Using custom data configuration default-e59041cbcf1c2288
Loading Dataset Infos from /home/ti2877/miniconda3/envs/llama_etuning/lib/python3.10/site-packages/datasets/packaged_modules/json
Overwrite dataset info from restored data version if exists.
Loading Dataset info from /home/ti2877/.cache/huggingface/datasets/json/default-e59041cbcf1c2288/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96
Found cached dataset json (/home/ti2877/.cache/huggingface/datasets/json/default-e59041cbcf1c2288/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96)
Loading Dataset info from /home/ti2877/.cache/huggingface/datasets/json/default-e59041cbcf1c2288/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96
[INFO|tokenization_utils_base.py:1852] 2023-08-25 11:55:43,741 >> loading file tokenizer.model from cache at /home/ti2877/.cache/huggingface/hub/models--meta-llama--Llama-2-7b-chat-hf/snapshots/08751db2aca9bf2f7f80d2e516117a53d7450235/tokenizer.model
[INFO|tokenization_utils_base.py:1852] 2023-08-25 11:55:43,741 >> loading file added_tokens.json from cache at None
[INFO|tokenization_utils_base.py:1852] 2023-08-25 11:55:43,741 >> loading file special_tokens_map.json from cache at /home/ti2877/.cache/huggingface/hub/models--meta-llama--Llama-2-7b-chat-hf/snapshots/08751db2aca9bf2f7f80d2e516117a53d7450235/special_tokens_map.json
[INFO|tokenization_utils_base.py:1852] 2023-08-25 11:55:43,741 >> loading file tokenizer_config.json from cache at /home/ti2877/.cache/huggingface/hub/models--meta-llama--Llama-2-7b-chat-hf/snapshots/08751db2aca9bf2f7f80d2e516117a53d7450235/tokenizer_config.json
[INFO|configuration_utils.py:715] 2023-08-25 11:55:43,837 >> loading configuration file config.json from cache at /home/ti2877/.cache/huggingface/hub/models--meta-llama--Llama-2-7b-chat-hf/snapshots/08751db2aca9bf2f7f80d2e516117a53d7450235/config.json
[INFO|configuration_utils.py:775] 2023-08-25 11:55:43,839 >> Model config LlamaConfig {
"_name_or_path": "meta-llama/Llama-2-7b-chat-hf",
"architectures": [
"LlamaForCausalLM"
],
"bos_token_id": 1,
"eos_token_id": 2,
"hidden_act": "silu",
"hidden_size": 4096,
"initializer_range": 0.02,
"intermediate_size": 11008,
"max_position_embeddings": 4096,
"model_type": "llama",
"num_attention_heads": 32,
"num_hidden_layers": 32,
"num_key_value_heads": 32,
"pretraining_tp": 1,
"rms_norm_eps": 1e-06,
"rope_scaling": null,
"tie_word_embeddings": false,
"torch_dtype": "float16",
"transformers_version": "4.32.0",
"use_cache": true,
"vocab_size": 32000
}
[INFO|modeling_utils.py:2779] 2023-08-25 11:55:43,870 >> loading weights file model.safetensors from cache at /home/ti2877/.cache/huggingface/hub/models--meta-llama--Llama-2-7b-chat-hf/snapshots/08751db2aca9bf2f7f80d2e516117a53d7450235/model.safetensors.index.json
[INFO|modeling_utils.py:1191] 2023-08-25 11:55:43,870 >> Instantiating LlamaForCausalLM model under default dtype torch.float16.
[INFO|configuration_utils.py:768] 2023-08-25 11:55:43,871 >> Generate config GenerationConfig {
"_from_model_config": true,
"bos_token_id": 1,
"eos_token_id": 2,
"transformers_version": "4.32.0"
}
Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00, 7.63it/s]
[INFO|modeling_utils.py:3551] 2023-08-25 11:55:44,314 >> All model checkpoint weights were used when initializing LlamaForCausalLM.
[INFO|modeling_utils.py:3559] 2023-08-25 11:55:44,314 >> All the weights of LlamaForCausalLM were initialized from the model checkpoint at meta-llama/Llama-2-7b-chat-hf.
If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training.
[INFO|configuration_utils.py:730] 2023-08-25 11:55:44,374 >> loading configuration file generation_config.json from cache at /home/ti2877/.cache/huggingface/hub/models--meta-llama--Llama-2-7b-chat-hf/snapshots/08751db2aca9bf2f7f80d2e516117a53d7450235/generation_config.json
[INFO|configuration_utils.py:768] 2023-08-25 11:55:44,374 >> Generate config GenerationConfig {
"bos_token_id": 1,
"do_sample": true,
"eos_token_id": 2,
"max_length": 4096,
"pad_token_id": 0,
"temperature": 0.6,
"top_p": 0.9,
"transformers_version": "4.32.0"
}
08/25/2023 11:55:44 - INFO - llmtuner.tuner.core.adapter - Fine-tuning method: LoRA
08/25/2023 11:55:55 - INFO - llmtuner.tuner.core.loader - trainable params: 4194304 || all params: 6742609920 || trainable%: 0.0622
08/25/2023 11:55:55 - INFO - llmtuner.extras.template - Add pad token: <unk>
[INFO|tokenization_utils_base.py:926] 2023-08-25 11:55:55,618 >> Assigning [] to the additional_special_tokens key of the tokenizer
Loading cached processed dataset at /home/ti2877/.cache/huggingface/datasets/json/default-e59041cbcf1c2288/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-530c664b46180107.arrow
Caching processed dataset at /home/ti2877/.cache/huggingface/datasets/json/default-e59041cbcf1c2288/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-205ba5bc33e9f0b8.arrow
Running tokenizer on dataset: 100%|██████████| 36441/36441 [00:54<00:00, 665.39 examples/s]
prompt_ids:
[1, 319, 13563, 1546, 263, 12758, 1404, 322, 385, 23116, 21082, 20255, 29889, 450, 20255, 4076, 8444, 29892, 13173, 29892, 322, 1248, 568, 6089, 304, 278, 1404, 29915, 29879, 5155, 29889, 29871, 13, 12968, 29901, 1724, 526, 278, 2211, 7601, 11955, 29973, 13, 7900, 22137, 29901, 29871]
prompt:
<s> A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
Human: What are the three primary colors?
Assistant:
chosen_ids:
[450, 2211, 7601, 11955, 526, 2654, 29892, 7254, 29892, 322, 13328, 29889, 4525, 11955, 526, 2000, 7601, 1363, 896, 2609, 367, 2825, 491, 24907, 916, 11955, 322, 599, 916, 11955, 508, 367, 1754, 491, 29299, 963, 297, 5164, 12098, 1080, 29889, 512, 278, 788, 3321, 2927, 1788, 29892, 1304, 363, 3578, 29892, 278, 7601, 11955, 526, 2654, 29892, 7933, 29892, 322, 7254, 313, 28212, 467, 2]
chosen:
The three primary colors are red, blue, and yellow. These colors are called primary because they cannot be created by mixing other colors and all other colors can be made by combining them in various proportions. In the additive color system, used for light, the primary colors are red, green, and blue (RGB).</s>
rejected_ids:
[4367, 29892, 612, 4743, 29892, 322, 7646, 29889, 2]
rejected:
Red, Yellow, and Green.</s>
[INFO|training_args.py:1327] 2023-08-25 11:56:50,400 >> Found safetensors installation, but --save_safetensors=False. Safetensors should be a preferred weights saving format due to security and performance reasons. If your model cannot be saved by safetensors please feel free to open an issue at https://github.com/huggingface/safetensors!
[INFO|training_args.py:1769] 2023-08-25 11:56:50,400 >> PyTorch: setting up devices
[INFO|trainer.py:1714] 2023-08-25 11:56:54,818 >> ***** Running training *****
[INFO|trainer.py:1715] 2023-08-25 11:56:54,818 >> Num examples = 36,441
[INFO|trainer.py:1716] 2023-08-25 11:56:54,818 >> Num Epochs = 1
[INFO|trainer.py:1717] 2023-08-25 11:56:54,818 >> Instantaneous batch size per device = 2
[INFO|trainer.py:1720] 2023-08-25 11:56:54,818 >> Total train batch size (w. parallel, distributed & accumulation) = 8
[INFO|trainer.py:1721] 2023-08-25 11:56:54,818 >> Gradient Accumulation steps = 4
[INFO|trainer.py:1722] 2023-08-25 11:56:54,818 >> Total optimization steps = 4,555
[INFO|trainer.py:1723] 2023-08-25 11:56:54,820 >> Number of trainable parameters = 4,194,304
  0%|          | 0/4555 [00:00<?, ?it/s]
Traceback (most recent call last):
  File "/home/ti2877/miniconda3/envs/llama_etuning/lib/python3.10/site-packages/peft/peft_model.py", line 434, in __getattr__
    return super().__getattr__(name)  # defer to nn.Module's logic
  File "/home/ti2877/miniconda3/envs/llama_etuning/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1614, in __getattr__
    raise AttributeError("'{}' object has no attribute '{}'".format(
AttributeError: 'PeftModelForCausalLM' object has no attribute 'pretrained_model'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/ti2877/miniconda3/envs/llama_etuning/lib/python3.10/site-packages/peft/tuners/lora.py", line 492, in __getattr__
    return super().__getattr__(name)  # defer to nn.Module's logic
  File "/home/ti2877/miniconda3/envs/llama_etuning/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1614, in __getattr__
    raise AttributeError("'{}' object has no attribute '{}'".format(
AttributeError: 'LoraModel' object has no attribute 'pretrained_model'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/ti2877/LLaMA-Efficient-Tuning/src/train_bash.py", line 14, in <module>
    main()
  File "/home/ti2877/LLaMA-Efficient-Tuning/src/train_bash.py", line 5, in main
    run_exp()
  File "/home/ti2877/LLaMA-Efficient-Tuning/src/llmtuner/tuner/tune.py", line 32, in run_exp
    run_dpo(model_args, data_args, training_args, finetuning_args, callbacks)
  File "/home/ti2877/LLaMA-Efficient-Tuning/src/llmtuner/tuner/dpo/workflow.py", line 53, in run_dpo
    train_result = trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint)
  File "/home/ti2877/miniconda3/envs/llama_etuning/lib/python3.10/site-packages/transformers/trainer.py", line 1555, in train
    return inner_training_loop(
  File "/home/ti2877/miniconda3/envs/llama_etuning/lib/python3.10/site-packages/transformers/trainer.py", line 1837, in _inner_training_loop
    tr_loss_step = self.training_step(model, inputs)
  File "/home/ti2877/miniconda3/envs/llama_etuning/lib/python3.10/site-packages/transformers/trainer.py", line 2682, in training_step
    loss = self.compute_loss(model, inputs)
  File "/home/ti2877/miniconda3/envs/llama_etuning/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 396, in compute_loss
    loss, metrics = self.get_batch_metrics(model, inputs, train_eval="train")
  File "/home/ti2877/miniconda3/envs/llama_etuning/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 350, in get_batch_metrics
    with self.accelerator.unwrap_model(self.model).pretrained_model.disable_adapter():
  File "/home/ti2877/miniconda3/envs/llama_etuning/lib/python3.10/site-packages/peft/peft_model.py", line 436, in __getattr__
    return getattr(self.base_model, name)
  File "/home/ti2877/miniconda3/envs/llama_etuning/lib/python3.10/site-packages/peft/tuners/lora.py", line 494, in __getattr__
    return getattr(self.model, name)
  File "/home/ti2877/miniconda3/envs/llama_etuning/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1614, in __getattr__
    raise AttributeError("'{}' object has no attribute '{}'".format(
AttributeError: 'LlamaForCausalLM' object has no attribute 'pretrained_model'. Did you mean: '_load_pretrained_model'?
It looks like the trainer is looking up a `pretrained_model` attribute that my LoRA-wrapped model doesn't have. Is this a versioning mismatch between this repo and trl/peft? Any help is appreciated.
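For reference, here is a minimal sketch of what I think the mismatch is. This is my own reconstruction, not code from the repo: I'm assuming the DPO model is a plain PeftModelForCausalLM over the same checkpoint, and I'm only reproducing the single attribute lookup that fails at trl's dpo_trainer.py:350.

# Minimal reproduction sketch (my assumption of the failing lookup, not the repo's actual code).
from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model

base = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
lora_config = LoraConfig(task_type="CAUSAL_LM", target_modules=["q_proj", "v_proj"])
peft_model = get_peft_model(base, lora_config)  # PeftModelForCausalLM, as in the traceback

# peft exposes disable_adapter() directly on the PeftModel as a context manager:
with peft_model.disable_adapter():
    pass  # the base model runs without the LoRA weights here

# but trl's dpo_trainer.py (line 350 in my install) goes through .pretrained_model first:
try:
    with peft_model.pretrained_model.disable_adapter():
        pass
except AttributeError as err:
    print(err)  # 'LlamaForCausalLM' object has no attribute 'pretrained_model'

So disable_adapter() itself exists on the PeftModel; it's the intermediate pretrained_model attribute that nothing in this object chain provides.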