Closed: pretbc closed this issue 3 months ago
Also, I see an issue when trying to merge:
python src/merge_lora_weights.py --model-path output/lora_vision_test --model-base Phi-3-vision-128k-instruct --save-model-path output/merged_output --safe-serialization
/home/pretbc/.virtualenvs/Phi3-Vision-Finetune/lib/python3.10/site-packages/transformers/models/auto/image_processing_auto.py:513: FutureWarning: The image_processor_class argument is deprecated and will be removed in v4.42. Please use `slow_image_processor_class`, or `fast_image_processor_class` instead
warnings.warn(
Loading Phi3-Vision from base model...
Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]
Traceback (most recent call last):
File "/tmp/pycharm_project_444/src/merge_lora_weights.py", line 31, in <module>
merge_lora(args)
File "/tmp/pycharm_project_444/src/merge_lora_weights.py", line 6, in merge_lora
processor, model = load_pretrained_model(args.model_path, args.model_base, model_name, device_map='cpu')
File "/tmp/pycharm_project_444/src/utils.py", line 44, in load_pretrained_model
model = Phi3VForCausalLM.from_pretrained(model_base, low_cpu_mem_usage=True, config=lora_cfg_pretrained, **kwargs)
File "/home/pretbc/.virtualenvs/Phi3-Vision-Finetune/lib/python3.10/site-packages/transformers/modeling_utils.py", line 3941, in from_pretrained
) = cls._load_pretrained_model(
File "/home/pretbc/.virtualenvs/Phi3-Vision-Finetune/lib/python3.10/site-packages/transformers/modeling_utils.py", line 4415, in _load_pretrained_model
new_error_msgs, offload_index, state_dict_index = _load_state_dict_into_meta_model(
File "/home/pretbc/.virtualenvs/Phi3-Vision-Finetune/lib/python3.10/site-packages/transformers/modeling_utils.py", line 938, in _load_state_dict_into_meta_model
hf_quantizer.create_quantized_param(model, param, param_name, param_device, state_dict, unexpected_keys)
File "/home/pretbc/.virtualenvs/Phi3-Vision-Finetune/lib/python3.10/site-packages/transformers/quantizers/quantizer_bnb_4bit.py", line 191, in create_quantized_param
raise ValueError(
ValueError: Supplied state dict for model.layers.0.mlp.down_proj.weight does not contain `bitsandbytes__*` and possibly other `quantized_stats` components.
Output of ls:
ls output/lora_vision_test/
adapter_config.json adapter_model.safetensors checkpoint-178 config.json non_lora_state_dict.bin README.md trainer_state.json
ls output/lora_vision_test/checkpoint-178/
adapter_config.json config.json latest model-00002-of-00002.safetensors preprocessor_config.json rng_state.pth special_tokens_map.json tokenizer.json training_args.bin
adapter_model.safetensors global_step178 model-00001-of-00002.safetensors model.safetensors.index.json README.md scheduler.pt tokenizer_config.json trainer_state.json zero_to_fp32.py
Training script:
torchrun --nproc_per_node=1 \
src/training/train.py \
--lora_enable True \
--vision_lora True \
--lora_namespan_exclude "['lm_head']" \
--lora_rank 32 \
--lora_alpha 16 \
--lora_dropout 0.05 \
--num_lora_modules -1 \
--deepspeed scripts/zero2.json \
--model_id microsoft/Phi-3-vision-128k-instruct \
--data_path llava_format.json \
--image_folder /cherry \
--tune_img_projector True \
--freeze_vision_tower False \
--bf16 False \
--output_dir output/lora_vision_test \
--num_train_epochs 2 \
--per_device_train_batch_size 1 \
--gradient_accumulation_steps 1 \
--learning_rate 2e-4 \
--weight_decay 0. \
--warmup_ratio 0.03 \
--lr_scheduler_type "cosine" \
--logging_steps 1 \
--tf32 False \
--gradient_checkpointing True \
--report_to wandb \
--lazy_preprocess True \
--dataloader_num_workers 4 \
--disable_flash_attn2 True \
--bits 4
@pretbc Sorry for the inconvenience. There is a bash script for merging LoRA; you could use it. Also, I'll find the issue and solve it soon. Again, I'm sorry for the inconvenience.
@pretbc It seems like for QLoRA you need to adjust the code for it:
import argparse
from utils import get_model_name_from_path, load_pretrained_model


def merge_lora(args):
    model_name = get_model_name_from_path(args.model_path)
    processor, model = load_pretrained_model(model_path=args.model_path, model_base=args.model_base,
                                             model_name=model_name, device_map='cpu',
                                             load_8bit=args.load_8bit, load_4bit=args.load_4bit)

    if args.safe_serialization:
        from accelerate import Accelerator
        accel = Accelerator()
        # You could set the shard size whatever you want
        accel.save_model(model, args.save_model_path, max_shard_size='5GB')
        model.config.save_pretrained(args.save_model_path)
        processor.save_pretrained(args.save_model_path)
    else:
        model.save_pretrained(args.save_model_path, safe_serialization=False)
        processor.save_pretrained(args.save_model_path)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--model-path", type=str, required=True)
    parser.add_argument("--model-base", type=str, required=True)
    parser.add_argument("--save-model-path", type=str, required=True)
    parser.add_argument("--safe-serialization", action='store_true')
    parser.add_argument("--load-8bit", action='store_true')
    parser.add_argument("--load-4bit", action='store_true')
    args = parser.parse_args()
    merge_lora(args)
This is the code for it. Can you add your quantization option to it?
Hello there:
I pasted your code into merge_lora_weights.py, but that did not resolve the issue.
/tmp/pycharm_project_444$ python src/merge_lora_weights.py --model-path output/lora_vision_test --model-base Phi-3-vision-128k-instruct --save-model-path output/merged_output --safe-serialization --load-4bit
/home/pretbc/.virtualenvs/Phi3-Vision-Finetune/lib/python3.10/site-packages/transformers/models/auto/image_processing_auto.py:513: FutureWarning: The image_processor_class argument is deprecated and will be removed in v4.42. Please use `slow_image_processor_class`, or `fast_image_processor_class` instead
warnings.warn(
Loading Phi3-Vision from base model...
Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
/home/pretbc/.virtualenvs/Phi3-Vision-Finetune/lib/python3.10/site-packages/transformers/quantizers/auto.py:174: UserWarning: You passed `quantization_config` or equivalent parameters to `from_pretrained` but the model you're loading already has a `quantization_config` attribute. The `quantization_config` from the model will be used.
warnings.warn(warning_msg)
Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]
Traceback (most recent call last):
File "/tmp/pycharm_project_444/src/merge_lora_weights.py", line 67, in <module>
merge_lora(args)
File "/tmp/pycharm_project_444/src/merge_lora_weights.py", line 38, in merge_lora
processor, model = load_pretrained_model(model_path=args.model_path, model_base=args.model_base,
File "/tmp/pycharm_project_444/src/utils.py", line 44, in load_pretrained_model
model = Phi3VForCausalLM.from_pretrained(model_base, low_cpu_mem_usage=True, config=lora_cfg_pretrained, **kwargs)
File "/home/pretbc/.virtualenvs/Phi3-Vision-Finetune/lib/python3.10/site-packages/transformers/modeling_utils.py", line 3941, in from_pretrained
) = cls._load_pretrained_model(
File "/home/pretbc/.virtualenvs/Phi3-Vision-Finetune/lib/python3.10/site-packages/transformers/modeling_utils.py", line 4415, in _load_pretrained_model
new_error_msgs, offload_index, state_dict_index = _load_state_dict_into_meta_model(
File "/home/pretbc/.virtualenvs/Phi3-Vision-Finetune/lib/python3.10/site-packages/transformers/modeling_utils.py", line 938, in _load_state_dict_into_meta_model
hf_quantizer.create_quantized_param(model, param, param_name, param_device, state_dict, unexpected_keys)
File "/home/pretbc/.virtualenvs/Phi3-Vision-Finetune/lib/python3.10/site-packages/transformers/quantizers/quantizer_bnb_4bit.py", line 191, in create_quantized_param
raise ValueError(
ValueError: Supplied state dict for model.layers.0.mlp.down_proj.weight does not contain `bitsandbytes__*` and possibly other `quantized_stats` components.
@2U1 any update ?
@pretbc Not much yet. As a workaround, you can train with zero3_offload and without quantization; see the sketch below.
I will look into it a bit more.
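A sketch of the adjusted training command, assuming the repo ships a scripts/zero3_offload.json alongside scripts/zero2.json (the only changes from the script above are the DeepSpeed config and dropping --bits 4):

torchrun --nproc_per_node=1 \
    src/training/train.py \
    --lora_enable True \
    --vision_lora True \
    --lora_namespan_exclude "['lm_head']" \
    --lora_rank 32 \
    --lora_alpha 16 \
    --lora_dropout 0.05 \
    --num_lora_modules -1 \
    --deepspeed scripts/zero3_offload.json \
    --model_id microsoft/Phi-3-vision-128k-instruct \
    --data_path llava_format.json \
    --image_folder /cherry \
    --tune_img_projector True \
    --freeze_vision_tower False \
    --bf16 False \
    --output_dir output/lora_vision_test \
    --num_train_epochs 2 \
    --per_device_train_batch_size 1 \
    --gradient_accumulation_steps 1 \
    --learning_rate 2e-4 \
    --weight_decay 0. \
    --warmup_ratio 0.03 \
    --lr_scheduler_type "cosine" \
    --logging_steps 1 \
    --tf32 False \
    --gradient_checkpointing True \
    --report_to wandb \
    --lazy_preprocess True \
    --dataloader_num_workers 4 \
    --disable_flash_attn2 True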
I don't know if this is really the cause, but there is a warning when you pass the LoRA config:
UserWarning: You passed `quantization_config` or equivalent parameters to `from_pretrained` but the model you're loading already has a `quantization_config` attribute. The `quantization_config` from the model will be used.
warnings.warn(warning_msg)
Loading Phi3-Vision from base model...
Phi3VConfig {
"_name_or_path": "Phi-35-vision-128k-instruct",
"architectures": [
"Phi3VForCausalLM"
],
"attention_dropout": 0.0,
"auto_map": {
"AutoConfig": "configuration_phi3_v.Phi3VConfig",
"AutoModelForCausalLM": "modeling_phi3_v.Phi3VForCausalLM"
},
"bos_token_id": 1,
"embd_layer": {
"embedding_cls": "image",
"hd_transform_order": "sub_glb",
"projection_cls": "mlp",
"use_hd_transform": true,
"with_learnable_separator": true
},
"embd_pdrop": 0.0,
"eos_token_id": 2,
"hidden_act": "silu",
"hidden_size": 3072,
"img_processor": {
"image_dim_out": 1024,
"model_name": "openai/clip-vit-large-patch14-336",
"name": "clip_vision_model",
"num_img_tokens": 144
},
"initializer_range": 0.02,
"intermediate_size": 8192,
"max_position_embeddings": 131072,
"model_type": "phi3_v",
"num_attention_heads": 32,
"num_hidden_layers": 32,
"num_key_value_heads": 32,
"original_max_position_embeddings": 4096,
"pad_token_id": 32000,
"projector_lr": null,
"quantization_config": {
"_load_in_4bit": true,
"_load_in_8bit": false,
"bnb_4bit_compute_dtype": "bfloat16",
"bnb_4bit_quant_storage": "uint8",
"bnb_4bit_quant_type": "nf4",
"bnb_4bit_use_double_quant": true,
"llm_int8_enable_fp32_cpu_offload": false,
"llm_int8_has_fp16_weight": false,
"llm_int8_skip_modules": [
"img_projection"
],
"llm_int8_threshold": 6.0,
"load_in_4bit": true,
"load_in_8bit": false,
"quant_method": "bitsandbytes"
},
"resid_pdrop": 0.0,
"rms_norm_eps": 1e-05,
"rope_scaling": {
"long_factor": [
1.0800000429153442,
1.1100000143051147,
1.1399999856948853,
1.340000033378601,
1.5899999141693115,
1.600000023841858,
1.6200000047683716,
2.620000123977661,
3.2300000190734863,
3.2300000190734863,
4.789999961853027,
7.400000095367432,
7.700000286102295,
9.09000015258789,
12.199999809265137,
17.670000076293945,
24.46000099182129,
28.57000160217285,
30.420001983642578,
30.840002059936523,
32.590003967285156,
32.93000411987305,
42.320003509521484,
44.96000289916992,
50.340003967285156,
50.45000457763672,
57.55000305175781,
57.93000411987305,
58.21000289916992,
60.1400032043457,
62.61000442504883,
62.62000274658203,
62.71000289916992,
63.1400032043457,
63.1400032043457,
63.77000427246094,
63.93000411987305,
63.96000289916992,
63.970001220703125,
64.02999877929688,
64.06999969482422,
64.08000183105469,
64.12000274658203,
64.41000366210938,
64.4800033569336,
64.51000213623047,
64.52999877929688,
64.83999633789062
],
"short_factor": [
1.08,
1.1,
1.1300000000000001,
1.2800000000000002,
1.3100000000000003,
1.4500000000000004,
1.4500000000000004,
1.9500000000000008,
2.030000000000001,
2.4299999999999926,
2.5699999999999896,
2.9499999999999815,
3.729999999999965,
3.869999999999962,
4.189999999999955,
4.43999999999995,
4.6399999999999455,
4.979999999999938,
5.159999999999934,
5.279999999999932,
5.759999999999922,
5.889999999999919,
5.889999999999919,
5.969999999999917,
6.089999999999915,
6.2799999999999105,
6.7699999999999,
6.8899999999998975,
7.109999999999893,
7.129999999999892,
7.179999999999891,
7.289999999999889,
7.339999999999888,
7.559999999999883,
7.619999999999882,
7.69999999999988,
7.879999999999876,
7.879999999999876,
7.879999999999876,
7.939999999999875,
7.949999999999875,
7.979999999999874,
8.19999999999987,
8.439999999999864,
8.469999999999864,
8.589999999999861,
8.809999999999857,
8.999999999999853
],
"type": "su"
},
"rope_theta": 10000.0,
"sliding_window": 262144,
"tie_word_embeddings": false,
"tokenizer_model_max_length": 131072,
"tokenizer_padding_side": "right",
"torch_dtype": "bfloat16",
"transformers_version": "4.44.0",
"use_cache": true,
"vision_lr": null,
"vocab_size": 32064
}
Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
/home/pretbc/.virtualenvs/Phi3-Vision-Finetune/lib/python3.10/site-packages/transformers/quantizers/auto.py:174: UserWarning: You passed `quantization_config` or equivalent parameters to `from_pretrained` but the model you're loading already has a `quantization_config` attribute. The `quantization_config` from the model will be used.
warnings.warn(warning_msg)
Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]
Traceback (most recent call last):
File "/tmp/pycharm_project_444/src/merge_lora_weights.py", line 67, in <module>
merge_lora(args)
File "/tmp/pycharm_project_444/src/merge_lora_weights.py", line 38, in merge_lora
processor, model = load_pretrained_model(model_path=args.model_path, model_base=args.model_base,
File "/tmp/pycharm_project_444/src/utils.py", line 45, in load_pretrained_model
model = Phi3VForCausalLM.from_pretrained(model_base, low_cpu_mem_usage=True, config=lora_cfg_pretrained, **kwargs)
File "/home/pretbc/.virtualenvs/Phi3-Vision-Finetune/lib/python3.10/site-packages/transformers/modeling_utils.py", line 3941, in from_pretrained
) = cls._load_pretrained_model(
File "/home/pretbc/.virtualenvs/Phi3-Vision-Finetune/lib/python3.10/site-packages/transformers/modeling_utils.py", line 4415, in _load_pretrained_model
new_error_msgs, offload_index, state_dict_index = _load_state_dict_into_meta_model(
File "/home/pretbc/.virtualenvs/Phi3-Vision-Finetune/lib/python3.10/site-packages/transformers/modeling_utils.py", line 938, in _load_state_dict_into_meta_model
hf_quantizer.create_quantized_param(model, param, param_name, param_device, state_dict, unexpected_keys)
File "/home/pretbc/.virtualenvs/Phi3-Vision-Finetune/lib/python3.10/site-packages/transformers/quantizers/quantizer_bnb_4bit.py", line 191, in create_quantized_param
raise ValueError(
ValueError: Supplied state dict for model.layers.0.mlp.down_proj.weight does not contain `bitsandbytes__*` and possibly other `quantized_stats` components.
@pretbc I was working on this: you can extract the quantization_config from the config.json, but it still can't be merged.
However, you can still use the 'model.load_adapter' method to use your weights; a sketch is below.
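For example, something along these lines should work for inference with the adapter applied on top of a 4-bit base (a rough sketch, not the repo's code; it assumes the adapter folder output/lora_vision_test from above and requires peft to be installed):

# Rough sketch: use the LoRA adapter directly on a 4-bit quantized base model
# instead of merging. Paths and model id are taken from this thread.
import torch
from transformers import AutoModelForCausalLM, AutoProcessor, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3-vision-128k-instruct",
    quantization_config=bnb_config,
    trust_remote_code=True,
    _attn_implementation="eager",  # avoids requiring flash-attn
    device_map="auto",
)
# load_adapter comes from transformers' PEFT integration (needs `peft` installed).
model.load_adapter("output/lora_vision_test")

processor = AutoProcessor.from_pretrained(
    "microsoft/Phi-3-vision-128k-instruct", trust_remote_code=True
)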
@pretbc
import argparse
from utils import get_model_name_from_path, load_pretrained_model
import torch
from transformers import AutoModelForCausalLM, AutoProcessor
from peft import PeftModel
from accelerate import Accelerator
import os


def merge_lora(args):
    model_name = get_model_name_from_path(args.model_path)
    # processor, model = load_pretrained_model(model_path=args.model_path, model_base=args.model_base,
    #                                          model_name=model_name, device_map='cpu',
    #                                          load_8bit=args.load_8bit, load_4bit=args.load_4bit)

    # Load the base model unquantized (fp16) so that PeftModel can merge into it.
    # NOTE: the base model id is hardcoded here; args.model_base is unused in this snippet.
    model = AutoModelForCausalLM.from_pretrained('microsoft/Phi-3.5-vision-instruct', low_cpu_mem_usage=True,
                                                 trust_remote_code=True, torch_dtype=torch.float16)
    processor = AutoProcessor.from_pretrained('microsoft/Phi-3.5-vision-instruct', trust_remote_code=True)

    # Re-initialize lm_head / embed_tokens if their shapes do not match (e.g. resized vocab).
    token_num, token_dim = model.lm_head.out_features, model.lm_head.in_features
    if model.lm_head.weight.shape[0] != token_num:
        model.lm_head.weight = torch.nn.Parameter(torch.empty(token_num, token_dim, device=model.device, dtype=model.dtype))
        model.model.embed_tokens.weight = torch.nn.Parameter(torch.empty(token_num, token_dim, device=model.device, dtype=model.dtype))

    print('Loading additional Phi3-Vision weights...')
    # Load the non-LoRA trainable weights saved during training and strip the wrapper prefixes.
    non_lora_trainables = torch.load(os.path.join(args.model_path, 'non_lora_state_dict.bin'), map_location='cpu')
    non_lora_trainables = {(k[11:] if k.startswith('base_model.') else k): v for k, v in non_lora_trainables.items()}
    if any(k.startswith('model.model.') for k in non_lora_trainables):
        non_lora_trainables = {(k[6:] if k.startswith('model.') else k): v for k, v in non_lora_trainables.items()}
    model.load_state_dict(non_lora_trainables, strict=False)

    print('Loading LoRA weights...')
    model = PeftModel.from_pretrained(model, args.model_path)
    print('Merging LoRA weights...')
    model = model.merge_and_unload()
    print('Model Loaded!!!')

    if args.safe_serialization:
        accel = Accelerator()
        # You could set the shard size whatever you want
        accel.save_model(model, args.save_model_path, max_shard_size='5GB')
        model.config.save_pretrained(args.save_model_path)
        processor.save_pretrained(args.save_model_path)
    else:
        model.save_pretrained(args.save_model_path, safe_serialization=False)
        processor.save_pretrained(args.save_model_path)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--model-path", type=str, required=True)
    parser.add_argument("--model-base", type=str, required=True)
    parser.add_argument("--save-model-path", type=str, required=True)
    parser.add_argument("--safe-serialization", action='store_true')
    args = parser.parse_args()
    merge_lora(args)
This will work fine for you; I just tested it.
The problem was that config.json already has a quantization_config, and when using PeftModel.from_pretrained the base model shouldn't be quantized. I've fixed the repo code too.
I really appreciate you developing the code, and again, sorry for the inconvenience.
Hmm, I pulled master and re-trained with the same params, and I ended up with a size mismatch:
You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`.
Loading checkpoint shards: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 2.37it/s]
/home/pretbc/.virtualenvs/Phi3-Vision-Finetune/lib/python3.10/site-packages/transformers/models/auto/image_processing_auto.py:513: FutureWarning: The image_processor_class argument is deprecated and will be removed in v4.42. Please use `slow_image_processor_class`, or `fast_image_processor_class` instead
warnings.warn(
Loading additional Phi3-Vision weights...
/tmp/pycharm_project_444/src/merge_lora_weights.py:58: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
non_lora_trainables = torch.load(os.path.join(args.model_path, 'non_lora_state_dict.bin'), map_location='cpu')
Traceback (most recent call last):
File "/tmp/pycharm_project_444/src/merge_lora_weights.py", line 95, in <module>
merge_lora(args)
File "/tmp/pycharm_project_444/src/merge_lora_weights.py", line 62, in merge_lora
model.load_state_dict(non_lora_trainables, strict=False)
File "/home/pretbc/.virtualenvs/Phi3-Vision-Finetune/lib/python3.10/site-packages/torch/nn/modules/module.py", line 2215, in load_state_dict
raise RuntimeError('Error(s) in loading state_dict for {}:\n\t{}'.format(
RuntimeError: Error(s) in loading state_dict for Phi3VForCausalLM:
size mismatch for lm_head.weight: copying a param with shape torch.Size([49250304, 1]) from checkpoint, the shape in current model is torch.Size([32064, 3072]).
Same error with the code snippet above and with the current master merge_lora implementation.
@pretbc Oh, sorry, I hadn't mentioned that: when tuning embed_tokens with LoRA, you need to tune lm_head with it.
You should fix your script like this:
--lora_namespan_exclude "['lm_head', 'embed_tokens']" \
Great news - it's working :) Going to close the issue as resolved.
Hello, after training QLoRA I got the produced checkpoint under
Is this the correct approach for running the merge LoRA shell script?
and then
thx