是否已有关于该错误的issue或讨论? | Is there an existing issue / discussion for this?
[X] 我已经搜索过已有的issues和讨论 | I have searched the existing issues / discussions
该问题是否在FAQ中有解答? | Is there an existing answer for this in FAQ?
[X] 我已经搜索过FAQ | I have searched FAQ
当前行为 | Current Behavior
I got RuntimeError: mat1 and mat2 must have the same dtype, but got Float and Half
max_steps is given, it will override any value given in num_train_epochs
0% 0/10000 [00:00<?, ?it/s]Traceback (most recent call last):
File "/content/MiniCPM-V/finetune/finetune.py", line 333, in <module>
train()
File "/content/MiniCPM-V/finetune/finetune.py", line 323, in train
trainer.train()
File "/usr/local/lib/python3.10/dist-packages/transformers/trainer.py", line 1859, in train
return inner_training_loop(
File "/usr/local/lib/python3.10/dist-packages/transformers/trainer.py", line 2203, in _inner_training_loop
tr_loss_step = self.training_step(model, inputs)
File "/content/MiniCPM-V/finetune/trainer.py", line 203, in training_step
loss = self.compute_loss(model, inputs)
File "/content/MiniCPM-V/finetune/trainer.py", line 28, in compute_loss
outputs = self.model.base_model(data = inputs, use_cache=False)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/peft/tuners/tuners_utils.py", line 179, in forward
return self.model.forward(*args, **kwargs)
File "/root/.cache/huggingface/modules/transformers_modules/openbmb/MiniCPM-V-2/caaa1661c909108ceb65f3ef0108a80c986f183f/modeling_minicpmv.py", line 141, in forward
vllm_embedding, vision_hidden_states = self.get_vllm_embedding(data)
File "/root/.cache/huggingface/modules/transformers_modules/openbmb/MiniCPM-V-2/caaa1661c909108ceb65f3ef0108a80c986f183f/modeling_minicpmv.py", line 94, in get_vllm_embedding
vision_hidden_states.append(self.get_vision_embedding(pixel_values))
File "/root/.cache/huggingface/modules/transformers_modules/openbmb/MiniCPM-V-2/caaa1661c909108ceb65f3ef0108a80c986f183f/modeling_minicpmv.py", line 85, in get_vision_embedding
res.append(self.resampler(vision_embedding, tgt_size))
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/root/.cache/huggingface/modules/transformers_modules/openbmb/MiniCPM-V-2/caaa1661c909108ceb65f3ef0108a80c986f183f/resampler.py", line 158, in forward
out = self.attn(
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/activation.py", line 1241, in forward
attn_output, attn_output_weights = F.multi_head_attention_forward(
File "/usr/local/lib/python3.10/dist-packages/torch/nn/functional.py", line 5300, in multi_head_attention_forward
q, k, v = _in_projection_packed(query, key, value, in_proj_weight, in_proj_bias)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/functional.py", line 4846, in _in_projection_packed
return linear(q, w_q, b_q), linear(k, w_k, b_k), linear(v, w_v, b_v)
RuntimeError: mat1 and mat2 must have the same dtype, but got Float and Half
期望行为 | Expected Behavior
No response
复现方法 | Steps To Reproduce
LLM_TYPE="minicpm" # if use openbmb/MiniCPM-V-2, please set LLM_TYPE=minicpm
是否已有关于该错误的issue或讨论? | Is there an existing issue / discussion for this?
该问题是否在FAQ中有解答? | Is there an existing answer for this in FAQ?
当前行为 | Current Behavior
I got RuntimeError: mat1 and mat2 must have the same dtype, but got Float and Half
max_steps is given, it will override any value given in num_train_epochs
0% 0/10000 [00:00<?, ?it/s]Traceback (most recent call last):
File "/content/MiniCPM-V/finetune/finetune.py", line 333, in <module>
train()
File "/content/MiniCPM-V/finetune/finetune.py", line 323, in train
trainer.train()
File "/usr/local/lib/python3.10/dist-packages/transformers/trainer.py", line 1859, in train
return inner_training_loop(
File "/usr/local/lib/python3.10/dist-packages/transformers/trainer.py", line 2203, in _inner_training_loop
tr_loss_step = self.training_step(model, inputs)
File "/content/MiniCPM-V/finetune/trainer.py", line 203, in training_step
loss = self.compute_loss(model, inputs)
File "/content/MiniCPM-V/finetune/trainer.py", line 28, in compute_loss
outputs = self.model.base_model(data = inputs, use_cache=False)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/peft/tuners/tuners_utils.py", line 179, in forward
return self.model.forward(*args, **kwargs)
File "/root/.cache/huggingface/modules/transformers_modules/openbmb/MiniCPM-V-2/caaa1661c909108ceb65f3ef0108a80c986f183f/modeling_minicpmv.py", line 141, in forward
vllm_embedding, vision_hidden_states = self.get_vllm_embedding(data)
File "/root/.cache/huggingface/modules/transformers_modules/openbmb/MiniCPM-V-2/caaa1661c909108ceb65f3ef0108a80c986f183f/modeling_minicpmv.py", line 94, in get_vllm_embedding
vision_hidden_states.append(self.get_vision_embedding(pixel_values))
File "/root/.cache/huggingface/modules/transformers_modules/openbmb/MiniCPM-V-2/caaa1661c909108ceb65f3ef0108a80c986f183f/modeling_minicpmv.py", line 85, in get_vision_embedding
res.append(self.resampler(vision_embedding, tgt_size))
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/root/.cache/huggingface/modules/transformers_modules/openbmb/MiniCPM-V-2/caaa1661c909108ceb65f3ef0108a80c986f183f/resampler.py", line 158, in forward
out = self.attn(
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/activation.py", line 1241, in forward
attn_output, attn_output_weights = F.multi_head_attention_forward(
File "/usr/local/lib/python3.10/dist-packages/torch/nn/functional.py", line 5300, in multi_head_attention_forward
q, k, v = _in_projection_packed(query, key, value, in_proj_weight, in_proj_bias)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/functional.py", line 4846, in _in_projection_packed
return linear(q, w_q, b_q), linear(k, w_k, b_k), linear(v, w_v, b_v)
RuntimeError: mat1 and mat2 must have the same dtype, but got Float and Half
期望行为 | Expected Behavior
No response
复现方法 | Steps To Reproduce
LLM_TYPE="minicpm" # if use openbmb/MiniCPM-V-2, please set LLM_TYPE=minicpm
DISTRIBUTED_ARGS="
--nproc_per_node $GPUS_PER_NODE \
--nnodes $NNODES \
--node_rank $NODE_RANK \
--master_addr $MASTER_ADDR \
--master_port $MASTER_PORT
"
CUDA_LAUNCH_BLOCKING=1 python finetune.py \
    --model_name_or_path $MODEL \
    --llm_type $LLM_TYPE \
    --data_path $DATA \
    --eval_data_path $EVAL_DATA \
    --remove_unused_columns false \
    --label_names "labels" \
    --prediction_loss_only false \
    --bf16 false \
    --bf16_full_eval false \
    --fp16 true \
    --fp16_full_eval true \
    --do_train \
    --do_eval \
    --tune_vision true \
    --tune_llm false \
    --use_lora true \
    --lora_target_modules "llm\..*layers\.\d+\.self_attn\.(q_proj|k_proj)" \
    --model_max_length 64 \
    --max_slice_nums 9 \
    --scale_resolution 112 \
    --max_steps 10000 \
    --eval_steps 1000 \
    --output_dir output/output_minicpmv2_lora \
    --logging_dir output/output_minicpmv2_lora \
    --logging_strategy "steps" \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 1 \
    --gradient_accumulation_steps 1 \
    --evaluation_strategy "steps" \
    --save_strategy "steps" \
    --save_steps 1000 \
    --save_total_limit 10 \
    --learning_rate 1e-6 \
    --weight_decay 0.1 \
    --adam_beta2 0.95 \
    --warmup_ratio 0.01 \
    --lr_scheduler_type "cosine" \
    --logging_steps 1 \
    --gradient_checkpointing true \
    --report_to "tensorboard" # wandb
运行环境 | Environment
备注 | Anything else?
No response