RuntimeError: Input type (torch.cuda.ByteTensor) and weight type (CUDABFloat16Type) should be the same:
Details:
Traceback (most recent call last):
File "/datadisk/ai-prj/swift/examples/pytorch/llm/llm_sft.py", line 7, in <module>
output = sft_main()
File "/datadisk/ai-prj/swift/swift/utils/run_utils.py", line 31, in x_main
result = llm_x(args, **kwargs)
File "/datadisk/ai-prj/swift/swift/llm/sft.py", line 261, in llm_sft
trainer.train(training_args.resume_from_checkpoint)
File "/datadisk/ai-prj/swift/swift/trainers/trainers.py", line 54, in train
res = super().train(*args, **kwargs)
File "/home/anaconda3/envs/swift/lib/python3.10/site-packages/transformers/trainer.py", line 1780, in train
return inner_training_loop(
File "/home/anaconda3/envs/swift/lib/python3.10/site-packages/transformers/trainer.py", line 2118, in _inner_training_loop
tr_loss_step = self.training_step(model, inputs)
File "/home/anaconda3/envs/swift/lib/python3.10/site-packages/transformers/trainer.py", line 3036, in training_step
loss = self.compute_loss(model, inputs)
File "/datadisk/ai-prj/swift/swift/trainers/trainers.py", line 221, in compute_loss
outputs = model(**inputs)
File "/home/anaconda3/envs/swift/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/anaconda3/envs/swift/lib/python3.10/site-packages/accelerate/utils/operations.py", line 825, in forward
return model_forward(*args, **kwargs)
File "/home/anaconda3/envs/swift/lib/python3.10/site-packages/accelerate/utils/operations.py", line 813, in __call__
return convert_to_fp32(self.model_forward(*args, **kwargs))
File "/home/anaconda3/envs/swift/lib/python3.10/site-packages/torch/amp/autocast_mode.py", line 14, in decorate_autocast
return func(*args, **kwargs)
File "/home/anaconda3/envs/swift/lib/python3.10/site-packages/peft/peft_model.py", line 1129, in forward
return self.base_model(
File "/home/anaconda3/envs/swift/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/anaconda3/envs/swift/lib/python3.10/site-packages/peft/tuners/tuners_utils.py", line 161, in forward
return self.model.forward(*args, **kwargs)
File "/home/anaconda3/envs/swift/lib/python3.10/site-packages/accelerate/hooks.py", line 166, in new_forward
output = module._old_forward(*args, **kwargs)
File "/root/.cache/huggingface/modules/transformers_modules/Qwen-VL-Chat/modeling_qwen.py", line 856, in forward
transformer_outputs = self.transformer(
File "/home/anaconda3/envs/swift/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/anaconda3/envs/swift/lib/python3.10/site-packages/accelerate/hooks.py", line 166, in new_forward
output = module._old_forward(*args, **kwargs)
File "/root/.cache/huggingface/modules/transformers_modules/Qwen-VL-Chat/modeling_qwen.py", line 571, in forward
images = self.visual(fake_images)
File "/home/anaconda3/envs/swift/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/anaconda3/envs/swift/lib/python3.10/site-packages/accelerate/hooks.py", line 166, in new_forward
output = module._old_forward(*args, **kwargs)
File "/root/.cache/huggingface/modules/transformers_modules/Qwen-VL-Chat/visual.py", line 398, in forward
x = self.conv1(x) # shape = [*, width, grid, grid]
File "/home/anaconda3/envs/swift/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/anaconda3/envs/swift/lib/python3.10/site-packages/accelerate/hooks.py", line 166, in new_forward
output = module._old_forward(*args, **kwargs)
File "/home/anaconda3/envs/swift/lib/python3.10/site-packages/torch/nn/modules/conv.py", line 463, in forward
return self._conv_forward(input, self.weight, self.bias)
File "/home/anaconda3/envs/swift/lib/python3.10/site-packages/torch/nn/modules/conv.py", line 459, in _conv_forward
return F.conv2d(input, weight, bias, self.stride,
RuntimeError: Input type (torch.cuda.ByteTensor) and weight type (CUDABFloat16Type) should be the same
RuntimeError: Input type (torch.cuda.ByteTensor) and weight type (CUDABFloat16Type) should be the same:
Details:
Traceback (most recent call last):
File "/datadisk/ai-prj/swift/examples/pytorch/llm/llm_sft.py", line 7, in <module>
output = sft_main()
File "/datadisk/ai-prj/swift/swift/utils/run_utils.py", line 31, in x_main
result = llm_x(args, **kwargs)
File "/datadisk/ai-prj/swift/swift/llm/sft.py", line 261, in llm_sft
trainer.train(training_args.resume_from_checkpoint)
File "/datadisk/ai-prj/swift/swift/trainers/trainers.py", line 54, in train
res = super().train(*args, **kwargs)
File "/home/anaconda3/envs/swift/lib/python3.10/site-packages/transformers/trainer.py", line 1780, in train
return inner_training_loop(
File "/home/anaconda3/envs/swift/lib/python3.10/site-packages/transformers/trainer.py", line 2118, in _inner_training_loop
tr_loss_step = self.training_step(model, inputs)
File "/home/anaconda3/envs/swift/lib/python3.10/site-packages/transformers/trainer.py", line 3036, in training_step
loss = self.compute_loss(model, inputs)
File "/datadisk/ai-prj/swift/swift/trainers/trainers.py", line 221, in compute_loss
outputs = model(**inputs)
File "/home/anaconda3/envs/swift/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/anaconda3/envs/swift/lib/python3.10/site-packages/accelerate/utils/operations.py", line 825, in forward
return model_forward(*args, **kwargs)
File "/home/anaconda3/envs/swift/lib/python3.10/site-packages/accelerate/utils/operations.py", line 813, in __call__
return convert_to_fp32(self.model_forward(*args, **kwargs))
File "/home/anaconda3/envs/swift/lib/python3.10/site-packages/torch/amp/autocast_mode.py", line 14, in decorate_autocast
return func(*args, **kwargs)
File "/home/anaconda3/envs/swift/lib/python3.10/site-packages/peft/peft_model.py", line 1129, in forward
return self.base_model(
File "/home/anaconda3/envs/swift/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/anaconda3/envs/swift/lib/python3.10/site-packages/peft/tuners/tuners_utils.py", line 161, in forward
return self.model.forward(*args, **kwargs)
File "/home/anaconda3/envs/swift/lib/python3.10/site-packages/accelerate/hooks.py", line 166, in new_forward
output = module._old_forward(*args, **kwargs)
File "/root/.cache/huggingface/modules/transformers_modules/Qwen-VL-Chat/modeling_qwen.py", line 856, in forward
transformer_outputs = self.transformer(
File "/home/anaconda3/envs/swift/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/anaconda3/envs/swift/lib/python3.10/site-packages/accelerate/hooks.py", line 166, in new_forward
output = module._old_forward(*args, **kwargs)
File "/root/.cache/huggingface/modules/transformers_modules/Qwen-VL-Chat/modeling_qwen.py", line 571, in forward
images = self.visual(fake_images)
File "/home/anaconda3/envs/swift/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/anaconda3/envs/swift/lib/python3.10/site-packages/accelerate/hooks.py", line 166, in new_forward
output = module._old_forward(*args, **kwargs)
File "/root/.cache/huggingface/modules/transformers_modules/Qwen-VL-Chat/visual.py", line 398, in forward
x = self.conv1(x) # shape = [*, width, grid, grid]
File "/home/anaconda3/envs/swift/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/anaconda3/envs/swift/lib/python3.10/site-packages/accelerate/hooks.py", line 166, in new_forward
output = module._old_forward(*args, **kwargs)
File "/home/anaconda3/envs/swift/lib/python3.10/site-packages/torch/nn/modules/conv.py", line 463, in forward
return self._conv_forward(input, self.weight, self.bias)
File "/home/anaconda3/envs/swift/lib/python3.10/site-packages/torch/nn/modules/conv.py", line 459, in _conv_forward
return F.conv2d(input, weight, bias, self.stride,
RuntimeError: Input type (torch.cuda.ByteTensor) and weight type (CUDABFloat16Type) should be the same