Error during training: Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules in 32-bit, you need to set `load_in_8bit_fp32_cpu_offload=True` and pass a custom `device_map` to `from_pretrained`. Check https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu for more details. #159
```
Traceback (most recent call last):
  File "finetune.py", line 64, in <module>
    model = LlamaForCausalLM.from_pretrained(
  File "/home/star/miniconda3/envs/chinese-vicuna/lib/python3.8/site-packages/transformers/modeling_utils.py", line 2740, in from_pretrained
    raise ValueError(
ValueError:
Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit
the quantized model. If you want to dispatch the model on the CPU or the disk while keeping
these modules in 32-bit, you need to set `load_in_8bit_fp32_cpu_offload=True` and pass a custom
`device_map` to `from_pretrained`. Check
https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu
for more details.
```
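For reference, this is how I read the CPU-offload route that the error message points to (via the linked quantization docs): enable fp32 CPU offload in the quantization config and pass a custom `device_map`. This is only a sketch based on those docs, not code from this repo's finetune.py; the option name `llm_int8_enable_fp32_cpu_offload` and the module names in the `device_map` are my assumptions and depend on the transformers version and the LLaMA checkpoint.

```python
# Sketch based on the quantization docs linked in the error message (assumption,
# not code from this repo): keep most of the model in 8-bit on GPU 0 and offload
# one module to the CPU in fp32. Module names must match the loaded checkpoint.
import torch
from transformers import BitsAndBytesConfig, LlamaForCausalLM

quant_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_enable_fp32_cpu_offload=True,  # keep CPU-offloaded modules in fp32
)

custom_device_map = {
    "model.embed_tokens": 0,
    "model.layers": 0,
    "model.norm": 0,
    "lm_head": "cpu",  # example of a module kept on the CPU
}

model = LlamaForCausalLM.from_pretrained(
    "decapoda-research/llama-7b-hf",  # placeholder; finetune.py uses args.model_path
    quantization_config=quant_config,
    torch_dtype=torch.float16,
    device_map=custom_device_map,
)
```

Modules placed on the CPU this way still run their forward pass on the CPU, so I am not sure this helps for 8-bit LoRA training; it seems mainly aimed at loading and inference.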
Then, following the issue doc on single-GPU/dual-GPU setups, in

```python
model = LlamaForCausalLM.from_pretrained(
    args.model_path,
    load_in_8bit=True,
    torch_dtype=torch.float16,
    device_map=device_map,
)
```

I changed `device_map=device_map` to `device_map={"": "cpu"}`. The error after that is:
File "finetune.py", line 272, in
trainer.train(resume_from_checkpoint=args.resume_from_checkpoint)
File "/home/star/miniconda3/envs/chinese-vicuna/lib/python3.8/site-packages/transformers/trainer.py", line 1662, in train
return inner_training_loop(
File "/home/star/miniconda3/envs/chinese-vicuna/lib/python3.8/site-packages/transformers/trainer.py", line 1929, in _inner_training_loop
tr_loss_step = self.training_step(model, inputs)
File "/home/star/miniconda3/envs/chinese-vicuna/lib/python3.8/site-packages/transformers/trainer.py", line 2699, in training_step
loss = self.compute_loss(model, inputs)
File "/home/star/miniconda3/envs/chinese-vicuna/lib/python3.8/site-packages/transformers/trainer.py", line 2731, in compute_loss
outputs = model(inputs)
File "/home/star/miniconda3/envs/chinese-vicuna/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, *kwargs)
File "/home/star/miniconda3/envs/chinese-vicuna/lib/python3.8/site-packages/peft/peft_model.py", line 575, in forward
return self.base_model(
File "/home/star/miniconda3/envs/chinese-vicuna/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(input, kwargs)
File "/home/star/miniconda3/envs/chinese-vicuna/lib/python3.8/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, kwargs)
File "/home/star/miniconda3/envs/chinese-vicuna/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py", line 687, in forward
outputs = self.model(
File "/home/star/miniconda3/envs/chinese-vicuna/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, *kwargs)
File "/home/star/miniconda3/envs/chinese-vicuna/lib/python3.8/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(args, kwargs)
File "/home/star/miniconda3/envs/chinese-vicuna/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py", line 569, in forward
layer_outputs = torch.utils.checkpoint.checkpoint(
File "/home/star/miniconda3/envs/chinese-vicuna/lib/python3.8/site-packages/torch/utils/checkpoint.py", line 249, in checkpoint
return CheckpointFunction.apply(function, preserve, args)
File "/home/star/miniconda3/envs/chinese-vicuna/lib/python3.8/site-packages/torch/utils/checkpoint.py", line 107, in forward
outputs = run_function(args)
File "/home/star/miniconda3/envs/chinese-vicuna/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py", line 565, in custom_forward
return module(inputs, output_attentions, None)
File "/home/star/miniconda3/envs/chinese-vicuna/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(input, kwargs)
File "/home/star/miniconda3/envs/chinese-vicuna/lib/python3.8/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, *kwargs)
File "/home/star/miniconda3/envs/chinese-vicuna/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py", line 292, in forward
hidden_states, self_attn_weights, present_key_value = self.self_attn(
File "/home/star/miniconda3/envs/chinese-vicuna/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(input, kwargs)
File "/home/star/miniconda3/envs/chinese-vicuna/lib/python3.8/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, *kwargs)
File "/home/star/miniconda3/envs/chinese-vicuna/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py", line 196, in forward
query_states = self.q_proj(hidden_states).view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
File "/home/star/miniconda3/envs/chinese-vicuna/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(input, **kwargs)
File "/home/star/miniconda3/envs/chinese-vicuna/lib/python3.8/site-packages/peft/tuners/lora.py", line 576, in forward
result = super().forward(x)
File "/home/star/miniconda3/envs/chinese-vicuna/lib/python3.8/site-packages/bitsandbytes/nn/modules.py", line 242, in forward
out = bnb.matmul(x, self.weight, bias=self.bias, state=self.state)
File "/home/star/miniconda3/envs/chinese-vicuna/lib/python3.8/site-packages/bitsandbytes/autograd/_functions.py", line 488, in matmul
return MatMul8bitLt.apply(A, B, out, bias, state)
File "/home/star/miniconda3/envs/chinese-vicuna/lib/python3.8/site-packages/bitsandbytes/autograd/_functions.py", line 273, in forward
using_igemmlt = torch.cuda.get_device_capability(device=A.device) >= (7, 5) and not state.force_no_igemmlt
File "/home/star/miniconda3/envs/chinese-vicuna/lib/python3.8/site-packages/torch/cuda/init.py", line 357, in get_device_capability
prop = get_device_properties(device)
File "/home/star/miniconda3/envs/chinese-vicuna/lib/python3.8/site-packages/torch/cuda/init.py", line 372, in get_device_properties
device = _get_device_index(device, optional=True)
File "/home/star/miniconda3/envs/chinese-vicuna/lib/python3.8/site-packages/torch/cuda/_utils.py", line 30, in _get_device_index
raise ValueError('Expected a cuda device, but got: {}'.format(device))
ValueError: Expected a cuda device, but got: cpu
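From the last frames of the traceback, `MatMul8bitLt.apply` calls `torch.cuda.get_device_capability(device=A.device)`, so the 8-bit layers apparently have to live on a CUDA device; putting the whole model on the CPU with `device_map={"": "cpu"}` cannot work for this path. With a single A100 I would expect the call to instead pin everything onto GPU 0, roughly like the sketch below (my assumption, not verified):

```python
# Sketch (assumption, not verified): keep the whole quantized model on cuda:0
# so the bitsandbytes 8-bit matmul sees a CUDA device instead of the CPU.
import torch
from transformers import LlamaForCausalLM

model = LlamaForCausalLM.from_pretrained(
    "decapoda-research/llama-7b-hf",  # placeholder; finetune.py uses args.model_path
    load_in_8bit=True,
    torch_dtype=torch.float16,
    device_map={"": 0},  # "" maps the entire model to GPU 0
)
```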
If you run into a problem and need our help, please describe it from the following angles so that we can understand or reproduce the error (learning how to ask a good question not only helps us understand you, it is also a self-check process):
1. Which script did you use, and with what command?
2. What were your parameters (script arguments, command-line arguments)?
3. Did you modify our code?
4. Which dataset did you use?

Then describe the problem from the environment side (some of these may already be covered under "related problems and solutions" in the readme):
1. Operating system: ubuntu
2. GPU model and count: 1x A100
3. Python version
4. Versions of the Python libraries: all installed to match the documentation

You can also describe the problem from the runtime side:
1. What is the error message, and which code raised it? (You can send us the complete error message.) Filled in as above, the error is the traceback shown above, ending in `ValueError: Expected a cuda device, but got: cpu`.