Dear everyone,

I am using alpaca-lora to fine-tune LLaMA-2 70B, but training fails with the error below:
File "/home/xx/SMLLM/visual-med-alpaca/code/med-alpaca-lora/finetune.py", line 222, in <module>
fire.Fire(train)
File "/home/xx/condaenv/envs/smllm/lib/python3.9/site-packages/fire/core.py", line 141, in Fire
component_trace = _Fire(component, args, parsed_flag_args, context, name)
File "/home/xx/condaenv/envs/smllm/lib/python3.9/site-packages/fire/core.py", line 475, in _Fire
component, remaining_args = _CallAndUpdateTrace(
File "/home/xx/condaenv/envs/smllm/lib/python3.9/site-packages/fire/core.py", line 691, in _CallAndUpdateTrace
component = fn(*varargs, **kwargs)
File "/home/xx/SMLLM/visual-med-alpaca/code/med-alpaca-lora/finetune.py", line 191, in train
trainer.train()
File "/home/xx/condaenv/envs/smllm/lib/python3.9/site-packages/transformers/trainer.py", line 1664, in train
return inner_training_loop(
File "/home/xx/condaenv/envs/smllm/lib/python3.9/site-packages/transformers/trainer.py", line 1940, in _inner_training_loop
tr_loss_step = self.training_step(model, inputs)
File "/home/xx/condaenv/envs/smllm/lib/python3.9/site-packages/transformers/trainer.py", line 2735, in training_step
loss = self.compute_loss(model, inputs)
File "/home/xx/condaenv/envs/smllm/lib/python3.9/site-packages/transformers/trainer.py", line 2767, in compute_loss
outputs = model(**inputs)
File "/home/xx/condaenv/envs/smllm/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/xx/condaenv/envs/smllm/lib/python3.9/site-packages/peft/peft_model.py", line 918, in forward
return self.base_model(
File "/home/xx/condaenv/envs/smllm/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/xx/condaenv/envs/smllm/lib/python3.9/site-packages/peft/tuners/tuners_utils.py", line 94, in forward
return self.model.forward(*args, **kwargs)
File "/home/xx/condaenv/envs/smllm/lib/python3.9/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/home/xx/condaenv/envs/smllm/lib/python3.9/site-packages/transformers/models/llama/modeling_llama.py", line 688, in forward
outputs = self.model(
File "/home/xx/condaenv/envs/smllm/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/xx/condaenv/envs/smllm/lib/python3.9/site-packages/transformers/models/llama/modeling_llama.py", line 570, in forward
layer_outputs = torch.utils.checkpoint.checkpoint(
File "/home/xx/condaenv/envs/smllm/lib/python3.9/site-packages/torch/utils/checkpoint.py", line 249, in checkpoint
return CheckpointFunction.apply(function, preserve, *args)
File "/home/xx/condaenv/envs/smllm/lib/python3.9/site-packages/torch/autograd/function.py", line 506, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
File "/home/xx/condaenv/envs/smllm/lib/python3.9/site-packages/torch/utils/checkpoint.py", line 107, in forward
outputs = run_function(*args)
File "/home/xx/condaenv/envs/smllm/lib/python3.9/site-packages/transformers/models/llama/modeling_llama.py", line 566, in custom_forward
return module(*inputs, output_attentions, None)
File "/home/xx/condaenv/envs/smllm/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/xx/condaenv/envs/smllm/lib/python3.9/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/home/xx/condaenv/envs/smllm/lib/python3.9/site-packages/transformers/models/llama/modeling_llama.py", line 293, in forward
hidden_states, self_attn_weights, present_key_value = self.self_attn(
File "/home/xx/condaenv/envs/smllm/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/xx/condaenv/envs/smllm/lib/python3.9/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/home/xx/condaenv/envs/smllm/lib/python3.9/site-packages/transformers/models/llama/modeling_llama.py", line 198, in forward
key_states = self.k_proj(hidden_states).view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
RuntimeError: shape '[4, 512, 64, 128]' is invalid for input of size 2097152
Does alpaca-lora support fine-tuning LLaMA-2 70B? If so, could you please help me resolve the issue above?
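For what it's worth, the numbers in the RuntimeError look like a grouped-query-attention mismatch. Here is a quick sanity check I ran, assuming LLaMA-2 70B's published configuration (64 query heads, 8 key/value heads, head_dim 128); the variable names are just mine:

```python
# Sanity check on the sizes in the RuntimeError.
# Assumption: LLaMA-2 70B uses grouped-query attention with
# 64 query heads, 8 key/value heads, and head_dim = 128.
bsz, q_len = 4, 512
num_heads, num_kv_heads, head_dim = 64, 8, 128

# Elements the failing .view(bsz, q_len, num_heads, head_dim) asks for:
requested = bsz * q_len * num_heads * head_dim
# Elements k_proj actually produces under GQA (KV heads, not query heads):
produced = bsz * q_len * num_kv_heads * head_dim

print(requested)              # 16777216
print(produced)               # 2097152  -> matches "input of size 2097152"
print(requested // produced)  # 8        -> exactly num_heads / num_kv_heads
```

If that reading is correct, `k_proj` only outputs `num_key_value_heads * head_dim` features, but the `modeling_llama.py` in my environment reshapes it with `self.num_heads`, so I suspect my installed transformers simply predates grouped-query-attention support for LLaMA-2 (I believe it landed around v4.31.0) and upgrading might fix it, but I am not certain. Please correct me if I have misdiagnosed this.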
Thanks very much.