0%| | 0/656 [00:00<?, ?it/s]Traceback (most recent call last):
File "/home/ec2-user/SageMaker/MedicalGPT/pretraining.py", line 742, in <module>
main()
File "/home/ec2-user/SageMaker/MedicalGPT/pretraining.py", line 703, in main
train_result = trainer.train(resume_from_checkpoint=checkpoint)
File "/home/ec2-user/anaconda3/envs/pytorch_p310/lib/python3.10/site-packages/transformers/trainer.py", line 1555, in train
return inner_training_loop(
File "/home/ec2-user/anaconda3/envs/pytorch_p310/lib/python3.10/site-packages/transformers/trainer.py", line 1904, in inner_training_loop
self.accelerator.clip_grad_norm_(
File "/home/ec2-user/anaconda3/envs/pytorch_p310/lib/python3.10/site-packages/accelerate/accelerator.py", line 2124, in clip_grad_norm_
self.unscale_gradients()
File "/home/ec2-user/anaconda3/envs/pytorch_p310/lib/python3.10/site-packages/accelerate/accelerator.py", line 2087, in unscale_gradients
self.scaler.unscale_(opt)
File "/home/ec2-user/anaconda3/envs/pytorch_p310/lib/python3.10/site-packages/torch/cuda/amp/grad_scaler.py", line 284, in unscale_
optimizer_state["found_inf_per_device"] = self._unscale_grads_(optimizer, inv_scale, found_inf, False)
File "/home/ec2-user/anaconda3/envs/pytorch_p310/lib/python3.10/site-packages/torch/cuda/amp/grad_scaler.py", line 212, in _unscale_grads_
raise ValueError("Attempting to unscale FP16 gradients.")
ValueError: Attempting to unscale FP16 gradients.
0%| | 0/656 [00:00<?, ?it/s]Traceback (most recent call last): File "/home/ec2-user/SageMaker/MedicalGPT/pretraining.py", line 742, in <module> main() File "/home/ec2-user/SageMaker/MedicalGPT/pretraining.py", line 703, in main train_result = trainer.train(resume_from_checkpoint=checkpoint) File "/home/ec2-user/anaconda3/envs/pytorch_p310/lib/python3.10/site-packages/transformers/trainer.py", line 1555, in train return inner_training_loop( File "/home/ec2-user/anaconda3/envs/pytorch_p310/lib/python3.10/site-packages/transformers/trainer.py", line 1904, in inner_training_loop self.accelerator.clip_grad_norm_( File "/home/ec2-user/anaconda3/envs/pytorch_p310/lib/python3.10/site-packages/accelerate/accelerator.py", line 2124, in clip_grad_norm_ self.unscale_gradients() File "/home/ec2-user/anaconda3/envs/pytorch_p310/lib/python3.10/site-packages/accelerate/accelerator.py", line 2087, in unscale_gradients self.scaler.unscale_(opt) File "/home/ec2-user/anaconda3/envs/pytorch_p310/lib/python3.10/site-packages/torch/cuda/amp/grad_scaler.py", line 284, in unscale_ optimizer_state["found_inf_per_device"] = self._unscale_grads_(optimizer, inv_scale, found_inf, False) File "/home/ec2-user/anaconda3/envs/pytorch_p310/lib/python3.10/site-packages/torch/cuda/amp/grad_scaler.py", line 212, in _unscale_grads_ raise ValueError("Attempting to unscale FP16 gradients.") ValueError: Attempting to unscale FP16 gradients.
加上参数 `--modules_to_save embed_tokens,lm_head` 后，训练会报上述错误（ValueError: Attempting to unscale FP16 gradients.）。