详细报错内容如下:
/home/huskar/.local/lib/python3.10/site-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set no_deprecation_warning=True to disable this warning
warnings.warn(
0%| | 0/3000 [00:00<?, ?it/s]02/23/2024 11:00:45 - WARNING - transformers_modules.chatglm-6b-int4.modeling_chatglm - use_cache=True is incompatible with gradient checkpointing. Setting use_cache=False...
/home/huskar/.local/lib/python3.10/site-packages/torch/utils/checkpoint.py:460: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
warnings.warn(
Traceback (most recent call last):
File "/home/huskar/ChatGLM-6B/ptuning/main.py", line 430, in <module>
main()
File "/home/huskar/ChatGLM-6B/ptuning/main.py", line 369, in main
train_result = trainer.train(resume_from_checkpoint=checkpoint)
File "/home/huskar/ChatGLM-6B/ptuning/trainer.py", line 1635, in train
return inner_training_loop(
File "/home/huskar/ChatGLM-6B/ptuning/trainer.py", line 1904, in _inner_training_loop
tr_loss_step = self.training_step(model, inputs)
File "/home/huskar/ChatGLM-6B/ptuning/trainer.py", line 2647, in training_step
loss = self.compute_loss(model, inputs)
File "/home/huskar/ChatGLM-6B/ptuning/trainer.py", line 2679, in compute_loss
outputs = model(**inputs)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/home/huskar/.cache/huggingface/modules/transformers_modules/chatglm-6b-int4/modeling_chatglm.py", line 1190, in forward
transformer_outputs = self.transformer(
File "/home/huskar/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/home/huskar/.cache/huggingface/modules/transformers_modules/chatglm-6b-int4/modeling_chatglm.py", line 985, in forward
layer_ret = torch.utils.checkpoint.checkpoint(
File "/home/huskar/.local/lib/python3.10/site-packages/torch/_compile.py", line 24, in inner
return torch._dynamo.disable(fn, recursive)(*args, **kwargs)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py", line 489, in _fn
return fn(*args, **kwargs)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/_dynamo/external_utils.py", line 17, in inner
return fn(*args, **kwargs)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/utils/checkpoint.py", line 482, in checkpoint
return CheckpointFunction.apply(function, preserve, *args)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/autograd/function.py", line 553, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
File "/home/huskar/.local/lib/python3.10/site-packages/torch/utils/checkpoint.py", line 261, in forward
outputs = run_function(*args)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/home/huskar/.cache/huggingface/modules/transformers_modules/chatglm-6b-int4/modeling_chatglm.py", line 624, in forward
attention_input = self.input_layernorm(hidden_states)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/nn/modules/normalization.py", line 201, in forward
return F.layer_norm(
File "/home/huskar/.local/lib/python3.10/site-packages/torch/nn/functional.py", line 2546, in layer_norm
return torch.layer_norm(input, normalized_shape, weight, bias, eps, torch.backends.cudnn.enabled)
RuntimeError: "LayerNormKernelImpl" not implemented for 'Half'
0%| | 0/3000 [00:00<?, ?it/s]
Expected Behavior
No response
Steps To Reproduce
将 train.sh 的第 4 行修改为 CUDA_VISIBLE_DEVICES=1 python3 main.py
Is there an existing issue for this?
Current Behavior
在使用 P-tuning 微调时报错:RuntimeError: "LayerNormKernelImpl" not implemented for 'Half'。系统为 WSL-Ubuntu 22.04.3,硬件为 NVIDIA 4060(8 GB 显存),运行的是 chatglm-6b-int4 量化版本。
详细报错内容如下: /home/huskar/.local/lib/python3.10/site-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set
main()
File "/home/huskar/ChatGLM-6B/ptuning/main.py", line 369, in main
train_result = trainer.train(resume_from_checkpoint=checkpoint)
File "/home/huskar/ChatGLM-6B/ptuning/trainer.py", line 1635, in train
return inner_training_loop(
File "/home/huskar/ChatGLM-6B/ptuning/trainer.py", line 1904, in _inner_training_loop
tr_loss_step = self.training_step(model, inputs)
File "/home/huskar/ChatGLM-6B/ptuning/trainer.py", line 2647, in training_step
loss = self.compute_loss(model, inputs)
File "/home/huskar/ChatGLM-6B/ptuning/trainer.py", line 2679, in compute_loss
outputs = model(**inputs)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/home/huskar/.cache/huggingface/modules/transformers_modules/chatglm-6b-int4/modeling_chatglm.py", line 1190, in forward
transformer_outputs = self.transformer(
File "/home/huskar/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/home/huskar/.cache/huggingface/modules/transformers_modules/chatglm-6b-int4/modeling_chatglm.py", line 985, in forward
layer_ret = torch.utils.checkpoint.checkpoint(
File "/home/huskar/.local/lib/python3.10/site-packages/torch/_compile.py", line 24, in inner
return torch._dynamo.disable(fn, recursive)(*args, **kwargs)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py", line 489, in _fn
return fn(*args, **kwargs)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/_dynamo/external_utils.py", line 17, in inner
return fn(*args, **kwargs)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/utils/checkpoint.py", line 482, in checkpoint
return CheckpointFunction.apply(function, preserve, *args)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/autograd/function.py", line 553, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
File "/home/huskar/.local/lib/python3.10/site-packages/torch/utils/checkpoint.py", line 261, in forward
outputs = run_function(*args)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/home/huskar/.cache/huggingface/modules/transformers_modules/chatglm-6b-int4/modeling_chatglm.py", line 624, in forward
attention_input = self.input_layernorm(hidden_states)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/nn/modules/normalization.py", line 201, in forward
return F.layer_norm(
File "/home/huskar/.local/lib/python3.10/site-packages/torch/nn/functional.py", line 2546, in layer_norm
return torch.layer_norm(input, normalized_shape, weight, bias, eps, torch.backends.cudnn.enabled)
RuntimeError: "LayerNormKernelImpl" not implemented for 'Half'
0%| | 0/3000 [00:00<?, ?it/s]
no_deprecation_warning=True
to disable this warning warnings.warn( 0%| | 0/3000 [00:00<?, ?it/s]02/23/2024 11:00:45 - WARNING - transformers_modules.chatglm-6b-int4.modeling_chatglm - use_cache=True
is incompatible with gradient checkpointing. Setting use_cache=False
... /home/huskar/.local/lib/python3.10/site-packages/torch/utils/checkpoint.py:460: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants. warnings.warn( Traceback (most recent call last): File "/home/huskar/ChatGLM-6B/ptuning/main.py", line 430, in <module>
Expected Behavior
No response
Steps To Reproduce
Environment
Anything else?
No response