[X] I have read the README and searched the existing issues.
System Info
Traceback (most recent call last):
File "[My_env_dir]/lib/python3.9/threading.py", line 980, in _bootstrap_inner
self.run()
File "[My_env_dir]/lib/python3.9/threading.py", line 917, in run
self._target(*self._args, **self._kwargs)
File "[My_env_dir]/lib/python3.9/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "[My_env_dir]/lib/python3.9/site-packages/transformers/generation/utils.py", line 1758, in generate
result = self._sample(
File "[My_env_dir]/lib/python3.9/site-packages/transformers/generation/utils.py", line 2397, in _sample
outputs = self(
File "[My_env_dir]/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "[My_env_dir]/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "[My_env_dir]/lib/python3.9/site-packages/transformers/models/llama/modeling_llama.py", line 1164, in forward
outputs = self.model(
File "[My_env_dir]/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "[My_env_dir]/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "[My_env_dir]/lib/python3.9/site-packages/transformers/models/llama/modeling_llama.py", line 968, in forward
layer_outputs = decoder_layer(
File "[My_env_dir]/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "[My_env_dir]/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "[My_env_dir]/lib/python3.9/site-packages/transformers/models/llama/modeling_llama.py", line 713, in forward
hidden_states, self_attn_weights, present_key_value = self.self_attn(
File "[My_env_dir]/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "[My_env_dir]/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "[My_env_dir]/lib/python3.9/site-packages/transformers/models/llama/modeling_llama.py", line 616, in forward
key_states = self.k_proj(hidden_states)
File "[My_env_dir]/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "[My_env_dir]/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "[My_env_dir]/lib/python3.9/site-packages/torch/nn/modules/linear.py", line 116, in forward
return F.linear(input, self.weight, self.bias)
RuntimeError: expected mat1 and mat2 to have the same dtype, but got: c10::Half != float
Reminder
System Info
Reproduction
Inference config file:
Run with LLaMA-Factory
Expected behavior
No response
Others
No response