Traceback (most recent call last):
File "/content/text-generation-webui/modules/callbacks.py", line 56, in gentask
ret = self.mfunc(callback=_callback, *args, **self.kwargs)
File "/content/text-generation-webui/modules/text_generation.py", line 311, in generate_with_callback
shared.model.generate(**kwargs)
File "/usr/local/lib/python3.10/dist-packages/auto_gptq/modeling/_base.py", line 443, in generate
return self.model.generate(**kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py", line 1642, in generate
return self.sample(
File "/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py", line 2724, in sample
outputs = self(
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py", line 809, in forward
outputs = self.model(
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py", line 697, in forward
layer_outputs = decoder_layer(
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py", line 426, in forward
hidden_states = self.mlp(hidden_states)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py", line 210, in forward
[F.linear(x, gate_proj_slices[i]) for i in range(self.config.pretraining_tp)], dim=-1
File "/usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py", line 210, in <listcomp>
[F.linear(x, gate_proj_slices[i]) for i in range(self.config.pretraining_tp)], dim=-1
RuntimeError: mat1 and mat2 shapes cannot be multiplied (1848x5120 and 13824x640)
Output generated in 4.45 seconds (0.00 tokens/s, 0 tokens, context 1848, seed 1194575447)
Traceback (most recent call last): File "/content/text-generation-webui/modules/callbacks.py", line 56, in gentask ret = self.mfunc(callback=_callback, *args, **self.kwargs) File "/content/text-generation-webui/modules/text_generation.py", line 311, in generate_with_callback shared.model.generate(**kwargs) File "/usr/local/lib/python3.10/dist-packages/auto_gptq/modeling/_base.py", line 443, in generate return self.model.generate(**kwargs) File "/usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py", line 115, in decorate_context return func(*args, **kwargs) File "/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py", line 1642, in generate return self.sample( File "/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py", line 2724, in sample outputs = self( File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1501, in _call_impl return forward_call(*args, **kwargs) File "/usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py", line 809, in forward outputs = self.model( File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1501, in _call_impl return forward_call(*args, **kwargs) File "/usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py", line 697, in forward layer_outputs = decoder_layer( File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1501, in _call_impl return forward_call(*args, **kwargs) File "/usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py", line 426, in forward hidden_states = self.mlp(hidden_states) File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1501, in _call_impl return forward_call(*args, **kwargs) File "/usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py", line 210, in forward [F.linear(x, gate_proj_slices[i]) for i in range(self.config.pretraining_tp)], dim=-1 File 
"/usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py", line 210, in <listcomp>
[F.linear(x, gate_proj_slices[i]) for i in range(self.config.pretraining_tp)], dim=-1
RuntimeError: mat1 and mat2 shapes cannot be multiplied (1848x5120 and 13824x640)
Output generated in 4.45 seconds (0.00 tokens/s, 0 tokens, context 1848, seed 1194575447)