File "/home/user/app/app.py", line 77, in generate_and_visualize
max_logits.backward(max_logits)
File "/home/user/.pyenv/versions/3.10.15/lib/python3.10/site-packages/torch/_tensor.py", line 521, in backward
torch.autograd.backward(
File "/home/user/.pyenv/versions/3.10.15/lib/python3.10/site-packages/torch/autograd/__init__.py", line 289, in backward
_engine_run_backward(
File "/home/user/.pyenv/versions/3.10.15/lib/python3.10/site-packages/torch/autograd/graph.py", line 769, in _engine_run_backward
return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
File "/home/user/.pyenv/versions/3.10.15/lib/python3.10/site-packages/torch/autograd/function.py", line 306, in apply
return user_fn(self, *args)
File "/home/user/.pyenv/versions/3.10.15/lib/python3.10/site-packages/torch/utils/checkpoint.py", line 296, in backward
outputs = ctx.run_function(*detached_inputs)
File "/home/user/.pyenv/versions/3.10.15/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/user/.pyenv/versions/3.10.15/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1562, in _call_impl
return forward_call(*args, **kwargs)
File "/home/user/.pyenv/versions/3.10.15/lib/python3.10/site-packages/accelerate/hooks.py", line 170, in new_forward
output = module._old_forward(*args, **kwargs)
File "/home/user/.pyenv/versions/3.10.15/lib/python3.10/site-packages/lxt/models/mixtral.py", line 555, in forward
hidden_states, self_attn_weights, present_key_value = self.self_attn(
File "/home/user/.pyenv/versions/3.10.15/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/user/.pyenv/versions/3.10.15/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1562, in _call_impl
return forward_call(*args, **kwargs)
File "/home/user/.pyenv/versions/3.10.15/lib/python3.10/site-packages/accelerate/hooks.py", line 170, in new_forward
output = module._old_forward(*args, **kwargs)
File "/home/user/.pyenv/versions/3.10.15/lib/python3.10/site-packages/lxt/models/mixtral.py", line 369, in forward
raise ValueError(
ValueError: Attention mask should be of size (1, 1, 483, 966), but is torch.Size([1, 1, 483, 483])
Is Mixtral-8x7B not working anymore? Or should I switch to a different version of transformers, for instance?
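For reference: the mask length the model expects (966) is exactly twice the actual sequence length (483), and the trace shows the error firing inside torch.utils.checkpoint's re-run of the forward pass during backward(). So my guess is that a populated KV cache (e.g. from generate()) is being reused in the checkpointed recomputation. Below is a minimal sketch of the single-forward-pass variant I would expect to avoid that, modeled on the quickstart pattern. The checkpoint name and prompt are placeholders, and I'm assuming lxt.models.mixtral exports attnlrp the same way the llama quickstart module does:

import torch
from transformers import AutoTokenizer
from lxt.models.mixtral import MixtralForCausalLM, attnlrp  # assumed to mirror lxt.models.llama

model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"  # placeholder checkpoint

model = MixtralForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(model_id)

attnlrp.register(model)  # attach the LRP rules, as in the quickstart

input_ids = tokenizer("Hello, how are you?", return_tensors="pt").input_ids.to(model.device)

# Detach the embeddings so they become a leaf tensor we can collect gradients on.
inputs_embeds = model.get_input_embeddings()(input_ids).detach().requires_grad_(True)

# One plain forward pass instead of generate(): with use_cache=False there is no
# KV cache, so the checkpointed re-forward during backward() sees the same
# sequence length (and the same mask shape) as the original forward pass.
logits = model(inputs_embeds=inputs_embeds, use_cache=False).logits

max_logits, max_indices = torch.max(logits[0, -1, :], dim=-1)
max_logits.backward(max_logits)

relevance = inputs_embeds.grad.float().sum(-1)  # per-token relevance scores

If generation is needed on top of the relevance computation, a workaround might be to generate first, then re-run the full sequence through a fresh forward pass (cache off) before calling backward(), so the mask and the key/value lengths stay in sync.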