ffn_config = BaseQuantizeConfig(
nbits=4,
group_size=64,
quant_zero=True,
quant_scale=True,
)
errors are below
Traceback (most recent call last):
File "/workspace/accelerate_files/demo.py", line 121, in
result = model.generate(
File "/root/anaconda3/envs/dsmii/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, kwargs)
File "/root/anaconda3/envs/dsmii/lib/python3.10/site-packages/transformers/generation/utils.py", line 1718, in generate
return self.greedy_search(
File "/root/anaconda3/envs/dsmii/lib/python3.10/site-packages/transformers/generation/utils.py", line 2579, in greedy_search
outputs = self(
File "/root/anaconda3/envs/dsmii/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, *kwargs)
File "/root/anaconda3/envs/dsmii/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(args, kwargs)
File "/root/anaconda3/envs/dsmii/lib/python3.10/site-packages/transformers/models/mixtral/modeling_mixtral.py", line 1228, in forward
outputs = self.model(
File "/root/anaconda3/envs/dsmii/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, kwargs)
File "/root/anaconda3/envs/dsmii/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, *kwargs)
File "/root/anaconda3/envs/dsmii/lib/python3.10/site-packages/transformers/models/mixtral/modeling_mixtral.py", line 1093, in forward
layer_outputs = decoder_layer(
File "/root/anaconda3/envs/dsmii/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(args, kwargs)
File "/root/anaconda3/envs/dsmii/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, kwargs)
File "/root/anaconda3/envs/dsmii/lib/python3.10/site-packages/transformers/models/mixtral/modeling_mixtral.py", line 818, in forward
hidden_states, router_logits = self.block_sparse_moe(hidden_states,prefetch_uids,next_layer_moe)
File "/root/anaconda3/envs/dsmii/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, *kwargs)
File "/root/anaconda3/envs/dsmii/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(args, kwargs)
File "/workspace/accelerate_files/mixtraloffloading/src/custom_layers.py", line 320, in forward
current_hidden_states = expert_layer(current_state) routing_weights[top_x_list, idx_list, None]
File "/root/anaconda3/envs/dsmii/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(args, kwargs)
File "/root/anaconda3/envs/dsmii/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, *kwargs)
File "/workspace/accelerate_files/mixtraloffloading/src/expert_wrapper.py", line 33, in forward
return self.expert_module(args, kwargs)
File "/workspace/accelerate_files/mixtraloffloading/src/expert_wrapper.py", line 18, in
self.expert_module = lambda *args, kwargs: expert_module(*args, *kwargs)
File "/root/anaconda3/envs/dsmii/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(args, kwargs)
File "/root/anaconda3/envs/dsmii/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, kwargs)
File "/workspace/accelerate_files/mixtraloffloading/src/custom_layers.py", line 255, in forward
current_hidden_states = self.act_fn(self.w1(hidden_states)) self.w3(hidden_states)
File "/root/anaconda3/envs/dsmii/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(args, kwargs)
File "/root/anaconda3/envs/dsmii/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, *kwargs)
File "/workspace/accelerate_files/mixtraloffloading/src/custom_layers.py", line 50, in forward
return self.forward_triton(x)
File "/root/anaconda3/envs/dsmii/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(args, *kwargs)
File "/workspace/accelerate_files/mixtraloffloading/src/custom_layers.py", line 65, in forward_triton
meta['scale'] = Quantizer.dequantize(meta['scale_q'], meta['meta_scale']); del_keys.append('scale')
File "/root/anaconda3/envs/dsmii/lib/python3.10/site-packages/hqq/core/quantize.py", line 86, in dequantize
W_r = ((W_q_p - meta['zero'])meta['scale']).reshape(meta['shape'])
RuntimeError: shape '[1, 917504]' is invalid for input of size 3670016
changes to the demo
ffn_config = BaseQuantizeConfig( nbits=4, group_size=64, quant_zero=True, quant_scale=True, ) errors are below Traceback (most recent call last): File "/workspace/accelerate_files/demo.py", line 121, in
result = model.generate(
File "/root/anaconda3/envs/dsmii/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, kwargs)
File "/root/anaconda3/envs/dsmii/lib/python3.10/site-packages/transformers/generation/utils.py", line 1718, in generate
return self.greedy_search(
File "/root/anaconda3/envs/dsmii/lib/python3.10/site-packages/transformers/generation/utils.py", line 2579, in greedy_search
outputs = self(
File "/root/anaconda3/envs/dsmii/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, *kwargs)
File "/root/anaconda3/envs/dsmii/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(args, kwargs)
File "/root/anaconda3/envs/dsmii/lib/python3.10/site-packages/transformers/models/mixtral/modeling_mixtral.py", line 1228, in forward
outputs = self.model(
File "/root/anaconda3/envs/dsmii/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, kwargs)
File "/root/anaconda3/envs/dsmii/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, *kwargs)
File "/root/anaconda3/envs/dsmii/lib/python3.10/site-packages/transformers/models/mixtral/modeling_mixtral.py", line 1093, in forward
layer_outputs = decoder_layer(
File "/root/anaconda3/envs/dsmii/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(args, kwargs)
File "/root/anaconda3/envs/dsmii/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, kwargs)
File "/root/anaconda3/envs/dsmii/lib/python3.10/site-packages/transformers/models/mixtral/modeling_mixtral.py", line 818, in forward
hidden_states, router_logits = self.block_sparse_moe(hidden_states,prefetch_uids,next_layer_moe)
File "/root/anaconda3/envs/dsmii/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, *kwargs)
File "/root/anaconda3/envs/dsmii/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(args, kwargs)
File "/workspace/accelerate_files/mixtraloffloading/src/custom_layers.py", line 320, in forward
current_hidden_states = expert_layer(current_state) routing_weights[top_x_list, idx_list, None]
File "/root/anaconda3/envs/dsmii/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(args, kwargs)
File "/root/anaconda3/envs/dsmii/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, *kwargs)
File "/workspace/accelerate_files/mixtraloffloading/src/expert_wrapper.py", line 33, in forward
return self.expert_module(args, kwargs)
File "/workspace/accelerate_files/mixtraloffloading/src/expert_wrapper.py", line 18, in
self.expert_module = lambda *args, kwargs: expert_module(*args, *kwargs)
File "/root/anaconda3/envs/dsmii/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(args, kwargs)
File "/root/anaconda3/envs/dsmii/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, kwargs)
File "/workspace/accelerate_files/mixtraloffloading/src/custom_layers.py", line 255, in forward
current_hidden_states = self.act_fn(self.w1(hidden_states)) self.w3(hidden_states)
File "/root/anaconda3/envs/dsmii/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(args, kwargs)
File "/root/anaconda3/envs/dsmii/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, *kwargs)
File "/workspace/accelerate_files/mixtraloffloading/src/custom_layers.py", line 50, in forward
return self.forward_triton(x)
File "/root/anaconda3/envs/dsmii/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(args, *kwargs)
File "/workspace/accelerate_files/mixtraloffloading/src/custom_layers.py", line 65, in forward_triton
meta['scale'] = Quantizer.dequantize(meta['scale_q'], meta['meta_scale']); del_keys.append('scale')
File "/root/anaconda3/envs/dsmii/lib/python3.10/site-packages/hqq/core/quantize.py", line 86, in dequantize
W_r = ((W_q_p - meta['zero'])meta['scale']).reshape(meta['shape'])
RuntimeError: shape '[1, 917504]' is invalid for input of size 3670016