Traceback (most recent call last):
File "/mnt/data/lexiao/workspace/develop/OmniQuant/main.py", line 375, in <module>
main()
File "/mnt/data/lexiao/workspace/develop/OmniQuant/main.py", line 345, in main
omniquant(
File "/mnt/data/lexiao/workspace/develop/OmniQuant/quantize/omniquant.py", line 213, in omniquant
fp_inps[j] = qlayer(fp_inps[j].unsqueeze(0), attention_mask=attention_mask,position_ids=position_ids)[0]
File "/home/lexiao/anaconda3/envs/debug/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/lexiao/anaconda3/envs/debug/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/mnt/data/lexiao/workspace/develop/OmniQuant/models/int_llama_layer.py", line 243, in forward
hidden_states, self_attn_weights, present_key_value = self.self_attn(
File "/home/lexiao/anaconda3/envs/debug/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/lexiao/anaconda3/envs/debug/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/mnt/data/lexiao/workspace/develop/OmniQuant/models/int_llama_layer.py", line 126, in forward
cos, sin = self.rotary_emb(value_states, seq_len=kv_seq_len)
File "/home/lexiao/anaconda3/envs/debug/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/lexiao/anaconda3/envs/debug/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/home/lexiao/anaconda3/envs/debug/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
TypeError: LlamaRotaryEmbedding.forward() missing 1 required positional argument: 'position_ids'
The `seq_len` argument was removed from `LlamaRotaryEmbedding.forward()` in recent llama code in transformers. Running this code with transformers >= 4.38 will lead to the error above.
The `seq_len` argument was removed from `LlamaRotaryEmbedding.forward()` in recent llama code in transformers. Running this code with transformers >= 4.38 will lead to the error above.