Open TobyGE opened 10 months ago
Hmm, a new error with MPT models. I fixed one not too long ago and had a successful quantization with this model, and the last change they introduced was 2 months ago. Do you by chance have an old version of their model downloaded?
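If you want to rule that out, re-fetching the repo's code files is enough to refresh a stale modeling_mpt.py. A minimal sketch with huggingface_hub (it only re-downloads the Python/config files, not the weights):

from huggingface_hub import snapshot_download

# Force a fresh copy of mosaicml/mpt-7b's Python and config files, bypassing any
# stale cached snapshot; the returned path is the local snapshot directory.
path = snapshot_download("mosaicml/mpt-7b", allow_patterns=["*.py", "*.json"], force_download=True)
print(path)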
Not currently. I was just trying to learn the basics of AutoAWQ when I ran into this issue.
I got the same error. Do you have any idea how to fix this? @TobyGE @casper-hansen
Traceback (most recent call last):
File "/lustre/scratch/client/scratch/research/user/datnq9/AutoAWQ/convert.py", line 13, in <module>
model.quantize(tokenizer, quant_config=quant_config)
File "/usr/lib/python3/dist-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/lustre/scratch/client/scratch/research/user/datnq9/AutoAWQ/awq/models/base.py", line 155, in quantize
self.quantizer = AwqQuantizer(
File "/lustre/scratch/client/scratch/research/user/datnq9/AutoAWQ/awq/quantize/quantizer.py", line 56, in __init__
self.modules, self.module_kwargs, self.inps = self.init_quant()
File "/lustre/scratch/client/scratch/research/user/datnq9/AutoAWQ/awq/quantize/quantizer.py", line 472, in init_quant
layer_kwargs = self.model.prepare_inputs_for_generation(samples, **layer_kwargs)
File "/root/.cache/huggingface/modules/transformers_modules/mpt-7b/modeling_mpt.py", line 300, in prepare_inputs_for_generation
attention_mask = kwargs['attention_mask'].bool()
AttributeError: 'NoneType' object has no attribute 'bool'
Getting a similar error but with Llama and some Catcher class:
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer
AWQ_CONFIG = { "zero_point": True, "q_group_size": 128, "w_bit": 4, "version": "GEMM" }
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path="meta-llama/Llama-2-7b-hf")
llama = AutoAWQForCausalLM.from_pretrained(model_path="meta-llama/Llama-2-7b-hf")
llama.quantize(tokenizer=tokenizer, quant_config=AWQ_CONFIG)
Traceback (most recent call last):
File "/home/liamdodds/projects/quantization/quantize.py", line 20, in <module>
llama.quantize(tokenizer=tokenizer, quant_config=AWQ_CONFIG)
File "/home/liamdodds/projects/quantization/.env/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/home/liamdodds/projects/quantization/.env/lib/python3.10/site-packages/awq/models/base.py", line 155, in quantize
self.quantizer = AwqQuantizer(
File "/home/liamdodds/projects/quantization/.env/lib/python3.10/site-packages/awq/quantize/quantizer.py", line 56, in __init__
self.modules, self.module_kwargs, self.inps = self.init_quant()
File "/home/liamdodds/projects/quantization/.env/lib/python3.10/site-packages/awq/quantize/quantizer.py", line 472, in init_quant
layer_kwargs = self.model.prepare_inputs_for_generation(samples, **layer_kwargs)
File "/home/liamdodds/projects/quantization/.env/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 1256, in prepare_inputs_for_generation
if past_key_value := getattr(self.model.layers[0].self_attn, "past_key_value", None):
File "/home/liamdodds/projects/quantization/.env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1688, in __getattr__
raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")
AttributeError: 'Catcher' object has no attribute 'self_attn'
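For what it's worth, the Catcher in that traceback seems to come from the quantizer's calibration step, which temporarily swaps the first decoder layer for a wrapper that only records the layer's inputs; while that wrapper is installed, model.layers[0] has no self_attn attribute, which is what the newer prepare_inputs_for_generation trips over. A rough illustration of the pattern (a sketch, not AutoAWQ's actual implementation):

import torch.nn as nn

class Catcher(nn.Module):
    # Illustrative stand-in for the wrapper named in the traceback, not AutoAWQ's code.
    def __init__(self, layer):
        super().__init__()
        self.layer = layer       # the real first decoder layer it replaces
        self.captured = None     # hidden states / kwargs recorded during calibration

    def forward(self, hidden_states, **kwargs):
        # Record what the first layer would have received, then abort the forward pass;
        # the quantizer only needs these calibration inputs, not the model's output.
        self.captured = (hidden_states, kwargs)
        raise ValueError("calibration inputs captured")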
Has anyone made any headway with this yet? I managed to get past AttributeError: 'NoneType' object has no attribute 'bool' by passing in the attention_mask manually, but I've been running into a chain of other issues.
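For reference, roughly how the mask can be supplied (a sketch reusing the model, tokenizer and quant_config names from the MPT repro posted in this issue; it just defaults a missing mask to all ones and is not a verified fix):

import torch

# Assumes `model` is the AutoAWQForCausalLM wrapper and `tokenizer` / `quant_config`
# come from the MPT repro script in this issue; model.model is the underlying HF MPT model.
_orig_prepare = model.model.prepare_inputs_for_generation

def _prepare_with_mask(input_ids, *args, **kwargs):
    # init_quant calls prepare_inputs_for_generation with attention_mask=None for MPT;
    # default it to an all-ones mask so the .bool() call no longer fails.
    if kwargs.get("attention_mask") is None:
        kwargs["attention_mask"] = torch.ones_like(input_ids, dtype=torch.bool)
    return _orig_prepare(input_ids, *args, **kwargs)

model.model.prepare_inputs_for_generation = _prepare_with_mask
model.quantize(tokenizer, quant_config=quant_config)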
I got the same error ('Catcher' object has no attribute 'self_attn', with Llama).
facing this issue as well
Ran the following code to quantize MPT-7B and hit the error below.
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer

model_path = "mosaicml/mpt-7b"
quant_path = './mpt_7b_awq'

# Load model
model = AutoAWQForCausalLM.from_pretrained(model_path, cache_dir="./huggingface_transformers/")
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

# Quantize
quant_config = { "zero_point": True, "q_group_size": 128, "w_bit": 4, "version": "GEMM" }
model.quantize(tokenizer, quant_config=quant_config)
Error
AttributeError                            Traceback (most recent call last)
Cell In[3], line 4
      1 # Quantize
      2 quant_config = { "zero_point": True, "q_group_size": 128, "w_bit": 4, "version": "GEMM" }
----> 4 model.quantize(tokenizer, quant_config=quant_config)

File ~/SageMaker/vllm/lib/python3.10/site-packages/torch/utils/_contextlib.py:115, in context_decorator.<locals>.decorate_context(*args, **kwargs)
    112 @functools.wraps(func)
    113 def decorate_context(*args, **kwargs):
    114     with ctx_factory():
--> 115         return func(*args, **kwargs)

File ~/SageMaker/vllm/lib/python3.10/site-packages/awq/models/base.py:89, in BaseAWQForCausalLM.quantize(self, tokenizer, quant_config, calib_data, split, text_column, duo_scaling, modules_to_not_convert)
     83 @torch.no_grad()
     84 def quantize(self, tokenizer=None, quant_config={},
     85              calib_data: Union[str, List[str]]="pileval",
     86              split="train", text_column="text", duo_scaling=True, modules_to_not_convert=None):
     87     self.quant_config: AwqConfig = AwqConfig.from_dict(quant_config)
---> 89     quantizer = AwqQuantizer(
     90         self, self.model, tokenizer, self.quant_config.w_bit, self.quant_config.q_group_size,
     91         self.quant_config.version, calib_data, split, text_column, duo_scaling, modules_to_not_convert=modules_to_not_convert
     92     )
     93     quantizer.quantize()
     95     self.is_quantized = True

File ~/SageMaker/vllm/lib/python3.10/site-packages/awq/quantize/quantizer.py:36, in AwqQuantizer.__init__(self, awq_model, model, tokenizer, w_bit, group_size, version, calib_data, split, text_column, duo_scaling, modules_to_not_convert)
     34 self.duo_scaling = duo_scaling
     35 self.modules_to_not_convert = modules_to_not_convert if modules_to_not_convert is not None else []
---> 36 self.modules, self.module_kwargs, self.inps = self.init_quant()

File ~/SageMaker/vllm/lib/python3.10/site-packages/awq/quantize/quantizer.py:362, in AwqQuantizer.init_quant(self, n_samples, seqlen)
    358     pass
    360 # Update the layer kwargs with `prepare_inputs_for_generation` method
    361 # that takes care of everything to avoid unexpected errors.
--> 362 layer_kwargs = self.model.prepare_inputs_for_generation(samples, **layer_kwargs)
    363 # Pop the input_ids as they are not needed at all.
    364 layer_kwargs.pop("input_ids")

File ~/.cache/huggingface/modules/transformers_modules/ada218f9a93b5f1c6dce48a4cc9ff01fcba431e7/modeling_mpt.py:300, in MPTForCausalLM.prepare_inputs_for_generation(self, input_ids, past_key_values, inputs_embeds, **kwargs)
    298 if inputs_embeds is not None:
    299     raise NotImplementedError('inputs_embeds is not implemented for MPT yet')
--> 300 attention_mask = kwargs['attention_mask'].bool()
    301 if attention_mask[:, -1].sum() != attention_mask.shape[0]:
    302     raise NotImplementedError('MPT does not support generation with right padding.')

AttributeError: 'NoneType' object has no attribute 'bool'