hannamw / eap-ig-faithfulness

Code for "Automatic Circuit Finding and Faithfulness"

Encountering a possible model format issue with AWQ-INT4 quantized Llama3.1-8B #2

Closed: TacticalSpoon331 closed this issue 4 days ago

TacticalSpoon331 commented 4 days ago

```
AttributeError                            Traceback (most recent call last)
Cell In[3], line 16
      9 MODEL_TYPE = "mylesgoose/Llama-3.1-8B-Instruct"
     10 # models--mylesgoose--Llama-3.1-8B-Instruct
     11
     12 # Download and load model
     13 # !git clone https://huggingface.co/{MODEL_ID} {MODEL_TYPE}
     14
     15 # Load model and tokenizer
---> 16 model = HookedTransformer.from_pretrained_no_processing(
     17     # "/workspace/mylesgoose/Llama-3.1-8B-Instruct",
     18     f"{MODEL_ID}",
     19     local_files_only=True,
     20     dtype=torch.float16,
     21     default_padding_side='left'
     22 )
     23 tokenizer = AutoTokenizer.from_pretrained(f"{MODEL_ID}")
     24 tokenizer.padding_side = 'left'

File /workspace/myenv/lib/python3.10/site-packages/transformer_lens/HookedTransformer.py:1345, in HookedTransformer.from_pretrained_no_processing(cls, model_name, fold_ln, center_writing_weights, center_unembed, refactor_factored_attn_matrices, fold_value_biases, dtype, default_prepend_bos, default_padding_side, **from_pretrained_kwargs)
   1326 @classmethod
   1327 def from_pretrained_no_processing(
   1328     cls,
   (...)
   1338     **from_pretrained_kwargs,
   1339 ):
   1340     """Wrapper for from_pretrained.
   1341
   1342     Wrapper for from_pretrained with all boolean flags related to simplifying the model set to
   1343     False. Refer to from_pretrained for details.
   1344     """
-> 1345     return cls.from_pretrained(
   1346         model_name,
   1347         fold_ln=fold_ln,
   1348         center_writing_weights=center_writing_weights,
   1349         center_unembed=center_unembed,
   1350         fold_value_biases=fold_value_biases,
   1351         refactor_factored_attn_matrices=refactor_factored_attn_matrices,
   1352         dtype=dtype,
   1353         default_prepend_bos=default_prepend_bos,
   1354         default_padding_side=default_padding_side,
   1355         **from_pretrained_kwargs,
   1356     )

File /workspace/myenv/lib/python3.10/site-packages/transformer_lens/HookedTransformer.py:1298, in HookedTransformer.from_pretrained(cls, model_name, fold_ln, center_writing_weights, center_unembed, refactor_factored_attn_matrices, checkpoint_index, checkpoint_value, hf_model, device, n_devices, tokenizer, move_to_device, fold_value_biases, default_prepend_bos, default_padding_side, dtype, first_n_layers, **from_pretrained_kwargs)
   1294     center_unembed = False
   1296 # Get the state dict of the model (ie a mapping of parameter names to tensors), processed to
   1297 # match the HookedTransformer parameter names.
-> 1298 state_dict = loading.get_pretrained_state_dict(
   1299     official_model_name, cfg, hf_model, dtype=dtype, **from_pretrained_kwargs
   1300 )
   1302 # Create the HookedTransformer object
   1303 model = cls(
   1304     cfg,
   1305     tokenizer,
   1306     move_to_device=False,
   1307     default_padding_side=default_padding_side,
   1308 )

File /workspace/myenv/lib/python3.10/site-packages/transformer_lens/loading_from_pretrained.py:1840, in get_pretrained_state_dict(official_model_name, cfg, hf_model, dtype, **kwargs)
   1838     state_dict = convert_neox_weights(hf_model, cfg)
   1839 elif cfg.original_architecture == "LlamaForCausalLM":
-> 1840     state_dict = convert_llama_weights(hf_model, cfg)
   1841 elif cfg.original_architecture == "MllamaForConditionalGeneration":
   1842     state_dict = convert_mllama_weights(hf_model, cfg)

File /workspace/myenv/lib/python3.10/site-packages/transformer_lens/pretrained/weight_conversions/llama.py:29, in convert_llama_weights(llama, cfg)
    26 for l in range(cfg.n_layers):
    27     state_dict[f"blocks.{l}.ln1.w"] = llama.model.layers[l].input_layernorm.weight
---> 29     W_Q = llama.model.layers[l].self_attn.q_proj.weight
    30     W_K = llama.model.layers[l].self_attn.k_proj.weight
    31     W_V = llama.model.layers[l].self_attn.v_proj.weight

File /workspace/myenv/lib/python3.10/site-packages/torch/nn/modules/module.py:1709, in Module.__getattr__(self, name)
   1707 if name in modules:
   1708     return modules[name]
-> 1709 raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")

AttributeError: 'WQLinear_GEMM' object has no attribute 'weight'
```
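
For context: AWQ quantization replaces each `nn.Linear` with a `WQLinear_GEMM` module that stores packed integer weights (`qweight`) plus dequantization parameters (`scales`, `qzeros`) instead of a dense fp16 `weight` tensor, and `weight` is exactly the attribute TransformerLens's `convert_llama_weights` tries to read. A minimal diagnostic sketch, assuming `transformers` with `autoawq` installed and the AWQ checkpoint linked in the next comment:

```python
import torch
from transformers import AutoModelForCausalLM

# AWQ-INT4 checkpoint from the comment below (loading it through
# transformers requires the autoawq package to be installed).
MODEL_ID = "hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4"
hf_model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.float16)

q_proj = hf_model.model.layers[0].self_attn.q_proj
print(type(q_proj).__name__)       # WQLinear_GEMM, not nn.Linear
print(hasattr(q_proj, "weight"))   # False: hence the AttributeError above
print(q_proj.qweight.dtype)        # torch.int32: packed 4-bit values, not fp16
print(q_proj.scales.shape, q_proj.qzeros.shape)  # per-group dequant parameters
```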

TacticalSpoon331 commented 4 days ago

Model page: https://huggingface.co/hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4/tree/main

An auto-gptq quantized model was giving me some issues, so my next idea was auto-awq.

Using the attribute `qweight` instead of `weight` fixed the immediate error, but caused much more chaos down the line.
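
That chaos is expected: `qweight` is not a drop-in replacement for `weight`. It is an int32 tensor with eight 4-bit values packed per element, so its shape and dtype don't match the dense fp16 matrices that `convert_llama_weights` splits into per-head `W_Q`/`W_K`/`W_V`, and every reshape after the swap operates on mispacked data. One workaround sketch, assuming fp16 memory is available: skip the quantized checkpoint and hand TransformerLens an already-loaded unquantized model via the `hf_model` argument that `from_pretrained` accepts (visible in the traceback signature above). The repo id here is an assumption; any fp16 Llama-3.1-8B-Instruct checkpoint that TransformerLens recognizes should behave the same.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformer_lens import HookedTransformer

# Assumed unquantized base checkpoint (gated; substitute any fp16 mirror).
BASE_ID = "meta-llama/Llama-3.1-8B-Instruct"

hf_model = AutoModelForCausalLM.from_pretrained(BASE_ID, torch_dtype=torch.float16)
tokenizer = AutoTokenizer.from_pretrained(BASE_ID)

model = HookedTransformer.from_pretrained_no_processing(
    BASE_ID,                 # model name TransformerLens can resolve to a config
    hf_model=hf_model,       # dense fp16 weights, so convert_llama_weights works
    tokenizer=tokenizer,
    dtype=torch.float16,
    default_padding_side="left",
)
```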

TacticalSpoon331 commented 4 days ago

Completely wrong repo 😭😭😭