out = peft_model.forward(**batch)
AttributeError Traceback (most recent call last)
Cell In[59], line 1
----> 1 out = peft_model.forward(**batch)
File /usr/local/lib/python3.10/dist-packages/peft/peft_model.py:931, in PeftModelForCausalLM.forward(self, input_ids, attention_mask, inputs_embeds, labels, output_attentions, output_hidden_states, return_dict, task_ids, **kwargs)
920 raise AssertionError("forward in MPTForCausalLM does not support inputs_embeds")
921 return self.base_model(
922 input_ids=input_ids,
923 attention_mask=attention_mask,
(...)
928 **kwargs,
929 )
--> 931 return self.base_model(
932 input_ids=input_ids,
933 attention_mask=attention_mask,
934 inputs_embeds=inputs_embeds,
935 labels=labels,
936 output_attentions=output_attentions,
937 output_hidden_states=output_hidden_states,
938 return_dict=return_dict,
939 **kwargs,
940 )
942 batch_size = _get_batch_size(input_ids, inputs_embeds)
943 if attention_mask is not None:
944 # concat prompt attention mask
File /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
1496 # If we don't have any hooks, we want to skip the rest of the logic in
1497 # this function, and just call forward.
1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = [], []
File /usr/local/lib/python3.10/dist-packages/peft/tuners/adalora/model.py:234, in AdaLoraModel.forward(self, *args, **kwargs)
233 def forward(self, *args, **kwargs):
--> 234 outputs = self.model.forward(*args, **kwargs)
236 if getattr(outputs, "loss", None) is not None:
237 # Calculate the orthogonal regularization
238 orth_reg_weight = self.peft_config[self.trainable_adapter_name].orth_reg_weight
File /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
1496 # If we don't have any hooks, we want to skip the rest of the logic in
1497 # this function, and just call forward.
1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = [], []
File /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
1496 # If we don't have any hooks, we want to skip the rest of the logic in
1497 # this function, and just call forward.
1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = [], []
File /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
1496 # If we don't have any hooks, we want to skip the rest of the logic in
1497 # this function, and just call forward.
1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = [], []
File /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
1496 # If we don't have any hooks, we want to skip the rest of the logic in
1497 # this function, and just call forward.
1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = [], []
File /usr/local/lib/python3.10/dist-packages/peft/tuners/adalora/bnb.py:145, in SVDLinear4bit.forward(self, x)
143 if requires_conversion:
144 expected_dtype = result.dtype
--> 145 compute_dtype = lora_A.weight.dtype
146 if x.dtype != compute_dtype:
147 x = x.to(compute_dtype)
AttributeError: 'Parameter' object has no attribute 'weight'
out = peft_model.forward(batch) out = peft_model.forward(batch)
AttributeError Traceback (most recent call last) Cell In[59], line 1 ----> 1 out = peft_model.forward(**batch)
File /usr/local/lib/python3.10/dist-packages/peft/peft_model.py:931, in PeftModelForCausalLM.forward(self, input_ids, attention_mask, inputs_embeds, labels, output_attentions, output_hidden_states, return_dict, task_ids, kwargs) 920 raise AssertionError("forward in MPTForCausalLM does not support inputs_embeds") 921 return self.base_model( 922 input_ids=input_ids, 923 attention_mask=attention_mask, (...) 928 kwargs, 929 ) --> 931 return self.base_model( 932 input_ids=input_ids, 933 attention_mask=attention_mask, 934 inputs_embeds=inputs_embeds, 935 labels=labels, 936 output_attentions=output_attentions, 937 output_hidden_states=output_hidden_states, 938 return_dict=return_dict, 939 **kwargs, 940 ) 942 batch_size = _get_batch_size(input_ids, inputs_embeds) 943 if attention_mask is not None: 944 # concat prompt attention mask
File /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, *kwargs) 1496 # If we don't have any hooks, we want to skip the rest of the logic in 1497 # this function, and just call forward. 1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks 1499 or _global_backward_pre_hooks or _global_backward_hooks 1500 or _global_forward_hooks or _global_forward_pre_hooks): -> 1501 return forward_call(args, **kwargs) 1502 # Do not call functions when jit is used 1503 full_backward_hooks, non_full_backward_hooks = [], []
File /usr/local/lib/python3.10/dist-packages/peft/tuners/adalora/model.py:234, in AdaLoraModel.forward(self, *args, kwargs) 233 def forward(self, *args, *kwargs): --> 234 outputs = self.model.forward(args, kwargs) 236 if getattr(outputs, "loss", None) is not None: 237 # Calculate the orthogonal regularization 238 orth_reg_weight = self.peft_config[self.trainable_adapter_name].orth_reg_weight
File /usr/local/lib/python3.10/dist-packages/accelerate/hooks.py:165, in add_hook_to_module.<locals>.new_forward(*args, **kwargs)
163 output = old_forward(*args, **kwargs)
164 else:
--> 165 output = old_forward(*args, **kwargs)
166 return module._hf_hook.post_forward(module, output)
File ~/.cache/huggingface/modules/transformers_modules/Baichuan-13B-Chat/modeling_baichuan.py:447, in BaichuanForCausalLM.forward(self, input_ids, attention_mask, past_key_values, inputs_embeds, labels, use_cache, output_attentions, output_hidden_states, return_dict, **kwargs) 444 return_dict = return_dict if return_dict is not None else self.config.use_return_dict 446 # decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn) --> 447 outputs = self.model( 448 input_ids=input_ids, 449 attention_mask=attention_mask, 450 past_key_values=past_key_values, 451 inputs_embeds=inputs_embeds, 452 use_cache=use_cache, 453 output_attentions=output_attentions, 454 output_hidden_states=output_hidden_states, 455 return_dict=return_dict, 456 ) 458 hidden_states = outputs[0] 459 logits = self.lm_head(hidden_states)
File /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, *kwargs) 1496 # If we don't have any hooks, we want to skip the rest of the logic in 1497 # this function, and just call forward. 1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks 1499 or _global_backward_pre_hooks or _global_backward_hooks 1500 or _global_forward_hooks or _global_forward_pre_hooks): -> 1501 return forward_call(args, **kwargs) 1502 # Do not call functions when jit is used 1503 full_backward_hooks, non_full_backward_hooks = [], []
File /usr/local/lib/python3.10/dist-packages/accelerate/hooks.py:165, in add_hook_to_module.<locals>.new_forward(*args, **kwargs)
163 output = old_forward(*args, **kwargs)
164 else:
--> 165 output = old_forward(*args, **kwargs)
166 return module._hf_hook.post_forward(module, output)
File ~/.cache/huggingface/modules/transformers_modules/Baichuan-13B-Chat/modeling_baichuan.py:370, in BaichuanModel.forward(self, input_ids, attention_mask, past_key_values, inputs_embeds, use_cache, output_attentions, output_hidden_states, return_dict) 363 layer_outputs = torch.utils.checkpoint.checkpoint( 364 create_custom_forward(decoder_layer), 365 hidden_states, 366 attention_mask, 367 None, 368 ) 369 else: --> 370 layer_outputs = decoder_layer( 371 hidden_states, 372 attention_mask=attention_mask, 373 past_key_value=past_key_value, 374 output_attentions=output_attentions, 375 use_cache=use_cache, 376 ) 378 hidden_states = layer_outputs[0] 380 if use_cache:
File /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, *kwargs) 1496 # If we don't have any hooks, we want to skip the rest of the logic in 1497 # this function, and just call forward. 1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks 1499 or _global_backward_pre_hooks or _global_backward_hooks 1500 or _global_forward_hooks or _global_forward_pre_hooks): -> 1501 return forward_call(args, **kwargs) 1502 # Do not call functions when jit is used 1503 full_backward_hooks, non_full_backward_hooks = [], []
File /usr/local/lib/python3.10/dist-packages/accelerate/hooks.py:165, in add_hook_to_module.<locals>.new_forward(*args, **kwargs)
163 output = old_forward(*args, **kwargs)
164 else:
--> 165 output = old_forward(*args, **kwargs)
166 return module._hf_hook.post_forward(module, output)
File ~/.cache/huggingface/modules/transformers_modules/Baichuan-13B-Chat/modeling_baichuan.py:193, in BaichuanLayer.forward(self, hidden_states, attention_mask, past_key_value, output_attentions, use_cache) 190 hidden_states = self.input_layernorm(hidden_states) 192 # Self Attention --> 193 hidden_states, self_attn_weights, present_key_value = self.self_attn( 194 hidden_states=hidden_states, 195 attention_mask=attention_mask, 196 past_key_value=past_key_value, 197 output_attentions=output_attentions, 198 use_cache=use_cache, 199 ) 200 hidden_states = residual + hidden_states 202 # Fully Connected
File /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, *kwargs) 1496 # If we don't have any hooks, we want to skip the rest of the logic in 1497 # this function, and just call forward. 1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks 1499 or _global_backward_pre_hooks or _global_backward_hooks 1500 or _global_forward_hooks or _global_forward_pre_hooks): -> 1501 return forward_call(args, **kwargs) 1502 # Do not call functions when jit is used 1503 full_backward_hooks, non_full_backward_hooks = [], []
File /usr/local/lib/python3.10/dist-packages/accelerate/hooks.py:165, in add_hook_to_module.<locals>.new_forward(*args, **kwargs)
163 output = old_forward(*args, **kwargs)
164 else:
--> 165 output = old_forward(*args, **kwargs)
166 return module._hf_hook.post_forward(module, output)
File ~/.cache/huggingface/modules/transformers_modules/Baichuan-13B-Chat/modeling_baichuan.py:124, in BaichuanAttention.forward(self, hidden_states, attention_mask, past_key_value, output_attentions, use_cache) 113 def forward( 114 self, 115 hidden_states: torch.Tensor, (...) 119 use_cache: bool = False, 120 ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: 122 bsz, qlen, = hidden_states.size() --> 124 proj = self.W_pack(hidden_states) 125 proj = proj.unflatten(-1, (3, self.hidden_size)).unsqueeze(0).transpose(0, -2).squeeze(-2) 126 query_states = proj[0].view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
File /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, *kwargs) 1496 # If we don't have any hooks, we want to skip the rest of the logic in 1497 # this function, and just call forward. 1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks 1499 or _global_backward_pre_hooks or _global_backward_hooks 1500 or _global_forward_hooks or _global_forward_pre_hooks): -> 1501 return forward_call(args, **kwargs) 1502 # Do not call functions when jit is used 1503 full_backward_hooks, non_full_backward_hooks = [], []
File /usr/local/lib/python3.10/dist-packages/peft/tuners/adalora/bnb.py:145, in SVDLinear4bit.forward(self, x) 143 if requires_conversion: 144 expected_dtype = result.dtype --> 145 compute_dtype = lora_A.weight.dtype 146 if x.dtype != compute_dtype: 147 x = x.to(compute_dtype)
AttributeError: 'Parameter' object has no attribute 'weight'
Could you share which versions of torch and peft you are using? (可否提供下 torch 和 peft 的版本？)