bjarnedesmetinect opened this issue 3 months ago
Sorry, I see you installed transformers from the tip of the implementation branch; some commits were added there that might've broken the code. Can you install this commit from the same branch instead: e1b7c0a05ab65e4ddb62a407fe12f8ec13a916f0? @bjarnedesmetinect
Many thanks! That did the job. I saw it had something to do with the cache? Just asking out of curiosity, to check whether I was on the right track!
Hi,
I was able to install the right commit with the following command: `pip install git+https://github.com/andimarafioti/transformers.git@e1b7c0a05ab65e4ddb62a407fe12f8ec13a916f0`
I still got the "get_seq_length" problem. Can anyone help? Am I doing something wrong?
EDIT: SHA a72b30fe06bba77d9df4c72fcea48bbdc0d812a5 worked. (Same commit but a different SHA?)
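If the two SHAs look contradictory: a rebase or force-push on the branch gives the same change a new commit hash, so the hash in an older install URL may simply no longer exist on the branch. A quick way to confirm which transformers commit actually got installed is to read the package's PEP 610 metadata; a minimal sketch, assuming a pip install from a git URL (direct_url.json is absent for regular PyPI installs):

```python
# Sketch: confirm which transformers commit is actually installed.
# direct_url.json (PEP 610) is written by pip for VCS/URL installs and records
# the resolved commit; it is missing for normal PyPI wheel installs.
import json
from importlib import metadata

dist = metadata.distribution("transformers")
print("version:", dist.version)

direct_url = dist.read_text("direct_url.json")
if direct_url:
    info = json.loads(direct_url)
    print("installed from:", info.get("url"))
    print("resolved commit:", info.get("vcs_info", {}).get("commit_id"))
else:
    print("installed from a regular wheel/sdist, not a git URL")
```

Running this after the pip install above should print the resolved commit_id, which can be compared against the SHA that was requested.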
@bjarnedesmetinect Yes, it was; very sorry for the inconvenience! @pierre-bedu-catie Interesting!
Hi,
I get this error in the training part. Is there something I am doing wrong? For test purposes I am using the same dataset.
Thanks!
```
AttributeError                            Traceback (most recent call last)
Cell In[19], line 1
----> 1 trainer.train()

File /usr/local/lib/python3.10/dist-packages/transformers/trainer.py:1948, in Trainer.train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
   1946     hf_hub_utils.enable_progress_bars()
   1947 else:
-> 1948     return inner_training_loop(
   1949         args=args,
   1950         resume_from_checkpoint=resume_from_checkpoint,
   1951         trial=trial,
   1952         ignore_keys_for_eval=ignore_keys_for_eval,
   1953     )

File /usr/local/lib/python3.10/dist-packages/transformers/trainer.py:2289, in Trainer._inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
   2286 self.control = self.callback_handler.on_step_begin(args, self.state, self.control)
   2288 with self.accelerator.accumulate(model):
-> 2289     tr_loss_step = self.training_step(model, inputs)
   2291 if (
   2292     args.logging_nan_inf_filter
   2293     and not is_torch_xla_available()
   2294     and (torch.isnan(tr_loss_step) or torch.isinf(tr_loss_step))
   2295 ):
   2296     # if loss is nan or inf simply add the average of previous logged losses
   2297     tr_loss += tr_loss / (1 + self.state.global_step - self._globalstep_last_logged)

File /usr/local/lib/python3.10/dist-packages/transformers/trainer.py:3328, in Trainer.training_step(self, model, inputs)
   3325     return loss_mb.reduce_mean().detach().to(self.args.device)
   3327 with self.compute_loss_context_manager():
-> 3328     loss = self.compute_loss(model, inputs)
   3330 del inputs
   3331 if (
   3332     self.args.torch_empty_cache_steps is not None
   3333     and self.state.global_step % self.args.torch_empty_cache_steps == 0
   3334 ):

File /usr/local/lib/python3.10/dist-packages/transformers/trainer.py:3373, in Trainer.compute_loss(self, model, inputs, return_outputs)
   3371 else:
   3372     labels = None
-> 3373 outputs = model(**inputs)
   3374 # Save past state if it exists
   3375 # TODO: this needs to be fixed and made cleaner later.
   3376 if self.args.past_index >= 0:

File /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1518, in Module._wrapped_call_impl(self, *args, **kwargs)
   1516     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1517 else:
-> 1518     return self._call_impl(*args, **kwargs)

File /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1527, in Module._call_impl(self, *args, **kwargs)
   1522 # If we don't have any hooks, we want to skip the rest of the logic in
   1523 # this function, and just call forward.
   1524 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1525         or _global_backward_pre_hooks or _global_backward_hooks
   1526         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1527     return forward_call(*args, **kwargs)
   1529 try:
   1530     result = None

File /usr/local/lib/python3.10/dist-packages/accelerate/utils/operations.py:819, in convert_outputs_to_fp32.<locals>.forward(*args, **kwargs)
    818 def forward(*args, **kwargs):
--> 819     return model_forward(*args, **kwargs)

File /usr/local/lib/python3.10/dist-packages/accelerate/utils/operations.py:807, in ConvertOutputsToFp32.__call__(self, *args, **kwargs)
    806 def __call__(self, *args, **kwargs):
--> 807     return convert_to_fp32(self.model_forward(*args, **kwargs))

File /usr/local/lib/python3.10/dist-packages/torch/amp/autocast_mode.py:16, in autocast_decorator.<locals>.decorate_autocast(*args, **kwargs)
     13 @functools.wraps(func)
     14 def decorate_autocast(*args, **kwargs):
     15     with autocast_instance:
---> 16         return func(*args, **kwargs)

File /usr/local/lib/python3.10/dist-packages/accelerate/utils/operations.py:819, in convert_outputs_to_fp32.<locals>.forward(*args, **kwargs)
    818 def forward(*args, **kwargs):
--> 819     return model_forward(*args, **kwargs)

File /usr/local/lib/python3.10/dist-packages/accelerate/utils/operations.py:807, in ConvertOutputsToFp32.__call__(self, *args, **kwargs)
    806 def __call__(self, *args, **kwargs):
--> 807     return convert_to_fp32(self.model_forward(*args, **kwargs))

File /usr/local/lib/python3.10/dist-packages/torch/amp/autocast_mode.py:16, in autocast_decorator.<locals>.decorate_autocast(*args, **kwargs)
     13 @functools.wraps(func)
     14 def decorate_autocast(*args, **kwargs):
     15     with autocast_instance:
---> 16         return func(*args, **kwargs)

File /usr/local/lib/python3.10/dist-packages/accelerate/hooks.py:169, in add_hook_to_module.<locals>.new_forward(module, *args, **kwargs)
    167     output = module._old_forward(*args, **kwargs)
    168 else:
--> 169     output = module._old_forward(*args, **kwargs)
    170 return module._hf_hook.post_forward(module, output)

File /usr/local/lib/python3.10/dist-packages/transformers/models/idefics3/modeling_idefics3.py:1145, in Idefics3ForConditionalGeneration.forward(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, pixel_values, pixel_attention_mask, image_hidden_states, labels, use_cache, output_attentions, output_hidden_states, return_dict)
   1142 return_dict = return_dict if return_dict is not None else self.config.use_return_dict
   1144 # decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)
-> 1145 outputs = self.model(
   1146     input_ids=input_ids,
   1147     attention_mask=attention_mask,
   1148     position_ids=position_ids,
   1149     past_key_values=past_key_values,
   1150     inputs_embeds=inputs_embeds,
   1151     pixel_values=pixel_values,
   1152     pixel_attention_mask=pixel_attention_mask,
   1153     image_hidden_states=image_hidden_states,
   1154     use_cache=use_cache,
   1155     output_attentions=output_attentions,
   1156     output_hidden_states=output_hidden_states,
   1157     return_dict=return_dict,
   1158 )
   1160 hidden_states = outputs[0]
   1161 logits = self.lm_head(hidden_states)

File /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1518, in Module._wrapped_call_impl(self, *args, **kwargs)
   1516     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1517 else:
-> 1518     return self._call_impl(*args, **kwargs)

File /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1527, in Module._call_impl(self, *args, **kwargs)
   1522 # If we don't have any hooks, we want to skip the rest of the logic in
   1523 # this function, and just call forward.
   1524 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1525         or _global_backward_pre_hooks or _global_backward_hooks
   1526         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1527     return forward_call(*args, **kwargs)
   1529 try:
   1530     result = None

File /usr/local/lib/python3.10/dist-packages/accelerate/hooks.py:169, in add_hook_to_module.<locals>.new_forward(module, *args, **kwargs)
    167     output = module._old_forward(*args, **kwargs)
    168 else:
--> 169     output = module._old_forward(*args, **kwargs)
    170 return module._hf_hook.post_forward(module, output)

File /usr/local/lib/python3.10/dist-packages/transformers/models/idefics3/modeling_idefics3.py:937, in Idefics3Model.forward(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, pixel_values, pixel_attention_mask, image_hidden_states, use_cache, output_attentions, output_hidden_states, return_dict)
    935 past_seen_tokens = 0
    936 if use_cache:
--> 937     past_seen_tokens = past_key_values.get_seq_length()
    939 if inputs_embeds is not None and input_ids is None and past_seen_tokens == 0:
    940     raise ValueError("When first calling the model, if input_embeds are passed, input_ids should not be None.")

AttributeError: 'NoneType' object has no attribute 'get_seq_length'
```
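For context on the traceback: the failing line in Idefics3Model.forward assumes a cache object is present whenever use_cache is true, but the Trainer does not pass past_key_values during training, so the attribute lookup happens on None. Below is a minimal sketch of the kind of guard that avoids the error; seen_tokens is a hypothetical helper for illustration, not the fix that landed in the pinned commit.

```python
# Hypothetical helper illustrating the failure mode and a defensive guard;
# not the change that shipped in the pinned commit.
from transformers.cache_utils import DynamicCache

def seen_tokens(past_key_values, use_cache):
    """Number of tokens already in the KV cache, tolerating a missing cache."""
    if not use_cache or past_key_values is None:
        return 0
    # Older tuple-style caches can be wrapped in the newer Cache API first.
    if isinstance(past_key_values, tuple):
        past_key_values = DynamicCache.from_legacy_cache(past_key_values)
    return past_key_values.get_seq_length()

print(seen_tokens(None, use_cache=True))   # 0 instead of the AttributeError above
print(seen_tokens(DynamicCache(), False))  # 0: cache unused
```

A common training-time workaround for errors of this shape is to set model.config.use_cache = False before calling trainer.train(), since the generation cache is not needed for computing the training loss; whether that alone is enough for this particular notebook is an assumption.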