meta-llama / codellama

Inference code for CodeLlama models

Finetuning 7B codellama: Runtime error #64

Open · Kushalamummigatti opened this issue 1 year ago

Kushalamummigatti commented 1 year ago

I am trying to finetune CodeLlama with the same approach as Llama 2, using the same finetuning script. I am not sure whether this is right, since neither the repo nor the blog discusses a finetuning approach.

I am facing this error:

RuntimeError: shape '[-1, 32000]' is invalid for input of size 131073504

RuntimeError                              Traceback (most recent call last)
Cell In[10], line 29
     20 trainer = Trainer(
     21     model=model,
     22     args=training_args,
   (...)
     25     callbacks=[profiler_callback] if enable_profiler else [],
     26 )
     28 # Start training
---> 29 trainer.train()

File /opt/conda/envs/llama_cona_env/lib/python3.8/site-packages/transformers/trainer.py:1662, in Trainer.train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
   1657     self.model_wrapped = self.model
   1659 inner_training_loop = find_executable_batch_size(
   1660     self._inner_training_loop, self._train_batch_size, args.auto_find_batch_size
   1661 )
-> 1662 return inner_training_loop(
   1663     args=args,
   1664     resume_from_checkpoint=resume_from_checkpoint,
   1665     trial=trial,
   1666     ignore_keys_for_eval=ignore_keys_for_eval,
   1667 )

File /opt/conda/envs/llama_cona_env/lib/python3.8/site-packages/transformers/trainer.py:1929, in Trainer._inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
   1927     tr_loss_step = self.training_step(model, inputs)
   1928 else:
-> 1929     tr_loss_step = self.training_step(model, inputs)
   1931 if (
   1932     args.logging_nan_inf_filter
   1933     and not is_torch_tpu_available()
   1934     and (torch.isnan(tr_loss_step) or torch.isinf(tr_loss_step))
   1935 ):
   1936     # if loss is nan or inf simply add the average of previous logged losses
   1937     tr_loss += tr_loss / (1 + self.state.global_step - self._globalstep_last_logged)

File /opt/conda/envs/llama_cona_env/lib/python3.8/site-packages/transformers/trainer.py:2699, in Trainer.training_step(self, model, inputs)
   2696     return loss_mb.reduce_mean().detach().to(self.args.device)
   2698 with self.compute_loss_context_manager():
-> 2699     loss = self.compute_loss(model, inputs)
   2701 if self.args.n_gpu > 1:
   2702     loss = loss.mean()  # mean() to average on multi-gpu parallel training

File /opt/conda/envs/llama_cona_env/lib/python3.8/site-packages/transformers/trainer.py:2731, in Trainer.compute_loss(self, model, inputs, return_outputs)
   2729 else:
   2730     labels = None
-> 2731 outputs = model(**inputs)
   2732 # Save past state if it exists
   2733 # TODO: this needs to be fixed and made cleaner later.
   2734 if self.args.past_index >= 0:

File /opt/conda/envs/llama_cona_env/lib/python3.8/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

File /opt/conda/envs/llama_cona_env/lib/python3.8/site-packages/peft/peft_model.py:947, in PeftModelForCausalLM.forward(self, input_ids, attention_mask, inputs_embeds, labels, output_attentions, output_hidden_states, return_dict, **kwargs)
    936     raise AssertionError("forward in MPTForCausalLM does not support inputs_embeds")
    937     return self.base_model(
    938         input_ids=input_ids,
    939         attention_mask=attention_mask,
   (...)
    944         **kwargs,
    945     )
--> 947 return self.base_model(
    948     input_ids=input_ids,
    949     attention_mask=attention_mask,
    950     inputs_embeds=inputs_embeds,
    951     labels=labels,
    952     output_attentions=output_attentions,
    953     output_hidden_states=output_hidden_states,
    954     return_dict=return_dict,
    955     **kwargs,
    956 )
    958 batch_size = input_ids.shape[0]
    959 if attention_mask is not None:
    960     # concat prompt attention mask

File /opt/conda/envs/llama_cona_env/lib/python3.8/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

File /opt/conda/envs/llama_cona_env/lib/python3.8/site-packages/accelerate/hooks.py:165, in add_hook_to_module.<locals>.new_forward(*args, **kwargs)
    163     output = old_forward(*args, **kwargs)
    164 else:
--> 165     output = old_forward(*args, **kwargs)
    166 return module._hf_hook.post_forward(module, output)

File /opt/conda/envs/llama_cona_env/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:709, in LlamaForCausalLM.forward(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, labels, use_cache, output_attentions, output_hidden_states, return_dict)
    707 # Flatten the tokens
    708 loss_fct = CrossEntropyLoss()
--> 709 shift_logits = shift_logits.view(-1, self.config.vocab_size)
    710 shift_labels = shift_labels.view(-1)
    711 # Enable model parallelism

RuntimeError: shape '[-1, 32000]' is invalid for input of size 131073504

zhanghainan commented 1 year ago

Same issue here.

Kushalamummigatti commented 1 year ago

@zhanghainan Are you using adapter weights or converted weights? Have you thought of any other way to finetune?

zhanghainan commented 1 year ago

I converted the weights to Hugging Face format (https://github.com/FlagAlpha/Llama2-Chinese/tree/main/scripts/convert2hf) and used https://github.com/FlagAlpha/Llama2-Chinese/tree/main/train/sft/finetune.sh to finetune. Then I got the same issue.

zhanghainan commented 1 year ago

I found that the vocab size is actually "vocab_size": 32016 rather than 32000. I set "vocab_size" to 32016 and the finetuning then succeeded.
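
As a side note on why this fix works: 131073504 = 4094 × 32016, and it is not divisible by 32000, so the flattened logits really do have a last dimension of 32016 while config.json claims 32000. Below is a minimal sketch for checking such a mismatch before training; the checkpoint path is a placeholder, not from the original comments:

from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

path = "../../models/codellama-hf"  # placeholder: your converted HF checkpoint directory

config = AutoConfig.from_pretrained(path)
tokenizer = AutoTokenizer.from_pretrained(path)
model = AutoModelForCausalLM.from_pretrained(path)

print("config.vocab_size :", config.vocab_size)
print("tokenizer size    :", len(tokenizer))
print("embedding rows    :", model.get_input_embeddings().weight.shape[0])

# If these disagree (e.g. 32000 vs. 32016), either fix vocab_size in config.json
# or call model.resize_token_embeddings(len(tokenizer)) before training.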

Kushalamummigatti commented 1 year ago

@zhanghainan Could you please share a code snippet to update the vocab size?

zhanghainan commented 1 year ago

I converted the model to a Hugging Face model. In config.json, I set the vocab size (screenshot attached).
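
A minimal sketch of the same edit done programmatically (the checkpoint path is a placeholder, not from the original comment):

import json
import pathlib

checkpoint = pathlib.Path("../../models/codellama-hf")  # placeholder: converted HF checkpoint dir
cfg_path = checkpoint / "config.json"

# Patch vocab_size in the checkpoint's config.json to match the CodeLlama
# tokenizer / embedding size, then rewrite the file.
cfg = json.loads(cfg_path.read_text())
cfg["vocab_size"] = 32016
cfg_path.write_text(json.dumps(cfg, indent=2))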

Kushalamummigatti commented 1 year ago

@zhanghainan Strange. I have also converted the model. In config.json the vocab size is as below (screenshot attached).

But when I print the tokenizer, it looks like this (screenshot attached).

CarolXh commented 1 year ago

I have the same issue and solved it the same way. However, when I run inference I encounter the problem below, and the model cannot complete a simple inference correctly. Have you met this problem?

Warning info: Some weights of LlamaForCausalLM were not initialized from the model checkpoint at /data/sonald/projects/codellama/codellama-sft and are newly initialized: ['model.layers.18.self_attn.rotary_emb.inv_freq', 'model.layers.16.self_attn.rotary_emb.inv_freq', 'model.layers.39.self_attn.rotary_emb.inv_freq', 'model.layers.33.self_attn.rotary_emb.inv_freq', 'model.layers.10.self_attn.rotary_emb.inv_freq', 'model.layers.34.self_attn.rotary_emb.inv_freq', 'model.layers.32.self_attn.rotary_emb.inv_freq', 'model.layers.26.self_attn.rotary_emb.inv_freq', 'model.layers.31.self_attn.rotary_emb.inv_freq', 'model.layers.3.self_attn.rotary_emb.inv_freq', 'model.layers.7.self_attn.rotary_emb.inv_freq', 'model.layers.19.self_attn.rotary_emb.inv_freq', 'model.layers.13.self_attn.rotary_emb.inv_freq', 'model.layers.25.self_attn.rotary_emb.inv_freq', 'model.layers.29.self_attn.rotary_emb.inv_freq', 'model.layers.30.self_attn.rotary_emb.inv_freq', 'model.layers.23.self_attn.rotary_emb.inv_freq', 'model.layers.0.self_attn.rotary_emb.inv_freq', 'model.layers.27.self_attn.rotary_emb.inv_freq', 'model.layers.28.self_attn.rotary_emb.inv_freq', 'model.layers.6.self_attn.rotary_emb.inv_freq', 'model.layers.24.self_attn.rotary_emb.inv_freq', 'model.layers.5.self_attn.rotary_emb.inv_freq', 'model.layers.2.self_attn.rotary_emb.inv_freq', 'model.layers.35.self_attn.rotary_emb.inv_freq', 'model.layers.14.self_attn.rotary_emb.inv_freq', 'model.layers.8.self_attn.rotary_emb.inv_freq', 'model.layers.1.self_attn.rotary_emb.inv_freq', 'model.layers.38.self_attn.rotary_emb.inv_freq', 'model.layers.9.self_attn.rotary_emb.inv_freq', 'model.layers.17.self_attn.rotary_emb.inv_freq', 'model.layers.20.self_attn.rotary_emb.inv_freq', 'model.layers.21.self_attn.rotary_emb.inv_freq', 'model.layers.36.self_attn.rotary_emb.inv_freq', 'model.layers.11.self_attn.rotary_emb.inv_freq', 'model.layers.37.self_attn.rotary_emb.inv_freq', 'model.layers.22.self_attn.rotary_emb.inv_freq', 'model.layers.4.self_attn.rotary_emb.inv_freq', 'model.layers.12.self_attn.rotary_emb.inv_freq', 'model.layers.15.self_attn.rotary_emb.inv_freq'] You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
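
For what it is worth, the tensors listed above are all rotary-embedding inv_freq buffers, which transformers recomputes from the config when the model is built, so this particular warning is usually harmless and is probably not what breaks inference. A minimal sketch to confirm nothing else failed to load (model path copied from the warning; uses the standard output_loading_info flag of from_pretrained):

from transformers import AutoModelForCausalLM

model, loading_info = AutoModelForCausalLM.from_pretrained(
    "/data/sonald/projects/codellama/codellama-sft",
    output_loading_info=True,
)

# Keys the checkpoint did not provide; anything beyond rotary inv_freq buffers
# would indicate a real loading problem.
non_rotary = [k for k in loading_info["missing_keys"] if "rotary_emb.inv_freq" not in k]
print("missing keys other than rotary inv_freq:", non_rotary)  # expect []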

zhanghainan commented 1 year ago

@CarolXh I use the code below, and it works successfully for inference.

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel, PeftConfig

finetune_model_path = '../../models/sft_codellama'
config = PeftConfig.from_pretrained(finetune_model_path)
base_model_name_or_path = '../../models/codellama-hf'

tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path, use_fast=False)
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    device_map='auto',
    torch_dtype=torch.float16,
    load_in_8bit=True,
)
model = PeftModel.from_pretrained(model, finetune_model_path, device_map={"": 0})
model = model.eval()

input_ids = tokenizer(["[INST]XXX[/INST] "], return_tensors="pt", add_special_tokens=False).input_ids.to('cuda')

generate_input = {
    "input_ids": input_ids,
    "max_new_tokens": 512,
    "do_sample": True,
    "top_k": 50,
    "top_p": 0.95,
    "temperature": 0.3,
    "repetition_penalty": 1.3,
    "eos_token_id": tokenizer.eos_token_id,
    "bos_token_id": tokenizer.bos_token_id,
    "pad_token_id": tokenizer.pad_token_id,
}

generate_ids = model.generate(**generate_input)
text = tokenizer.decode(generate_ids[0])
print(text)

CarolXh commented 1 year ago

@zhanghainan Thanks for your help. My inference code is almost the same as yours. I think the problem may be that CodeLlama is not very strong on questions related to SQL or DSL; I will look into that further.