Open liyang619 opened 2 years ago
Hi, unfortunately Mutate currently doesn't support training. However, if you wish to fine-tune your LLM, following the steps in this repo would probably help: https://github.com/Xirider/finetune-gpt2xl
From this from mutate import pipeline
...:
...: pipe = pipeline("text-classification-synthesis",
...: model="EleutherAI/gpt-neo-2.7B")
...:
...: task_dec ="Each item in the following contains Swahili news texts that are classified as Kitaifa (National),Kimataifa (International), Biashara (Business),Michezo (Sports) and Burudani (Entertainment) "
...:
...:
...: # returns a python generator
...: text_synth_gen = pipe("csv",
...: data_files=["Train.csv"],
...: task_desc=task_desc,
...: text_column="content",
...: label_column="category",
...: text_column_alias="News",
...: label_column_alias="Category",
...: shot_count=5,
...: class_names=['Kimataifa', 'Burudani'])
...:
...: #Loop through the generator to synthesize examples by class
...: for synthesized_examples in text_synth_gen:
...: print(synthesized_examples)
`num_beams` is set to 1. However, `early_stopping` is set to `True` -- this flag is only used in beam-based generation modes. You should set `num_beams>1` or unset `early_stopping`.
warnings.warn(
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
/home/ubuntu/transformers/src/transformers/generation/utils.py:1362: UserWarning: Input length of input_ids is 3341, but `max_length` is set to 300. This can lead to unexpected behavior. You should consider increasing `max_new_tokens`.
warnings.warn(
IndexError Traceback (most recent call last) Cell In[85], line 21 10 text_synth_gen = pipe("csv", 11 data_files=["Train.csv"], 12 task_desc=task_desc, (...) 17 shot_count=5, 18 class_names=['Kimataifa', 'Burudani']) 20 #Loop through the generator to synthesize examples by class ---> 21 for synthesized_examples in text_synth_gen: 22 print(synthesized_examples)
File ~/.local/lib/python3.8/site-packages/mutate/pipelines/text_classification.py:200, in TextClassificationSynthesize.__call__(self, dataset_path, text_column, label_column, task_desc, split, data_files, data_dir, text_column_alias, label_column_alias, class_names, dataset_args, dataset_kwargs, batch_size, shot_count, infinite_loop, **kwargs) 198 batch_parsed_examples = [] 199 batch_class_names = [] --> 200 batch_generated_texts = self.infer.run_single_batch( 201 batch, generate_args=self.generate_kwargs 202 ) 204 num_return_sequences = len(batch_generated_texts) // len(batch["input_ids"]) 206 for idx, generated_text in enumerate(batch_generated_texts):
File ~/.local/lib/python3.8/site-packages/mutate/infer.py:52, in TextGeneration.run_single_batch(self, batch, is_include_prompt_in_generation, ignore_prompt_last_line, generate_args) 44 def run_single_batch( 45 self, 46 batch, (...) 49 generate_args: Optional[Dict[str, str]] = None, 50 ): 51 batch_size = len(batch["input_ids"]) ---> 52 generated_sequences = self.model.generate( 53 batch["input_ids"], attention_mask=batch["attention_mask"], **generate_args 54 ) 55 generated_sequences = generated_sequences.cpu() 56 num_return_sequences = len(generated_sequences) // batch_size
File /usr/local/lib/python3.8/dist-packages/torch/utils/_contextlib.py:115, in context_decorator.
File ~/transformers/src/transformers/generation/utils.py:1763, in GenerationMixin.generate(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, negative_prompt_ids, negative_prompt_attention_mask, **kwargs) 1755 input_ids, model_kwargs = self._expand_inputs_for_generation( 1756 input_ids=input_ids, 1757 expand_size=generation_config.num_return_sequences, 1758 is_encoder_decoder=self.config.is_encoder_decoder, 1759 **model_kwargs, 1760 ) 1762 # 13. run sample -> 1763 return self.sample( 1764 input_ids, 1765 logits_processor=logits_processor, 1766 logits_warper=logits_warper, 1767 stopping_criteria=stopping_criteria, 1768 pad_token_id=generation_config.pad_token_id, 1769 eos_token_id=generation_config.eos_token_id, 1770 output_scores=generation_config.output_scores, 1771 return_dict_in_generate=generation_config.return_dict_in_generate, 1772 synced_gpus=synced_gpus, 1773 streamer=streamer, 1774 **model_kwargs, 1775 ) 1777 elif generation_mode == GenerationMode.BEAM_SEARCH: 1778 # 11. prepare beam search scorer 1779 beam_scorer = BeamSearchScorer( 1780 batch_size=batch_size, 1781 num_beams=generation_config.num_beams, (...) 1786 max_length=generation_config.max_length, 1787 )
File ~/transformers/src/transformers/generation/utils.py:2860, in GenerationMixin.sample(self, input_ids, logits_processor, stopping_criteria, logits_warper, max_length, pad_token_id, eos_token_id, output_attentions, output_hidden_states, output_scores, return_dict_in_generate, synced_gpus, streamer, **model_kwargs) 2857 model_inputs = self.prepare_inputs_for_generation(input_ids, **model_kwargs) 2859 # forward pass to get next token -> 2860 outputs = self( 2861 **model_inputs, 2862 return_dict=True, 2863 output_attentions=output_attentions, 2864 output_hidden_states=output_hidden_states, 2865 ) 2867 if synced_gpus and this_peer_finished: 2868 continue # don't waste resources running the code we don't need
File /usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py:1518, in Module._wrapped_call_impl(self, *args, **kwargs) 1516 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc] 1517 else: -> 1518 return self._call_impl(*args, **kwargs)
File /usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py:1527, in Module._call_impl(self, *args, **kwargs) 1522 # If we don't have any hooks, we want to skip the rest of the logic in 1523 # this function, and just call forward. 1524 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks 1525 or _global_backward_pre_hooks or _global_backward_hooks 1526 or _global_forward_hooks or _global_forward_pre_hooks): -> 1527 return forward_call(*args, **kwargs) 1529 try: 1530 result = None
File ~/transformers/src/transformers/models/gpt_neo/modeling_gpt_neo.py:954, in GPTNeoForCausalLM.forward(self, input_ids, past_key_values, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, labels, use_cache, output_attentions, output_hidden_states, return_dict)
946 r"""
947 labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
948 Labels for language modeling. Note that the labels **are shifted** inside the model, i.e. you can set
949 `labels = input_ids` Indices are selected in `[-100, 0, ..., config.vocab_size]` All labels set to `-100`
950 are ignored (masked), the loss is only computed for labels in `[0, ..., config.vocab_size]`
951 """
952 return_dict = return_dict if return_dict is not None else self.config.use_return_dict
--> 954 transformer_outputs = self.transformer(
955 input_ids,
956 past_key_values=past_key_values,
957 attention_mask=attention_mask,
958 token_type_ids=token_type_ids,
959 position_ids=position_ids,
960 head_mask=head_mask,
961 inputs_embeds=inputs_embeds,
962 use_cache=use_cache,
963 output_attentions=output_attentions,
964 output_hidden_states=output_hidden_states,
965 return_dict=return_dict,
966 )
967 hidden_states = transformer_outputs[0]
969 lm_logits = self.lm_head(hidden_states)
File /usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py:1518, in Module._wrapped_call_impl(self, *args, **kwargs) 1516 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc] 1517 else: -> 1518 return self._call_impl(*args, **kwargs)
File /usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py:1527, in Module._call_impl(self, *args, **kwargs) 1522 # If we don't have any hooks, we want to skip the rest of the logic in 1523 # this function, and just call forward. 1524 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks 1525 or _global_backward_pre_hooks or _global_backward_hooks 1526 or _global_forward_hooks or _global_forward_pre_hooks): -> 1527 return forward_call(*args, **kwargs) 1529 try: 1530 result = None
File ~/transformers/src/transformers/models/gpt_neo/modeling_gpt_neo.py:778, in GPTNeoModel.forward(self, input_ids, past_key_values, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, use_cache, output_attentions, output_hidden_states, return_dict) 776 if inputs_embeds is None: 777 inputs_embeds = self.wte(input_ids) --> 778 position_embeds = self.wpe(position_ids) 779 hidden_states = inputs_embeds + position_embeds 781 # Attention mask.
File /usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py:1518, in Module._wrapped_call_impl(self, *args, **kwargs) 1516 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc] 1517 else: -> 1518 return self._call_impl(*args, **kwargs)
File /usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py:1527, in Module._call_impl(self, *args, **kwargs) 1522 # If we don't have any hooks, we want to skip the rest of the logic in 1523 # this function, and just call forward. 1524 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks 1525 or _global_backward_pre_hooks or _global_backward_hooks 1526 or _global_forward_hooks or _global_forward_pre_hooks): -> 1527 return forward_call(*args, **kwargs) 1529 try: 1530 result = None
File /usr/local/lib/python3.8/dist-packages/torch/nn/modules/sparse.py:162, in Embedding.forward(self, input) 161 def forward(self, input: Tensor) -> Tensor: --> 162 return F.embedding( 163 input, self.weight, self.padding_idx, self.max_norm, 164 self.norm_type, self.scale_grad_by_freq, self.sparse)
File /usr/local/lib/python3.8/dist-packages/torch/nn/functional.py:2233, in embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse) 2227 # Note [embedding_renorm set_grad_enabled] 2228 # XXX: equivalent to 2229 # with torch.no_grad(): 2230 # torch.embedding_renorm_ 2231 # remove once script supports set_grad_enabled 2232 _no_grad_embedding_renorm_(weight, input, max_norm, norm_type) -> 2233 return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
IndexError: index out of range in self
Hi, could I train the model and update its parameters using Mutate's prompting code?