andysingal opened this issue 8 months ago (Open)
```python
from datasets import load_dataset
from random import randint

# Load our test dataset
eval_dataset = load_dataset("json", data_files="test_dataset.json", split="train")

rand_idx = randint(0, len(eval_dataset) - 1)  # randint is inclusive on both ends

# Test on sample
prompt = pipe.tokenizer.apply_chat_template(eval_dataset[rand_idx]["messages"][:2], tokenize=False, add_generation_prompt=True)
outputs = pipe(prompt, max_new_tokens=256, do_sample=False, temperature=0.1, top_k=50, top_p=0.1, eos_token_id=pipe.tokenizer.eos_token_id, pad_token_id=pipe.tokenizer.pad_token_id)

print(f"Query:\n{eval_dataset[rand_idx]['messages'][1]['content']}")
print(f"Original Answer:\n{eval_dataset[rand_idx]['messages'][2]['content']}")
print(f"Generated Answer:\n{outputs[0]['generated_text'][len(prompt):].strip()}")
```
This gave the following error:
```
---------------------------------------------------------------------------
NotImplementedError                       Traceback (most recent call last)
Cell In[14], line 11
      9 # Test on sample
     10 prompt = pipe.tokenizer.apply_chat_template(eval_dataset[rand_idx]["messages"][:2], tokenize=False, add_generation_prompt=True)
---> 11 outputs = pipe(prompt, max_new_tokens=256, do_sample=False, temperature=0.1, top_k=50, top_p=0.1, eos_token_id=pipe.tokenizer.eos_token_id, pad_token_id=pipe.tokenizer.pad_token_id)
     13 print(f"Query:\n{eval_dataset[rand_idx]['messages'][1]['content']}")
     14 print(f"Original Answer:\n{eval_dataset[rand_idx]['messages'][2]['content']}")

File /usr/local/lib/python3.10/dist-packages/transformers/pipelines/text_generation.py:208, in TextGenerationPipeline.__call__(self, text_inputs, **kwargs)
    167 def __call__(self, text_inputs, **kwargs):
    168     """
    169     Complete the prompt(s) given as inputs.
    (...)
    206     ids of the generated text.
    207     """
--> 208     return super().__call__(text_inputs, **kwargs)

File /usr/local/lib/python3.10/dist-packages/transformers/pipelines/base.py:1140, in Pipeline.__call__(self, inputs, num_workers, batch_size, *args, **kwargs)
   1132     return next(
   1133         iter(
   1134             self.get_iterator(
   (...)
   1137         )
   1138     )
   1139 else:
-> 1140     return self.run_single(inputs, preprocess_params, forward_params, postprocess_params)

File /usr/local/lib/python3.10/dist-packages/transformers/pipelines/base.py:1147, in Pipeline.run_single(self, inputs, preprocess_params, forward_params, postprocess_params)
   1145 def run_single(self, inputs, preprocess_params, forward_params, postprocess_params):
   1146     model_inputs = self.preprocess(inputs, **preprocess_params)
-> 1147     model_outputs = self.forward(model_inputs, **forward_params)
   1148     outputs = self.postprocess(model_outputs, **postprocess_params)
   1149     return outputs

File /usr/local/lib/python3.10/dist-packages/transformers/pipelines/base.py:1046, in Pipeline.forward(self, model_inputs, **forward_params)
   1044 with inference_context():
   1045     model_inputs = self._ensure_tensor_on_device(model_inputs, device=self.device)
-> 1046     model_outputs = self._forward(model_inputs, **forward_params)
   1047     model_outputs = self._ensure_tensor_on_device(model_outputs, device=torch.device("cpu"))
   1048 else:

File /usr/local/lib/python3.10/dist-packages/transformers/pipelines/text_generation.py:271, in TextGenerationPipeline._forward(self, model_inputs, **generate_kwargs)
    268     generate_kwargs["min_length"] += prefix_length
    270 # BS x SL
--> 271 generated_sequence = self.model.generate(input_ids=input_ids, attention_mask=attention_mask, **generate_kwargs)
    272 out_b = generated_sequence.shape[0]
    273 if self.framework == "pt":

File /usr/local/lib/python3.10/dist-packages/peft/peft_model.py:1140, in PeftModelForCausalLM.generate(self, **kwargs)
   1138 self.base_model.generation_config = self.generation_config
   1139 try:
-> 1140     outputs = self.base_model.generate(**kwargs)
   1141 except:
   1142     self.base_model.prepare_inputs_for_generation = self.base_model_prepare_inputs_for_generation

File /usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py:115, in context_decorator.<locals>.decorate_context(*args, **kwargs)
    112 @functools.wraps(func)
    113 def decorate_context(*args, **kwargs):
    114     with ctx_factory():
--> 115         return func(*args, **kwargs)

File /usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py:1718, in GenerationMixin.generate(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, negative_prompt_ids, negative_prompt_attention_mask, **kwargs)
   1701     return self.assisted_decoding(
   1702         input_ids,
   1703         assistant_model=assistant_model,
   (...)
   1714         **model_kwargs,
   1715     )
   1716 if generation_mode == GenerationMode.GREEDY_SEARCH:
   1717     # 11. run greedy search
-> 1718     return self.greedy_search(
   1719         input_ids,
   1720         logits_processor=logits_processor,
   1721         stopping_criteria=stopping_criteria,
   1722         pad_token_id=generation_config.pad_token_id,
   1723         eos_token_id=generation_config.eos_token_id,
   1724         output_scores=generation_config.output_scores,
   1725         return_dict_in_generate=generation_config.return_dict_in_generate,
   1726         synced_gpus=synced_gpus,
   1727         streamer=streamer,
   1728         **model_kwargs,
   1729     )
   1731 elif generation_mode == GenerationMode.CONTRASTIVE_SEARCH:
   1732     if not model_kwargs["use_cache"]:

File /usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py:2579, in GenerationMixin.greedy_search(self, input_ids, logits_processor, stopping_criteria, max_length, pad_token_id, eos_token_id, output_attentions, output_hidden_states, output_scores, return_dict_in_generate, synced_gpus, streamer, **model_kwargs)
   2576 model_inputs = self.prepare_inputs_for_generation(input_ids, **model_kwargs)
   2578 # forward pass to get next token
-> 2579 outputs = self(
   2580     **model_inputs,
   2581     return_dict=True,
   2582     output_attentions=output_attentions,
   2583     output_hidden_states=output_hidden_states,
   2584 )
   2586 if synced_gpus and this_peer_finished:
   2587     continue  # don't waste resources running the code we don't need

File /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1518, in Module._wrapped_call_impl(self, *args, **kwargs)
   1516     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1517 else:
-> 1518     return self._call_impl(*args, **kwargs)

File /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1527, in Module._call_impl(self, *args, **kwargs)
   1522 # If we don't have any hooks, we want to skip the rest of the logic in
   1523 # this function, and just call forward.
   1524 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1525         or _global_backward_pre_hooks or _global_backward_hooks
   1526         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1527     return forward_call(*args, **kwargs)
   1529 try:
   1530     result = None

File /usr/local/lib/python3.10/dist-packages/accelerate/hooks.py:165, in add_hook_to_module.<locals>.new_forward(module, *args, **kwargs)
    163     output = module._old_forward(*args, **kwargs)
    164 else:
--> 165     output = module._old_forward(*args, **kwargs)
    166 return module._hf_hook.post_forward(module, output)

File /usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py:1199, in LlamaForCausalLM.forward(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, labels, use_cache, output_attentions, output_hidden_states, return_dict)
   1197     logits = torch.cat(logits, dim=-1)
   1198 else:
-> 1199     logits = self.lm_head(hidden_states)
   1200 logits = logits.float()
   1202 loss = None

File /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1518, in Module._wrapped_call_impl(self, *args, **kwargs)
   1516     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1517 else:
-> 1518     return self._call_impl(*args, **kwargs)

File /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1527, in Module._call_impl(self, *args, **kwargs)
   1522 # If we don't have any hooks, we want to skip the rest of the logic in
   1523 # this function, and just call forward.
   1524 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1525         or _global_backward_pre_hooks or _global_backward_hooks
   1526         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1527     return forward_call(*args, **kwargs)
   1529 try:
   1530     result = None

File /usr/local/lib/python3.10/dist-packages/accelerate/hooks.py:160, in add_hook_to_module.<locals>.new_forward(module, *args, **kwargs)
    159 def new_forward(module, *args, **kwargs):
--> 160     args, kwargs = module._hf_hook.pre_forward(module, *args, **kwargs)
    161     if module._hf_hook.no_grad:
    162         with torch.no_grad():

File /usr/local/lib/python3.10/dist-packages/accelerate/hooks.py:293, in AlignDevicesHook.pre_forward(self, module, *args, **kwargs)
    291     if self.weights_map[name].dtype == torch.int8:
    292         fp16_statistics = self.weights_map[name.replace("weight", "SCB")]
--> 293     set_module_tensor_to_device(
    294         module, name, self.execution_device, value=self.weights_map[name], fp16_statistics=fp16_statistics
    295     )
    297 return send_to_device(args, self.execution_device), send_to_device(
    298     kwargs, self.execution_device, skip_keys=self.skip_keys
    299 )

File /usr/local/lib/python3.10/dist-packages/accelerate/utils/modeling.py:347, in set_module_tensor_to_device(module, tensor_name, device, value, dtype, fp16_statistics)
    345     module._parameters[tensor_name] = param_cls(new_value, requires_grad=old_value.requires_grad)
    346 elif isinstance(value, torch.Tensor):
--> 347     new_value = value.to(device)
    348 else:
    349     new_value = torch.tensor(value, device=device)

NotImplementedError: Cannot copy out of meta tensor; no data!
```
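The last frame shows accelerate's `AlignDevicesHook` trying to copy a weight that only exists on the meta device, i.e. a parameter that was never materialized on the GPU (typically because `device_map` offloading pushed part of the model to CPU or disk). A minimal diagnostic sketch, assuming the `pipe` object from the snippet above; the `hf_device_map` attribute is only present when the model was loaded with a `device_map`:

```python
import torch

# Count parameters that are still on the "meta" device; any non-zero count
# explains the "Cannot copy out of meta tensor; no data!" error at inference.
meta_params = [name for name, p in pipe.model.named_parameters() if p.device.type == "meta"]
print(f"parameters on meta device: {len(meta_params)}")

# If the model was loaded with device_map="auto", this shows where each module
# was placed; entries mapped to "cpu" or "disk" mean the GPU ran out of room.
print(getattr(pipe.model, "hf_device_map", "no hf_device_map attribute"))
```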
Try again after restarting the kernel; it seems your GPU is already busy.
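A minimal sketch of that suggestion, assuming a CUDA notebook session; instead of restarting the whole kernel you can drop the existing pipeline, clear the CUDA cache, and confirm memory is actually free before reloading the model:

```python
import gc
import torch

# Drop the existing pipeline (and any other references to the model) so the
# underlying tensors can be garbage-collected, then release cached GPU memory.
del pipe
gc.collect()
torch.cuda.empty_cache()

# Confirm how much GPU memory is free before reloading the model.
free_bytes, total_bytes = torch.cuda.mem_get_info()
print(f"free GPU memory: {free_bytes / 1e9:.1f} GB of {total_bytes / 1e9:.1f} GB")
```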
Tried again after restarting; it still gave an error: