syngokhan closed this issue 1 year ago
Hello @syngokhan, could you please share a minimal code example that we can execute?
@pacman100
Hello, thank you again for your reply. I fine-tuned a model on top of DialoGPT-medium, and I wanted to create a TorchScript module that wraps the generate step. Since I couldn't find very clear information, I tried it myself. Please correct me if I'm wrong.
from peft import PeftModel, PeftConfig
from transformers import AutoTokenizer, AutoModelWithLMHead
import torch

class GenerateModel(torch.nn.Module):
    def __init__(self, peft_model_id):
        super().__init__()
        self.peft_model_id = peft_model_id
        # -----
        self.config = PeftConfig.from_pretrained(self.peft_model_id)
        self.tokenizer = AutoTokenizer.from_pretrained(self.config.base_model_name_or_path)
        self.model = AutoModelWithLMHead.from_pretrained(
            self.config.base_model_name_or_path,
            # load_in_8bit=True,
            # torchscript=True,
        )
        self.model = PeftModel.from_pretrained(self.model, self.peft_model_id)
        # ----

    def forward(self, input_ids, attention_mask):
        return self.model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_length=200,
            pad_token_id=self.tokenizer.eos_token_id,
            no_repeat_ngram_size=3,
            do_sample=True,
            top_k=100,
            top_p=0.7,
            temperature=0.8,
        )

if __name__ == "__main__":
    path = "DialoGPT/Peft"  # example path
    device = "cuda" if torch.cuda.is_available() else "cpu"
    device = "cpu"  # force CPU for tracing
    generateModel = GenerateModel(path)
    tokens = generateModel.tokenizer.encode_plus(
        "hello how are you",
        truncation=True,
        add_special_tokens=True,
        return_token_type_ids=False,
        return_tensors="pt",
    )
    input_ids = tokens["input_ids"].to(device)
    attention_mask = tokens["attention_mask"].to(device)
    dummy = [input_ids, attention_mask]
    with torch.jit.optimized_execution(True):
        model_trace = torch.jit.trace(generateModel.to(device), dummy)
Error:
if input_ids_seq_length >= max_length:
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ /home/glb90092345/DialoGPT/Script/TRACE.py:56 in <module> │
│ │
│ 53 │ dummy = [input_ids, attention_mask] │
│ 54 │ │
│ 55 │ with torch.jit.optimized_execution(True): │
│ ❱ 56 │ │ model_trace = torch.jit.trace(generateModel.to(device),dummy) │
│ 57 │
│ │
│ /usr/local/lib/python3.7/site-packages/torch/jit/_trace.py:744 in trace │
│ │
│ 741 │ │ │ check_tolerance, │
│ 742 │ │ │ strict, │
│ 743 │ │ │ _force_outplace, │
│ ❱ 744 │ │ │ _module_class, │
│ 745 │ │ ) │
│ 746 │ │
│ 747 │ if ( │
│ │
│ /usr/local/lib/python3.7/site-packages/torch/jit/_trace.py:959 in trace_module │
│ │
│ 956 │ │ │ │ var_lookup_fn, │
│ 957 │ │ │ │ strict, │
│ 958 │ │ │ │ _force_outplace, │
│ ❱ 959 │ │ │ │ argument_names, │
│ 960 │ │ │ ) │
│ 961 │ │ │ check_trace_method = module._c._get_method(method_name) │
│ 962 │
│ │
│ /usr/local/lib/python3.7/site-packages/torch/nn/modules/module.py:1051 in _call_impl │
│ │
│ 1048 │ │ # this function, and just call forward. │
│ 1049 │ │ if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks o │
│ 1050 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1051 │ │ │ return forward_call(*input, **kwargs) │
│ 1052 │ │ # Do not call functions when jit is used │
│ 1053 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │
│ 1054 │ │ if self._backward_hooks or _global_backward_hooks: │
│ │
│ /usr/local/lib/python3.7/site-packages/torch/nn/modules/module.py:1039 in _slow_forward │
│ │
│ 1036 │ │ │ else: │
│ 1037 │ │ │ │ recording_scopes = False │
│ 1038 │ │ try: │
│ ❱ 1039 │ │ │ result = self.forward(*input, **kwargs) │
│ 1040 │ │ finally: │
│ 1041 │ │ │ if recording_scopes: │
│ 1042 │ │ │ │ tracing_state.pop_scope() │
│ │
│ /home/glb90092345/DialoGPT/Script/TRACE.py:34 in forward │
│ │
│ 31 │ │ │ │ │ │ │ │ │ do_sample=True, │
│ 32 │ │ │ │ │ │ │ │ │ top_k=100, │
│ 33 │ │ │ │ │ │ │ │ │ top_p=0.7, │
│ ❱ 34 │ │ │ │ │ │ │ │ │ temperature = 0.8 │
│ 35 │ │ ) │
│ 36 │
│ 37 │
│ │
│ /usr/local/lib/python3.7/site-packages/peft/peft_model.py:581 in generate │
│ │
│ 578 │ │ self.base_model.prepare_inputs_for_generation = self.prepare_inputs_for_generati │
│ 579 │ │ try: │
│ 580 │ │ │ if not isinstance(self.peft_config, PromptLearningConfig): │
│ ❱ 581 │ │ │ │ outputs = self.base_model.generate(**kwargs) │
│ 582 │ │ │ else: │
│ 583 │ │ │ │ if "input_ids" not in kwargs: │
│ 584 │ │ │ │ │ raise ValueError("input_ids must be provided for Peft model generati │
│ │
│ /usr/local/lib/python3.7/site-packages/torch/autograd/grad_mode.py:28 in decorate_context │
│ │
│ 25 │ │ @functools.wraps(func) │
│ 26 │ │ def decorate_context(*args, **kwargs): │
│ 27 │ │ │ with self.__class__(): │
│ ❱ 28 │ │ │ │ return func(*args, **kwargs) │
│ 29 │ │ return cast(F, decorate_context) │
│ 30 │ │
│ 31 │ def _wrap_generator(self, func): │
│ │
│ /usr/local/lib/python3.7/site-packages/transformers/generation_utils.py:1336 in generate │
│ │
│ 1333 │ │ │ │ output_scores=output_scores, │
│ 1334 │ │ │ │ return_dict_in_generate=return_dict_in_generate, │
│ 1335 │ │ │ │ synced_gpus=synced_gpus, │
│ ❱ 1336 │ │ │ │ **model_kwargs, │
│ 1337 │ │ │ ) │
│ 1338 │ │ │
│ 1339 │ │ elif is_beam_gen_mode: │
│ │
│ /usr/local/lib/python3.7/site-packages/transformers/generation_utils.py:1921 in sample │
│ │
│ 1918 │ │ │ ) │
│ 1919 │ │ │
│ 1920 │ │ # keep track of which sequences are already finished │
│ ❱ 1921 │ │ unfinished_sequences = input_ids.new(input_ids.shape[0]).fill_(1) │
│ 1922 │ │ cur_len = input_ids.shape[-1] │
│ 1923 │ │ │
│ 1924 │ │ this_peer_finished = False # used by synced_gpus only │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
IndexError: slice() cannot be applied to a 0-dim tensor.
Now I have changed to Python 3.9.11, but apart from that it produced a huge warning, too large to post here. Is it even possible to convert this structure, i.e. the GPT model, to TorchScript? I haven't seen it done anywhere, but is it possible?
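For reference, a minimal sketch of the usual workaround, not a verified fix: trace a single forward pass instead of generate(). generate() runs a data-dependent Python loop (sampling, stopping criteria) that torch.jit.trace cannot record, which is why tracing it fails. "DialoGPT/Peft" below is a placeholder for the adapter path used in this thread.

    import torch
    from peft import PeftModel, PeftConfig
    from transformers import AutoTokenizer, AutoModelForCausalLM

    class ForwardOnly(torch.nn.Module):
        """Wraps one forward pass so arguments are bound by keyword."""
        def __init__(self, model):
            super().__init__()
            self.model = model

        def forward(self, input_ids, attention_mask):
            # Bind by keyword: GPT-2's second positional argument is
            # past_key_values, not attention_mask.
            return self.model(input_ids=input_ids, attention_mask=attention_mask)

    peft_model_id = "DialoGPT/Peft"  # placeholder path
    config = PeftConfig.from_pretrained(peft_model_id)
    tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
    base = AutoModelForCausalLM.from_pretrained(
        config.base_model_name_or_path,
        torchscript=True,  # forward returns plain tuples, which tracing prefers
    )
    model = PeftModel.from_pretrained(base, peft_model_id)
    model.eval()

    tokens = tokenizer("hello how are you", return_tensors="pt")
    traced = torch.jit.trace(
        ForwardOnly(model),
        (tokens["input_ids"], tokens["attention_mask"]),
        strict=False,  # tolerate nested tuple outputs (logits, past_key_values)
    )
    # The decoding loop (sampling, stopping) then has to be re-implemented
    # in Python around the traced forward pass.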
This issue has been automatically marked as stale because it has not had recent activity. If you think this still needs to be addressed please comment on this thread.
I trained a model with PEFT and I want to convert it to TorchScript. Is there a way to do this? I tried the usual TorchScript conversion approaches but got errors.
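One approach that may help, assuming the adapter is a LoRA adapter: merge the adapter into the base model with merge_and_unload() (a LoRA-only PEFT API), then trace a plain forward pass of the merged transformers model. The model name and adapter path below are illustrative, based on the DialoGPT-medium setup in this thread.

    import torch
    from peft import PeftModel
    from transformers import AutoTokenizer, AutoModelForCausalLM

    # Load the base model and fold the LoRA weights into it, so the result
    # is a plain transformers model with no PEFT wrappers left around it.
    base = AutoModelForCausalLM.from_pretrained(
        "microsoft/DialoGPT-medium",  # assumed base model
        torchscript=True,
    )
    model = PeftModel.from_pretrained(base, "DialoGPT/Peft")  # placeholder adapter path
    merged = model.merge_and_unload()  # merges the LoRA weights, returns the base model
    merged.eval()

    tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium")
    tokens = tokenizer("hello how are you", return_tensors="pt")

    # Trace a single forward pass with input_ids only; generate() itself
    # still cannot be traced and must stay in Python.
    traced = torch.jit.trace(merged, (tokens["input_ids"],), strict=False)
    torch.jit.save(traced, "dialogpt_merged.pt")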