I have pruned and quantized several models using your toolkit, and I'm currently aiming to run inference with your pipeline on my GPT-2 code generation model. To do so I need to export the model to the ONNX format. Running trainer.export_to_onnx on both the int8 and fp32 models fails to export them, raising IndexError: Dimension out of range. Using torch.onnx.export alone works just fine, therefore the error must be somewhere in the methods in trainer.py. I want to avoid using torch.onnx.export because I need the optimized performance for the int8 model offered by your executor backend.
import os
import numpy as np
from datasets import load_dataset, Dataset, load_metric
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, DataCollatorForLanguageModeling,EvalPrediction
from intel_extension_for_transformers.optimization.trainer import NLPTrainer
from pandarallel import pandarallel
# Initialise pandarallel (32 workers, with progress bars) and register tqdm's
# pandas integration, which provides the `progress_apply` used further down.
pandarallel.initialize(progress_bar=True, nb_workers=32)
from tqdm import tqdm
tqdm.pandas()

# Hide every CUDA device from this process and switch off Weights & Biases.
os.environ.update({"CUDA_VISIBLE_DEVICES": "", "WANDB_DISABLED": "true"})

# Data, quantized model and tokenizer. The tokenizer truncates from the left,
# so over-long examples keep their most recent tokens.
dataset = load_dataset("0n1xus/codexglue", 'code-completion-token-py150')
model = AutoModelForCausalLM.from_pretrained("dscc/CodeGPT-Py150_q_all_layers_sym_per_tensor")
tokenizer = AutoTokenizer.from_pretrained("microsoft/CodeGPT-small-py", truncation_side='left')

# Convert each split to a DataFrame and keep a random sample of just 10 rows
# to speed up tokenization. No random_state is given, so the sample differs
# from run to run.
train, valid, test = (
    dataset[split_name].to_pandas().sample(10)
    for split_name in ("train", "validation", "test")
)
# We split the input code snippet and take the last line as the expected output and the rest as input
def split_input_output(x):
    """Split a code snippet into (context, last line) at its final '<EOL>'.

    Args:
        x: A mapping (e.g. a DataFrame row) whose 'code' entry is a string
            with lines separated by the '<EOL>' marker.

    Returns:
        A two-element list ``[input, output]``: ``input`` is everything before
        the last '<EOL>' with a trailing '<EOL>' appended, and ``output`` is
        the text after it. When 'code' contains no '<EOL>' at all, ``input``
        is just '<EOL>' and ``output`` is the whole string.
    """
    # rpartition splits on the LAST marker; when the marker is absent the
    # head is empty and the tail is the full string.
    head, _, last_line = x['code'].rpartition('<EOL>')
    return [head + '<EOL>', last_line]
# Tokenize the sequences
# CodeGPT supports 1024 tokens max so we truncate longer sequences
# We add padding to make sure all sequences the same length
def tokenize_both(batch):
    """Tokenize an example's 'input'/'output' pair to exactly 1024 tokens.

    Args:
        batch: A mapping with string entries 'input' and 'output'. It is
            called through ``Dataset.map`` without ``batched=True``, so it
            receives one example at a time.

    Returns:
        The tokenizer's encoding for the pair, padded and truncated to
        ``max_length=1024``, as plain Python lists.
    """
    # No return_tensors here: on a single example it would wrap every field
    # in an extra leading dimension of size 1, which the dataset would then
    # store as a nested list. The data collator builds the tensors later.
    return tokenizer(
        batch['input'],
        batch['output'],
        padding="max_length",
        truncation=True,
        max_length=1024,
    )
# Add 'input' / 'output' columns to every sampled DataFrame by splitting each
# row's code snippet (the two returned values are expanded into two columns).
for frame in (train, valid, test):
    frame[['input', 'output']] = frame.progress_apply(split_input_output, axis =1, result_type="expand")

# Build and tokenize the train and validation datasets (the test frame is not
# tokenized here).
train_ds = Dataset.from_pandas(train, split="train").map(tokenize_both, num_proc=1)
valid_ds = Dataset.from_pandas(valid, split="validation").map(tokenize_both, num_proc=1)
# Keyword arguments for the Hugging Face TrainingArguments used by the trainer.
training_kwargs = {
    "no_cuda": True,
    "do_train": True,
    "do_eval": True,
    "output_dir": "./test_epoch",  # The output directory
    "overwrite_output_dir": True,  # Overwrite the content of the output directory
    "num_train_epochs": 2,  # Number of training epochs
    "per_device_train_batch_size": 1,  # Batch size for training
    "per_device_eval_batch_size": 1,  # Batch size for evaluation
    # NOTE(review): eval_steps presumably only applies with a step-based
    # evaluation strategy, which is not set here - confirm.
    "eval_steps": 200,  # Number of update steps between two evaluations
    "save_steps": 800,  # Number of steps after which the model is saved
    # NOTE(review): with 10 training rows, batch size 1 and 2 epochs there are
    # far fewer than 500 update steps - confirm this warmup is intended.
    "warmup_steps": 500,  # Number of warmup steps for the learning rate scheduler
    # NOTE(review): loss-only prediction presumably means compute_metrics is
    # never invoked during evaluation - confirm.
    "prediction_loss_only": True,
    "seed": 42,
}
training_args = TrainingArguments(**training_kwargs)

# Collator for causal language modelling (mlm=False) that returns PyTorch tensors.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False, return_tensors='pt')

# Accuracy metric consumed by compute_metrics.
task_metric = load_metric("accuracy")
def compute_metrics(p: EvalPrediction):
    """Compute token-level accuracy from the model's evaluation output.

    Args:
        p: Evaluation output. ``p.predictions`` holds the logits (or a tuple
            whose first element is the logits) and ``p.label_ids`` holds the
            reference token ids.

    Returns:
        The dict produced by ``task_metric.compute``. If it contains more than
        one value, their mean is added under ``"combined_score"``.
    """
    logits = p.predictions[0] if isinstance(p.predictions, tuple) else p.predictions
    # Pick the best-scoring class along the LAST axis. For causal-LM logits
    # (assumed (batch, seq, vocab) - TODO confirm) axis=1 would reduce over
    # sequence positions instead of the vocabulary.
    preds = np.argmax(logits, axis=-1)
    labels = np.asarray(p.label_ids)
    if labels.ndim > 1:
        # Causal LM: the prediction at position i targets the token at i + 1,
        # so drop the last prediction and the first label to align them.
        preds = preds[..., :-1]
        labels = labels[..., 1:]
    # Skip positions labelled -100 (presumably the ignore index the collator
    # assigns to padding - TODO confirm). Boolean indexing also flattens both
    # arrays to 1-D, which is what the metric expects.
    keep = labels != -100
    result = task_metric.compute(predictions=preds[keep], references=labels[keep])
    if len(result) > 1:
        result["combined_score"] = np.mean(list(result.values())).item()
    return result
# Assemble the trainer from the model, tokenizer, datasets and configuration
# prepared earlier in the script.
trainer = NLPTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train_ds,
    eval_dataset=valid_ds,
    compute_metrics=compute_metrics,
)

# Turn on the quantization and executor flags before exporting.
for flag in ("enable_inc_quant", "enable_executor"):
    setattr(trainer, flag, True)

# Per the traceback in this report, this call goes through export_to_int8_onnx
# and export_to_fp32_onnx, and fails inside torch.onnx.export with the IndexError.
trainer.export_to_onnx("test.onnx")
Raising:
File "code_completion.py", line 96, in <module>
trainer.export_to_onnx("test.onnx")
File "\venv\lib\site-packages\intel_extension_for_transformers\optimization\trainer.py", line 2006, in export_to_onnx
self.export_to_int8_onnx(*args, **kwargs)
File "\venv\lib\site-packages\intel_extension_for_transformers\optimization\trainer.py", line 2179, in export_to_int8_onnx
self.export_to_fp32_onnx(fp32_path,
File "\venv\lib\site-packages\intel_extension_for_transformers\optimization\trainer.py", line 2045, in export_to_fp32_onnx
torch.onnx.export(
File "\venv\lib\site-packages\torch\onnx\utils.py", line 506, in export
_export(
File "\venv\lib\site-packages\torch\onnx\utils.py", line 1548, in _export
graph, params_dict, torch_out = _model_to_graph(
File "\venv\lib\site-packages\torch\onnx\utils.py", line 1113, in _model_to_graph
graph, params, torch_out, module = _create_jit_graph(model, args)
File "\venv\lib\site-packages\torch\onnx\utils.py", line 989, in _create_jit_graph
graph, torch_out = _trace_and_get_graph_from_model(model, args)
File "\venv\lib\site-packages\torch\onnx\utils.py", line 893, in _trace_and_get_graph_from_model
trace_graph, torch_out, inputs_states = torch.jit._get_trace_graph(
File "\venv\lib\site-packages\torch\nn\modules\module.py", line 1488, in _slow_forward
result = self.forward(*input, **kwargs)
File "\venv\lib\site-packages\transformers\models\gpt2\modeling_gpt2.py", line 1076, in forward
transformer_outputs = self.transformer(
File "\venv\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "\venv\lib\site-packages\torch\nn\modules\module.py", line 1488, in _slow_forward
result = self.forward(*input, **kwargs)
File "\venv\lib\site-packages\transformers\models\gpt2\modeling_gpt2.py", line 800, in forward
past_length = past_key_values[0][0].size(-2)
IndexError: Dimension out of range (expected to be in range of [-1, 0], but got -2)
Urgency
medium-high
Platform
Linux/Windows
Package versions
python 3.9
torch 2.0.1
transformers 4.29.2
intel-extension-for-transformers 1.0.1
neural-compressor 2.1.1
(rest of them are all up to date)
Hi intel team,
I have pruned and quantized several models using your toolkit, and I'm currently aiming to run inference with your pipeline on my GPT-2 code generation model. To do so I need to export the model to the ONNX format. Running `trainer.export_to_onnx` on both the int8 and fp32 models fails to export them, raising `IndexError: Dimension out of range`. Using `torch.onnx.export` alone works just fine, therefore the error must be somewhere in the methods in `trainer.py`. I want to avoid using `torch.onnx.export` because I need the optimized performance for the int8 model offered by your executor backend.
Quantized model at: https://huggingface.co/dscc/CodeGPT-Py150_q_all_layers_sym_per_tensor/tree/main
To reproduce:
Raising:
Urgency
medium-high
Platform
Linux/Windows
Package versions
python 3.9 torch 2.0.1 transformers 4.29.2 intel-extension-for-transformers 1.0.1 neural-compressor 2.1.1 (rest of them are all up to date)