Open XinyuDu opened 4 years ago
基于pytorch版本的bertspanner训练了一个NER模型。在pytorch下推理结果很好。但是通过torch.onnx.export将其转换成onnx模型后,再用onnx模型推理结果跟之前pytorch模型推理结果不一致。是我漏掉了什么?还是模型中有onnx不支持的op?谢谢!
pytorch 推理代码如下:
# Reference PyTorch inference: load the fine-tuned span-NER checkpoint, run it
# on one sentence and print the extracted entities.
from processors.ner_span import InputExample
from processors.ner_span import ner_processors as processors
from processors.ner_span import convert_examples_to_features
from processors.utils_ner import bert_extract_item
from models.transformers import WEIGHTS_NAME, BertConfig, AlbertConfig
from models.bert_for_ner import BertSpanForNer
from processors.utils_ner import CNerTokenizer
from models.albert_for_ner import AlbertSpanForNer
import torch

# Label vocabulary; a label's id is its position in this list.
label_list = [
    'O', 'TIM', 'UNI', 'TIT', 'COM', 'NAM', 'MAI', 'PHO', 'PNM', 'X',
    "[CLS]", "[SEP]",
]
num_labels = len(label_list)
id2label = dict(enumerate(label_list))

# Maps a model family name to its (config, model, tokenizer) classes.
MODEL_CLASSES = {
    # bert ernie bert_wwm bert_wwwm_ext
    'bert': (BertConfig, BertSpanForNer, CNerTokenizer),
    'albert': (AlbertConfig, AlbertSpanForNer, CNerTokenizer),
}
config_class, model_class, tokenizer_class = MODEL_CLASSES['bert']
tokenizer = tokenizer_class.from_pretrained('outputresumebert', do_lower_case=True)

# Build the single-example batch (features are built with length argument 128).
sentence = '王建国,电话:13885699528,email:wjg@wjg.com,2000年9月-至今北京大学 本科'
example = InputExample(guid=0, text_a=sentence, subject=[])
feature = convert_examples_to_features([example], label_list, 128, tokenizer)
first_feature = feature[0]
input_ids = torch.tensor([first_feature.input_ids], dtype=torch.long)
input_mask = torch.tensor([first_feature.input_mask], dtype=torch.long)

# Load the checkpoint on CPU and switch to evaluation mode.
model = model_class.from_pretrained('outputresumebert')
model.to(torch.device("cpu"))
model.eval()

with torch.no_grad():
    # CORE-2
    # The mask is bound by keyword to ``attention_mask``; no label positions
    # are supplied at inference time.
    inputs = {
        "input_ids": input_ids,
        "attention_mask": input_mask,
        "start_positions": None,
        "end_positions": None,
    }
    outputs = model(**inputs)

# Only the first two model outputs (start / end logits) are used for decoding.
start_logits, end_logits = outputs[0], outputs[1]
R = bert_extract_item(start_logits, end_logits)

# Each decoded item is indexed as (label id, start, end); ``end`` is used as an
# inclusive character index into the sentence below.
label_entities = [[id2label[x[0]], x[1], x[2]] for x in R]
result = [
    [tag, sentence[begin:end + 1], [begin, end]]
    for tag, begin, end in label_entities
]
print(result)
推理结果: [['NAM', '王建国', [0, 2]], ['PHO', '13885699528', [7, 17]], ['MAI', 'wjg@wjg.com', [25, 35]], ['TIM', '2000年9月', [37, 43]], ['UNI', '北京大学', [47, 50]]]
[['NAM', '王建国', [0, 2]], ['PHO', '13885699528', [7, 17]], ['MAI', 'wjg@wjg.com', [25, 35]], ['TIM', '2000年9月', [37, 43]], ['UNI', '北京大学', [47, 50]]]
onnx推理代码如下:
# ONNX Runtime inference on the same sentence. This script reuses names that
# are not imported or defined here (torch, InputExample,
# convert_examples_to_features, label_list, tokenizer, id2label,
# bert_extract_item), so they must already be in scope.
import onnxruntime
import numpy as np

ort_session = onnxruntime.InferenceSession("./convert_pytorch_to_tf/resumebert.onnx")


def to_numpy(tensor):
    """Return *tensor* as a CPU NumPy array, detaching it first if it requires grad."""
    if tensor.requires_grad:
        tensor = tensor.detach()
    return tensor.cpu().numpy()


# compute ONNX Runtime output prediction
sentence = '王建国,电话:13885699528,email:wjg@wjg.com,2000年9月-至今北京大学 本科'
example = InputExample(guid=0, text_a=sentence, subject=[])
feature = convert_examples_to_features([example], label_list, 128, tokenizer)
first_feature = feature[0]
input_ids = torch.tensor([first_feature.input_ids], dtype=torch.long)
input_mask = torch.tensor([first_feature.input_mask], dtype=torch.long)

# Feed names must match the ``input_names`` used when the graph was exported.
ort_inputs = {
    'input_ids': to_numpy(input_ids),
    'input_mask': to_numpy(input_mask),
}
ort_outs = ort_session.run(None, ort_inputs)

# The first two graph outputs are decoded as start / end logits.
start_logits = torch.from_numpy(ort_outs[0])
end_logits = torch.from_numpy(ort_outs[1])
R = bert_extract_item(start_logits, end_logits)

# Each decoded item is indexed as (label id, start, end); ``end`` is used as an
# inclusive character index into the sentence below.
label_entities = [[id2label[x[0]], x[1], x[2]] for x in R]
result = [
    [tag, sentence[begin:end + 1], [begin, end]]
    for tag, begin, end in label_entities
]
print(result)
推理结果: [['NAM', '王建国', [0, 2]], ['PHO', '13885699528', [7, 17]], ['TIM', '2000年9月-至今', [37, 46]]]
[['NAM', '王建国', [0, 2]], ['PHO', '13885699528', [7, 17]], ['TIM', '2000年9月-至今', [37, 46]]]
模型转换代码如下:
# Export the fine-tuned span-NER checkpoint to ONNX.
from processors.ner_span import InputExample
from processors.ner_span import ner_processors as processors
from processors.ner_span import convert_examples_to_features
from processors.utils_ner import bert_extract_item
from models.transformers import WEIGHTS_NAME, BertConfig, AlbertConfig
from models.bert_for_ner import BertSpanForNer
from processors.utils_ner import CNerTokenizer
from models.albert_for_ner import AlbertSpanForNer
import torch
import numpy as np

# Label vocabulary; a label's id is its position in this list.
label_list = [
    'O', 'TIM', 'UNI', 'TIT', 'COM', 'NAM', 'MAI', 'PHO', 'PNM', 'X',
    "[CLS]", "[SEP]",
]
num_labels = len(label_list)
id2label = {i: label for i, label in enumerate(label_list)}

# Maps a model family name to its (config, model, tokenizer) classes.
MODEL_CLASSES = {
    # bert ernie bert_wwm bert_wwwm_ext
    'bert': (BertConfig, BertSpanForNer, CNerTokenizer),
    'albert': (AlbertConfig, AlbertSpanForNer, CNerTokenizer),
}
config_class, model_class, tokenizer_class = MODEL_CLASSES['bert']
tokenizer = tokenizer_class.from_pretrained('outputresumebert', do_lower_case=True)

# Example inputs used to trace the graph (features are built with length
# argument 128).
sentence = '王建国,电话:13885699528,email:wjg@wjg.com,2000年9月-至今北京大学 本科'
example = InputExample(guid=0, text_a=sentence, subject=[])
feature = convert_examples_to_features([example], label_list, 128, tokenizer)
input_ids = torch.tensor([feature[0].input_ids], dtype=torch.long)
input_mask = torch.tensor([feature[0].input_mask], dtype=torch.long)

model = model_class.from_pretrained('outputresumebert')
model.eval()


class _SpanNerExportWrapper(torch.nn.Module):
    """Adapter that hands the two exported inputs to the model by keyword.

    ``torch.onnx.export`` binds a tuple of example inputs to ``forward``
    positionally. The PyTorch inference script calls the model with
    ``attention_mask=input_mask``; passing ``(input_ids, input_mask)``
    positionally only matches that call if ``attention_mask`` is the second
    positional parameter of the model's ``forward``.
    NOTE(review): the model's ``forward`` signature is not visible here; if its
    second parameter is e.g. ``token_type_ids``, the positional export traces
    the mask into the wrong input and the ONNX outputs diverge from PyTorch.
    Binding by keyword removes that dependency on parameter order.
    """

    def __init__(self, model):
        super().__init__()
        self.model = model

    def forward(self, input_ids, input_mask):
        # Same keyword arguments as the PyTorch inference call.
        return self.model(
            input_ids=input_ids,
            attention_mask=input_mask,
            start_positions=None,
            end_positions=None,
        )


export_model = _SpanNerExportWrapper(model)
export_model.eval()  # the wrapper itself must also be in evaluation mode

torch.onnx.export(
    export_model,
    (input_ids, input_mask),
    "convert_pytorch_to_tf/resumebert.onnx",
    opset_version=10,
    do_constant_folding=True,
    input_names=['input_ids', 'input_mask'],
    output_names=['start', 'end'],
    # Only the batch axis is dynamic; the sequence axis keeps the traced size.
    dynamic_axes={
        'input_ids': {0: 'batch_size'},  # variable length axes
        'input_mask': {0: 'batch_size'},
    },
)
你的问题解决了吗?我现在也遇到了同样的问题。
基于pytorch版本的bertspanner训练了一个NER模型。在pytorch下推理结果很好。但是通过torch.onnx.export将其转换成onnx模型后,再用onnx模型推理结果跟之前pytorch模型推理结果不一致。是我漏掉了什么?还是模型中有onnx不支持的op?谢谢!
pytorch 推理代码如下:
推理结果:
[['NAM', '王建国', [0, 2]], ['PHO', '13885699528', [7, 17]], ['MAI', 'wjg@wjg.com', [25, 35]], ['TIM', '2000年9月', [37, 43]], ['UNI', '北京大学', [47, 50]]]
onnx推理代码如下:
推理结果:
[['NAM', '王建国', [0, 2]], ['PHO', '13885699528', [7, 17]], ['TIM', '2000年9月-至今', [37, 46]]]
模型转换代码如下: