chainyo / transformers-pipeline-onnx

How to export Hugging Face's 🤗 NLP Transformers models to ONNX and use the exported model with the appropriate Transformers pipeline.

Getting incorrect output #3

Closed: ujjawalcse closed this issue 1 year ago

ujjawalcse commented 2 years ago

Hey @ChainYo, I tried your script with my custom fine-tuned model, but the output is not as expected: it's predicting a label for every token instead of grouping entities. Here is a sample of the output:

[{'entity_group': 'LABEL_6', 'score': 0.14850605, 'word': 'ab', 'start': 0, 'end': 2},
 {'entity_group': 'LABEL_0', 'score': 0.12011145, 'word': '##hishe', 'start': 2, 'end': 7},
 {'entity_group': 'LABEL_6', 'score': 0.11439563, 'word': '##k kumar', 'start': 7, 'end': 14},
 {'entity_group': 'LABEL_13', 'score': 0.11188321, 'word': 'education', 'start': 16, 'end': 25},
 {'entity_group': 'LABEL_0', 'score': 0.11445558, 'word': '&', 'start': 26, 'end': 27},
 {'entity_group': 'LABEL_13', 'score': 0.10697147, 'word': 'credentials', 'start': 28, 'end': 39},
 {'entity_group': 'LABEL_9', 'score': 0.12449409, 'word': 'msc (', 'start': 40, 'end': 45},
 {'entity_group': 'LABEL_13', 'score': 0.123251475, 'word': 'information', 'start': 45, 'end': 56},
 {'entity_group': 'LABEL_0', 'score': 0.13867705, 'word': 'technology management', 'start': 57, 'end': 78},
 {'entity_group': 'LABEL_1', 'score': 0.11498813, 'word': ')', 'start': 78, 'end': 79},
 {'entity_group': 'LABEL_8', 'score': 0.12129795, 'word': 'from', 'start': 80, 'end': 84},
 {'entity_group': 'LABEL_6', 'score': 0.12780227, 'word': 'university of', 'start': 85, 'end': 98}]

My ONNX export script is as follows:

import torch
from transformers import BertTokenizerFast, BertForTokenClassification
from transformers.convert_graph_to_onnx import convert
from pathlib import Path

RESUME_NUM_LABELS = 14

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
MODEL_PATH = 'BertBaseUncased'  # 'YituTech/conv-bert-base' / 'bert-base-uncased'

# Load the fine-tuned weights and the tokenizer
RESUME_STATE_DICT = torch.load("models/model-state-resume_14342_chunks_02-02.bin", map_location=DEVICE)
TOKENIZER = BertTokenizerFast.from_pretrained('C:\\Users\\Ujjawal\\Downloads\\docker_util\\BertBaseUncased')
MAX_LEN = 512

# Rebuild the token-classification model from the saved state dict
resume_model = BertForTokenClassification.from_pretrained(
    MODEL_PATH, state_dict=RESUME_STATE_DICT['model_state_dict'], num_labels=RESUME_NUM_LABELS)
resume_model.to(DEVICE)
resume_model.eval()

# Export the model to ONNX
output = Path('C:\\Users\\Ujjawal\\Downloads\\docker_util\\output\\onnx\\ner_model_v2.onnx').absolute()
convert(pipeline='ner',
        framework="pt",
        model=resume_model,
        tokenizer=TOKENIZER,
        output=output,
        opset=11)
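
A quick sanity check after exporting is to list the graph's input and output names, to confirm the attention mask was exported alongside the input ids (a minimal sketch, reusing the output path from above; convert_graph_to_onnx typically names the logits output output_0):

from onnxruntime import InferenceSession

check_session = InferenceSession(str(output), providers=["CPUExecutionProvider"])
print([i.name for i in check_session.get_inputs()])   # e.g. ['input_ids', 'attention_mask', 'token_type_ids']
print([o.name for o in check_session.get_outputs()])  # e.g. ['output_0'], the logits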

Here is the inference script you provided:

import torch
from time import time
from onnxruntime import (
    InferenceSession, SessionOptions, GraphOptimizationLevel
)
from transformers import (
    TokenClassificationPipeline, AutoTokenizer, AutoModelForTokenClassification,BertTokenizerFast,BertForTokenClassification
)

options = SessionOptions() # initialize session options
options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL

session = InferenceSession(
    "output/onnx/ner_model_v1.onnx", sess_options=options, providers=["CPUExecutionProvider"]
)

# Disable the session.run() fallback mechanism; this prevents a reset of the execution provider
session.disable_fallback() 

class OnnxTokenClassificationPipeline(TokenClassificationPipeline):

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def _forward(self, model_inputs):
        """
        Forward pass through the model. This method is not to be called by the user directly and is only used
        by the pipeline to perform the actual predictions.

        This is where we will define the actual process to do inference with the ONNX model and the session created
        before.
        """

        # This comes from the original implementation of the pipeline
        special_tokens_mask = model_inputs.pop("special_tokens_mask")
        offset_mapping = model_inputs.pop("offset_mapping", None)
        sentence = model_inputs.pop("sentence")

        inputs = {k: v.cpu().detach().numpy() for k, v in model_inputs.items()} # dict of numpy arrays
        outputs_name = session.get_outputs()[0].name # get the name of the output tensor

        logits = session.run(output_names=[outputs_name], input_feed=inputs)[0] # run the session
        logits = torch.tensor(logits) # convert to torch tensor to be compatible with the original implementation

        return {
            "logits": logits,
            "special_tokens_mask": special_tokens_mask,
            "offset_mapping": offset_mapping,
            "sentence": sentence,
            **model_inputs,
        }

    # We need to override the preprocess method because the ONNX model expects the attention mask as an input
    # along with the input ids.
    def preprocess(self, sentence, offset_mapping=None):
        truncation = True if self.tokenizer.model_max_length and self.tokenizer.model_max_length > 0 else False
        model_inputs = self.tokenizer(
            sentence,
            return_attention_mask=True, # This is the only difference from the original implementation
            return_tensors=self.framework,
            truncation=truncation,
            return_special_tokens_mask=True,
            return_offsets_mapping=self.tokenizer.is_fast,
        )
        if offset_mapping:
            model_inputs["offset_mapping"] = offset_mapping

        model_inputs["sentence"] = sentence

        return model_inputs

RESUME_NUM_LABELS = 14
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
MODEL_PATH = 'BertBaseUncased'  # 'YituTech/conv-bert-base' / 'bert-base-uncased'

RESUME_STATE_DICT = torch.load("models/model-state-resume_14342_chunks_02-02.bin", map_location=DEVICE)
TOKENIZER = AutoTokenizer.from_pretrained('C:\\Users\\Ujjawal\\Downloads\\docker_util\\BertBaseUncased')
MAX_LEN = 512

resume_model = AutoModelForTokenClassification.from_pretrained(
    MODEL_PATH, state_dict=RESUME_STATE_DICT['model_state_dict'], num_labels=RESUME_NUM_LABELS)

onnx_pipeline = OnnxTokenClassificationPipeline(
    task="ner", 
    model=resume_model,
    tokenizer=TOKENIZER,
    framework="pt",
    aggregation_strategy="simple",
)

text="Abhishek Kumar  Education & Credentials MSc (Information Technology Management) from University of Bradford, UK in 2017; secured 74% (Distinction) Senior Certificate in Computer Science & Engineering from University of Florida, US in 2013; secured 3.1/4.0  Bachelor of Technology in Computer Science Engineering from Jaypee University of Information & Technology, HP, India in 2012;"
t1=time()
res=onnx_pipeline(text)
t2=time()
print('result',res)
print('Time Taken : ',(t2-t1))
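
To rule out an export problem, the same text can also be run through the plain PyTorch pipeline for comparison (a sketch reusing resume_model, TOKENIZER, and text from above):

from transformers import pipeline

pt_pipeline = pipeline("ner", model=resume_model, tokenizer=TOKENIZER, aggregation_strategy="simple")
print('pytorch result', pt_pipeline(text))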

Can you guide me on where I'm going wrong? Thanks.

chainyo commented 2 years ago

Hello @ujjawalcse 👋 Thanks for opening this issue!

This is expected behavior: there is an ignore_labels argument you can pass to your pipeline to filter out certain labels. By default, only "O" tokens are ignored.

Check the TokenClassificationPipeline documentation: https://huggingface.co/docs/transformers/main_classes/pipelines#transformers.TokenClassificationPipeline
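
For example, a minimal sketch, assuming LABEL_0 is the non-entity ("O") class in your label map; swap in whichever labels you want filtered out:

onnx_pipeline = OnnxTokenClassificationPipeline(
    task="ner",
    model=resume_model,
    tokenizer=TOKENIZER,
    framework="pt",
    aggregation_strategy="simple",
    ignore_labels=["LABEL_0"],  # assumption: LABEL_0 is your non-entity class
)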

ujjawalcse commented 2 years ago

Got it. Thanks.