microsoft / onnxruntime

ONNX Runtime: cross-platform, high performance ML inferencing and training accelerator
https://onnxruntime.ai
MIT License
14.65k stars 2.93k forks source link

[CPU] quant op input type error #13938

Open vacing opened 1 year ago

vacing commented 1 year ago

Describe the issue

I have a quantized model (int8).

To reproduce

convnets_modified_v1.zip

import cv2
import time
import torch
import numpy as np
import onnx
import onnxruntime as ort
import multiprocessing

class OnnxModel():
    """Thin wrapper around an ONNX Runtime InferenceSession.

    Caches the model's input and output tensor names at construction
    time so inference can be run by passing a single numpy array.
    """

    def __init__(self, onnx_path):
        # BUG FIX: the file imports `onnxruntime as ort`, so the bare name
        # `onnxruntime` used here previously raised NameError at runtime.
        self.onnx_session = ort.InferenceSession(onnx_path)
        self.input_name = self.get_input_name(self.onnx_session)
        self.output_name = self.get_output_name(self.onnx_session)
        print("input_name:{}".format(self.input_name))
        print("output_name:{}".format(self.output_name))

    def get_output_name(self, onnx_session):
        """Return the list of output tensor names of `onnx_session`."""
        return [node.name for node in onnx_session.get_outputs()]

    def get_input_name(self, onnx_session):
        """Return the list of input tensor names of `onnx_session`."""
        return [node.name for node in onnx_session.get_inputs()]

    def get_input_feed(self, input_name, image_numpy):
        """Build a feed dict mapping every name in `input_name` to `image_numpy`.

        NOTE(review): every input receives the *same* array; this is fine
        for single-input models but likely wrong for multi-input ones.
        """
        return {name: image_numpy for name in input_name}

    def forward(self, image_numpy):
        """Run the session on `image_numpy`; return its first two outputs."""
        input_feed = self.get_input_feed(self.input_name, image_numpy)
        scores, boxes = self.onnx_session.run(self.output_name, input_feed=input_feed)
        return scores, boxes

def sleepTime(hour, min, sec):
    """Convert an (hour, min, sec) duration to a total number of seconds."""
    return sec + 60 * (min + 60 * hour)

def get_tensor_from_videoStream(frame):
    """Convert one BGR video frame into a float32 NCHW tensor in [0, 1].

    Args:
        frame: HxWx3 uint8 BGR image as produced by cv2 (OpenCV).

    Returns:
        numpy float32 array of shape (1, 3, H, W), each value divided by 255.
    """
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    # Add the batch dimension and move channels first: (1,H,W,C) -> (1,C,H,W).
    src = np.stack([frame]).transpose(0, 3, 1, 2)
    # BUG FIX (performance): the original scaled each element inside a
    # 4-level Python loop — one interpreted iteration per pixel per channel.
    # Dividing in float64 and casting to float32 reproduces the original
    # element-wise result exactly, at C speed. The unused torch tensor
    # (`result_frame`) was dead code and has been removed.
    return (src / 255.0).astype(np.float32)

if __name__=='__main__':
    # Constant tensors used by the (commented-out) alternative run call below.
    A = np.array([0.0], dtype = np.float32)
    B = np.array([0.25], dtype = np.float32)
    A1 = np.resize(A, (1,1,1,1))
    # Zero-initialized recurrent state inputs expected by the model.
    r1i = np.zeros((1, 16, 80, 90), dtype=np.float32)
    r2i = np.zeros((1, 20, 40, 45), dtype=np.float32)
    r3i = np.zeros((1, 40, 20, 23), dtype=np.float32)
    r4i = np.zeros((1, 64, 10, 12), dtype=np.float32)
    downsample_ratio = torch.as_tensor(B)

    second = sleepTime(0, 0, 1)

    # Single-threaded CPU session with profiling on, so the slow quantized
    # op can be located in the generated profile JSON.
    sess_options = ort.SessionOptions()
    sess_options.intra_op_num_threads = 1
    sess_options.enable_profiling = True

    model_name = "ort_web/convnets_modified_v1.onnx"
    video_path = "/data/meeting_02_720x640.mp4"  # use 0 to read from a webcam
    print(multiprocessing.cpu_count())
    ort_session = ort.InferenceSession(model_name, sess_options, providers=['CPUExecutionProvider'])
    options = ort_session.get_session_options()

    cap = cv2.VideoCapture(video_path)

    nums = 0
    avg = 0
    while nums <= 10:
        nums += 1
        ret, frame = cap.read()
        # BUG FIX: the original ignored `ret`; cv2 returns (False, None) at
        # end-of-stream or when the video fails to open, which then crashed
        # inside cv2.resize with a confusing error. Stop cleanly instead.
        if not ret:
            print("no frame read (end of stream or failed to open video), stopping")
            break
        dim = (180, 160)
        resized = cv2.resize(frame, dim, interpolation = cv2.INTER_AREA)
        tensor = get_tensor_from_videoStream(resized)
        print(tensor.shape)

        start_time = time.time()
        pha, r1o, r2o, r3o, r4o = ort_session.run(None, {'src':tensor, 'r1i':r1i, 'r2i':r2i, 'r3i':r3i, 'r4i':r4i})
        # outputs= ort_session.run(None, {'src':tensor, 'r1i':A1, 'r2i':A1, 'r3i':A1, 'r4i':A1, 'downsample_ratio':B})
        end_time = time.time()

        bgr = np.array([0.47, 1., 0.6]).reshape((3, 1, 1))

        # Exponential moving average of per-frame inference latency.
        duration = end_time - start_time
        if avg == 0:
            avg = duration
        else:
            exp = 0.8
            avg = avg * exp + duration * (1-exp)
        print(f"the running time is : {avg} s")

Urgency

urgent

Platform

Linux

OS Version

CentOS 7.9

ONNX Runtime Installation

Built from Source

ONNX Runtime Version or Commit ID

release version

ONNX Runtime API

Python

Architecture

X64

Execution Provider

Default CPU

Execution Provider Library Version

No response

vacing commented 1 year ago

@shalvamist another issue related to quant model, please check if any information I missed, thanks a lot

vacing commented 1 year ago

Hi @baijumeswani, it's not only on web; Python has the same problem as well.

shalvamist commented 1 year ago

Hi @vacing,

Sorry for the delayed response I was dealing with some backlog. I was able to reproduce the issue on my end using python. Let me dig a bit deeper for the root cause. I will keep you posted with my findings.