[ONNXRuntimeError] : 10 : INVALID_GRAPH : Load model from best_nms.onnx failed:This is an invalid model. Type Error: Type 'tensor(float)' of input parameter (conf_thres) of operator (NonMaxSuppression) in node () is invalid.

Describe the issue

Device Name: Jetson Xavier NX Developer Toolkit

I am trying to convert the yolov8m model to onnx which was successful. Then i wanted to add nms node to its graph the error I am facing is as follows:

To reproduce

The below code is how i converted my model to onnx:


import numpy as np
from models.yolov8.ultralytics.yolo.utils.ops import non_max_suppression
from models.yolov8.ultralytics.yolo.utils.ops import  scale_boxes
from util.general import letterbox
import cv2
import torch
from models.yolov8.ultralytics.nn.autobackend import AutoBackend
def preprocess_image(your_raw_image):
    frame=cv2.imread(your_raw_image)

    im = letterbox(frame, [1088,1088], stride=32, auto=True)[0] 
    im = np.ascontiguousarray(im.transpose((2, 0, 1))[::-1]) 
    im = torch.from_numpy(im).float().to('cuda')
    im /= 255.0
    im = im[None]

    return frame,im
def xywh2xyxy(x):
    """
    Convert bounding box coordinates from (x, y, width, height) format to (x1, y1, x2, y2) format where (x1, y1) is the
    top-left corner and (x2, y2) is the bottom-right corner.

    Args:
        x (np.ndarray) or (torch.Tensor): The input bounding box coordinates in (x, y, width, height) format.
    Returns:
        y (np.ndarray) or (torch.Tensor): The bounding box coordinates in (x1, y1, x2, y2) format.
    """
    y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.copy(x)
    y[..., 0] = x[..., 0] - x[..., 2] / 2
    y[..., 1] = x[..., 1] - x[..., 3] / 2
    y[..., 2] = x[..., 0] + x[..., 2] / 2
    y[..., 3] = x[..., 1] + x[..., 3] / 2
    return y
def xywh2yxyx(x):
    """
    Convert bounding box coordinates from (x, y, width, height) format to (x1, y1, x2, y2) format where (x1, y1) is the
    top-left corner and (x2, y2) is the bottom-right corner.

    Args:
        x (np.ndarray) or (torch.Tensor): The input bounding box coordinates in (x, y, width, height) format.
    Returns:
        y (np.ndarray) or (torch.Tensor): The bounding box coordinates in (x1, y1, x2, y2) format.
    """
    y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.copy(x)
    y[..., 0] =   x[..., 1] - x[..., 3] / 2
    y[..., 1] = x[..., 0] - x[..., 2] / 2
    y[..., 2] =   x[..., 1] + x[..., 3] / 2
    y[..., 3] = x[..., 0] + x[..., 2] / 2
    return y
import torch.nn as nn
import torch.nn as nn
import onnx

class YourCombinedModel(nn.Module):
    def __init__(self):
        super(YourCombinedModel, self).__init__()
        self.base_model = AutoBackend( 
            "/home/data/model_zoo/visdrones/v8M/best.pt",
             torch.device("cuda"),
             False, 
            ["pedestrian", "people","bicycle", "car", "van","truck","tricycle","awning-tricycle","bus","motor"],
              False
            )
        conf_thres=0.3
        score_thres=0.0
        iou_thres=0

    def forward(self, input):
        conf_thres=torch.Tensor([0.3])
        score_thres=torch.Tensor([0.0])
        iou_thres=torch.Tensor([0.0])
        # Forward pass through your base model
        self.base_output = self.base_model(input)[0]
        self.bs = self.base_output.shape[0]  # batch size
        self.nc = self.base_output.shape[1] - 0 - 4  # number of classes
        self.mi = 4 + self.nc  # mask start index
        self.xc = self.base_output[:, 4:self.mi].amax(1) > 0.3  # candidates
        self.max_wh = 7680  # (pixels) maximum box width and height
        self.max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
        self.time_limit = 0.5 + 0.05 * self.bs  # seconds to quit after
        self.redundant = 1  # require redundant detections

        self.merge = False  # use merge-NMS
        self.flag=False
        self.onnx_tensors = []
        self.shape=tuple()
        self.xi=0

        self.x=self.base_output.view(self.base_output.shape[1],-1)

        self.x = self.x.transpose(0, -1)[self.xc[self.xi]] 
        box, cls, mask = self.x.split((4, self.nc, 0), 1)

        box = xywh2xyxy(box)
        conf, j = cls.max(1, keepdim=True)
        self.x = torch.cat((box, conf, j.float(), mask), 1)[conf.view(-1) > 0.3]
        n = self.x.shape[0]  # number of boxes
        if not n:  # no boxes
            return
        self.x = self.x[self.x[:, 4].argsort(descending=True)[:self.max_nms]]  # sort by confidence and remove excess boxes

    # Batched NMS
        c =self.x[:, 5:6] * (0 if False else self.max_wh)  # classes
        boxes, scores = self.x[:, :4] + c, self.x[:, 4] 
        return boxes,scores,score_thres,iou_thres,conf_thres

import torch.onnx

# Create an instance of your combined model
combined_model = YourCombinedModel()

# Provide example input tensor (adjust shape and data type accordingly)
example_input = torch.randn(1, 3, 640,1088).cuda()

# Export the combined model to ONNX
onnx_path = "combined_model_builtin_var.onnx"

torch.onnx.export(
    model=combined_model,
    args=example_input,
    verbose=True,  # Set verbose to 1 for True
    f=onnx_path,
    input_names = ["input_name"],
    output_names=["boxes","scores","conf_thres","score_thres","iou_thres"]
)

import onnx
frame,input_data = preprocess_image("dummy_frame.jpg")
model_path='/home/necop-ai-jnx-01/tas-profiling/combined_model.onnx'
onnx_model = onnx.load_model("combined_model_builtin_var.onnx")
onnx_fpath = f"best_nms.onnx"
graph = onnx_model.graph
inputs=["boxes", "scores","conf_thres","score_thres","iou_thres",]
nms_node = onnx.helper.make_node(
    'NonMaxSuppression',
    inputs,
    ["selected_indices"],
    center_point_box=1,  # Set to 0 to indicate [x_min, y_min, x_max, y_max] format.

)

graph.node.append(nms_node)
output_value_info = onnx.helper.make_tensor_value_info("selected_indices", onnx.TensorProto.INT64, shape=["num_results",3])
graph.output.append(output_value_info)
onnx.checker.check_model(onnx_model)
onnx.save(onnx_model, onnx_fpath)

The below is for error reproducibility

import onnx
import onnxruntime
from util.box_ops import zero_negative_point
from models.yolov8.ultralytics.yolo.utils.ops import  scale_boxes
import torchvision
import cv2
import torch.onnx
import numpy as np

**session = onnxruntime.InferenceSession("best_nms.onnx",providers=['CUDAExecutionProvider']) ------> Here occurs the error**
input_name = session.get_inputs()[0].name
output_name1 = session.get_outputs()[0].name
# output_name2 = session.get_outputs()[1].name

input_data1=input_data.cpu()
input_data1=np.array(input_data1,dtype=np.float32)
# input_data.cpu()
# print(input_data)
output = session.run([output_name1], {input_name: input_data1})

Urgency

This is an urgent issue as I am on a deadline.

Platform

Linux

OS Version

Ubuntu 20.08 Jetpack version 5.1.1

ONNX Runtime Installation

Built from Source

ONNX Runtime Version or Commit ID

1.15.1

ONNX Runtime API

Python

Architecture

ARM64

Execution Provider

CUDA

Execution Provider Library Version

No response

microsoft / onnxruntime