triton-inference-server / server

The Triton Inference Server provides an optimized cloud and edge inferencing solution.
https://docs.nvidia.com/deeplearning/triton-inference-server/user-guide/docs/index.html
BSD 3-Clause "New" or "Revised" License

Has anyone run into accuracy degradation when converting a yolov8n.pt model to TorchScript and ONNX and running inference on Triton Server or Deepytorch Inference? #7792

Open JackonLiu opened 1 week ago

JackonLiu commented 1 week ago

Description
After converting a yolov8n.pt model to TorchScript and ONNX, inference on Triton Server or Deepytorch Inference shows a drop in accuracy.

Triton Information
What version of Triton are you using? nvcr.io/nvidia/tritonserver:23.04-py3
Are you using the Triton container or did you build it yourself? Using the Triton container (not built from source).
To Reproduce
Steps to reproduce the behavior:

  1. yolo export model=best.pt format=onnx opset=15
  2. yolo export model=best.pt format=torchscript
  3. nano config.pbtxt
  4. docker run --gpus all -it --rm -v /data/triton/models:/models -p 8000:8000 -p 8001:8001 -p 8002:8002 nvcr.io/nvidia/tritonserver:23.04-py3 tritonserver --model-repository=/models --log-verbose=1
  5. python yolov10_triton.py
  6. output:
     boxes: tensor([[ 7.4102, 36.6875, 14.7109, 73.1250], [ 15.0469, 26.4531, 29.8750, 52.8438], [ 20.6875, 15.2891, 40.5000, 30.6094], ..., [472.5000, 575.0000, 348.7500, 143.5000], [503.0000, 568.0000, 326.0000, 178.0000], [545.0000, 586.5000, 323.2500, 224.0000]], device='cuda:0')
     scores: tensor([0., 0., 0., ..., 0., 0., 0.], device='cuda:0')
     class_ids: tensor([0, 0, 0, ..., 0, 0, 0], device='cuda:0')
     class_ids>0.7: tensor([], device='cuda:0', dtype=torch.int64)

Why are these class_ids all 0?
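To narrow down where the accuracy is lost, it can help to run the same preprocessed tensor through the exported ONNX file locally with onnxruntime and through Triton, then compare the raw outputs. Below is a minimal sketch, assuming the exported file is best.onnx, the input name is images, the output name is output0, and the model is served as bottle_plus_onnx on localhost:8001:

```python
import numpy as np
import onnxruntime as ort
from tritonclient.grpc import InferenceServerClient, InferInput

# Same dummy input for both paths (replace with a real preprocessed image).
x = np.random.rand(1, 3, 640, 640).astype(np.float32)

# Local ONNX Runtime inference on the exported model.
session = ort.InferenceSession("best.onnx", providers=["CPUExecutionProvider"])
local_out = session.run(["output0"], {"images": x})[0]

# The same tensor through Triton.
client = InferenceServerClient(url="localhost:8001")
inp = InferInput("images", list(x.shape), "FP32")
inp.set_data_from_numpy(x)
triton_out = client.infer(model_name="bottle_plus_onnx", inputs=[inp]).as_numpy("output0")

# If these match closely, the degradation is in the export or post-processing,
# not in Triton itself.
print("shapes:", local_out.shape, triton_out.shape)
print("max abs diff:", np.abs(local_out - triton_out).max())
```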

Describe the models (framework, inputs, outputs), ideally include the model configuration file (if using an ensemble include the model configuration file for that as well).

```
platform: "onnxruntime_onnx"
max_batch_size: 16
input [
  {
    name: "images"
    data_type: TYPE_FP32
    dims: [ 3, 640, 640 ]
  }
]
output [
  {
    name: "output0"
    data_type: TYPE_FP32
    dims: [ -1, -1 ]
  }
]
dynamic_batching {
  preferred_batch_size: [ 1, 2, 4, 8, 16 ]
  max_queue_delay_microseconds: 100
}
instance_group [
  {
    count: 1
    kind: KIND_GPU
    gpus: [ 0 ]
  }
]
```
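Since this config declares output0 with dims [ -1, -1 ] while the client code assumes a (1, 77, 8400) tensor, it may be worth confirming the shape Triton actually serves. A small sketch using the gRPC client's metadata calls (the model name bottle_plus_onnx and the localhost:8001 endpoint are assumptions taken from the script below):

```python
from tritonclient.grpc import InferenceServerClient

client = InferenceServerClient(url="localhost:8001")

# Input/output names, datatypes, and shapes as Triton has loaded them.
metadata = client.get_model_metadata(model_name="bottle_plus_onnx")
for out in metadata.outputs:
    print(out.name, out.datatype, list(out.shape))

# The parsed model configuration, including max_batch_size and dims.
config = client.get_model_config(model_name="bottle_plus_onnx")
print(config.config.max_batch_size)
```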

```
name: "bottle_plus"
platform: "pytorch_libtorch"
max_batch_size: 16
input [
  {
    name: "images0"
    data_type: TYPE_FP32
    dims: [ 3, 640, 640 ]
  }
]
output [
  {
    name: "output0"
    data_type: TYPE_FP32
    dims: [ 77, 8400 ]
  }
]
dynamic_batching {
  preferred_batch_size: [ 1, 2, 4, 8, 16 ]
  max_queue_delay_microseconds: 100
}
instance_group [
  {
    count: 1
    kind: KIND_GPU
    gpus: [ 0 ]
  }
]
```
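For the TorchScript path, one way to check whether the accuracy loss comes from the export itself is to load the exported file with torch.jit.load and compare its raw output against the exported ONNX model on the same input. A rough sketch, assuming the export produced best.onnx and best.torchscript with input name images:

```python
import numpy as np
import onnxruntime as ort
import torch

x = np.random.rand(1, 3, 640, 640).astype(np.float32)

# Exported TorchScript module.
ts_model = torch.jit.load("best.torchscript").eval()
with torch.no_grad():
    ts_out = ts_model(torch.from_numpy(x))
# Some exports return a tuple; keep the first tensor in that case.
if isinstance(ts_out, (tuple, list)):
    ts_out = ts_out[0]
ts_out = ts_out.numpy()

# Exported ONNX model.
session = ort.InferenceSession("best.onnx", providers=["CPUExecutionProvider"])
onnx_out = session.run(None, {"images": x})[0]

print("torchscript:", ts_out.shape, "onnx:", onnx_out.shape)
print("max abs diff:", np.abs(ts_out - onnx_out).max())
```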

yolov10_triton.py:

```python
import io
import os
from io import BytesIO

import cv2
import numpy as np  # needed for np.asarray / np.sort below
import torch
import torchvision
from PIL import Image
from tritonclient.grpc import InferenceServerClient, InferInput
from tritonclient.utils import *

def getIou(box1, box2, inter_area):
    # Boxes are (cx, cy, w, h); IoU = intersection / union of the two areas.
    box1_area = box1[2] * box1[3]
    box2_area = box2[2] * box2[3]
    union = box1_area + box2_area - inter_area
    iou = inter_area / union
    return iou

def getInter(box1, box2):
    # Convert (cx, cy, w, h) boxes to corner coordinates.
    box1_x1, box1_y1, box1_x2, box1_y2 = box1[0] - box1[2] / 2, box1[1] - box1[3] / 2, \
        box1[0] + box1[2] / 2, box1[1] + box1[3] / 2
    box2_x1, box2_y1, box2_x2, box2_y2 = box2[0] - box2[2] / 2, box2[1] - box2[3] / 2, \
        box2[0] + box2[2] / 2, box2[1] + box2[3] / 2
    if box1_x1 > box2_x2 or box1_x2 < box2_x1:
        return 0
    if box1_y1 > box2_y2 or box1_y2 < box2_y1:
        return 0
    # The overlap extent is the middle segment of the sorted corner coordinates.
    x_list = np.sort([box1_x1, box1_x2, box2_x1, box2_x2])
    x_inter = x_list[2] - x_list[1]
    y_list = np.sort([box1_y1, box1_y2, box2_y1, box2_y2])
    y_inter = y_list[2] - y_list[1]
    inter = x_inter * y_inter
    return inter

class TritonInferer:
    def __init__(self, model_name, server_url='localhost:8001'):
        self.img_src = None
        self.img = None
        self.triton_client = InferenceServerClient(url=server_url, verbose=False)
        self.model_name = model_name
        self.input_name = 'images'
        self.output_name = 'output0'
        self.image_size = 640
        self.conf_thres = 0.5
        self.iou_thres = 0.5

    def preprocess_image(self, payload):
        stream = BytesIO(payload)
        file_bytes = np.asarray(bytearray(stream.read()), dtype=np.uint8)
        img_src = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR)
        stream.close()
        self.img_src = img_src
        img_size = (self.image_size, self.image_size)
        image = cv2.resize(img_src, img_size)
        image = image.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
        image = image.astype(np.float32) / 255.0  # Normalize to [0, 1]
        self.img = image
        return image, img_src

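    # Note: Ultralytics' own preprocessing letterboxes the image (resize keeping
    # aspect ratio, then pad to 640x640), while the plain cv2.resize above
    # stretches it; rescale() below computes padding as if letterboxing had been
    # used. A minimal letterbox sketch for comparison (the 114-gray pad value is
    # an assumption taken from the Ultralytics default; this method is not
    # called anywhere in this script):
    def letterbox(self, img, new_shape=(640, 640), pad_value=114):
        h, w = img.shape[:2]
        ratio = min(new_shape[0] / h, new_shape[1] / w)
        new_h, new_w = int(round(h * ratio)), int(round(w * ratio))
        resized = cv2.resize(img, (new_w, new_h))
        top = (new_shape[0] - new_h) // 2
        bottom = new_shape[0] - new_h - top
        left = (new_shape[1] - new_w) // 2
        right = new_shape[1] - new_w - left
        return cv2.copyMakeBorder(resized, top, bottom, left, right,
                                  cv2.BORDER_CONSTANT, value=(pad_value,) * 3)
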
    def rescale(self, ori_shape, boxes, target_shape):
        """Rescale the output to the original image shape"""
        ratio = min(ori_shape[0] / target_shape[0], ori_shape[1] / target_shape[1])
        padding = (ori_shape[1] - target_shape[1] * ratio) / 2, (ori_shape[0] - target_shape[0] * ratio) / 2

        boxes[:, [0, 2]] -= padding[0]
        boxes[:, [1, 3]] -= padding[1]
        boxes[:, :4] /= ratio

        boxes[:, 0].clamp_(0, target_shape[1])  # x1
        boxes[:, 1].clamp_(0, target_shape[0])  # y1
        boxes[:, 2].clamp_(0, target_shape[1])  # x2
        boxes[:, 3].clamp_(0, target_shape[0])  # y2

        return boxes

    def predict(self, payload):
        image, original_shape = self.preprocess_image(payload)
        image_input = InferInput(self.input_name, [1, 3, self.image_size, self.image_size], "FP32")
        image_input.set_data_from_numpy(image[np.newaxis, ...])

        results = self.triton_client.infer(
            model_name=self.model_name,
            inputs=[image_input]
        )
        output = results.as_numpy(self.output_name)

        # Parse the raw output tensor into a list of detections.
        return self.parse_output(output)

    def non_max_suppression(self, boxes, scores, iou_thres):

        indices = torchvision.ops.nms(torch.tensor(boxes), torch.tensor(scores), iou_thres)
        return boxes[indices], scores[indices]
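
    # torchvision.ops.nms expects boxes in (x1, y1, x2, y2) corner format, while
    # the raw YOLO head emits (cx, cy, w, h). A small conversion sketch (assuming
    # the cxcywh layout; not wired into the pipeline above):
    def xywh2xyxy(self, boxes):
        boxes = np.asarray(boxes, dtype=np.float32).copy()
        xy = boxes[:, :2]          # box centres
        wh = boxes[:, 2:4] / 2.0   # half widths / heights
        return np.concatenate([xy - wh, xy + wh], axis=1)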

    def parse_output(self, output):

        num_classes = 77  # number of classes for YOLOv8n
        # num_classes = 84  # number of classes for YOLOv8n
        if output.shape == (1, num_classes, 8400):
            output = output.squeeze(0).transpose(1, 0)  # reshape to (8400, num_classes)
        else:
            raise ValueError(f"Unexpected output shape: {output.shape}")

        boxes = output[:, :4]  # (8400, 4)
        scores = output[:, 4]  # (8400,)
        if np.any(scores > 0):

            max_score = np.max(scores)
            min_score = np.min(scores)

            new_min = 0
            new_max = 1.0

            scaled_scores = (scores - min_score) / (max_score - min_score) * (new_max - new_min) + new_min
            max_score = np.max(scaled_scores)
            min_score = np.min(scaled_scores)
            median_score = np.median(scaled_scores)
            mean_score = np.mean(scaled_scores)
            print(
                f"Scores - Max: {max_score:.4f}, Min: {min_score:.4f}, Median: {median_score:.4f}, Mean: {mean_score:.4f}")

            class_probs = output[:, 5:]  # (8400, 80)

            # Convert the NumPy array to a PyTorch tensor
            output_tensor = torch.from_numpy(class_probs)

            # argmax over the class axis
            class_indices = torch.argmax(output_tensor, dim=1)  # index of the highest-scoring class per anchor
            print("class_ids:", class_indices)

            # Filter out low-confidence detections
            confidence_threshold = 0.7
            mask = scaled_scores > confidence_threshold
            boxes = boxes[mask]
            scores = scaled_scores[mask]
            class_ids = class_indices[mask]
            # Apply Non-Maximum Suppression
            boxes1, confidences = self.non_max_suppression(boxes, scores, 0.45)

            print("class_ids>0.7:", class_ids)
            boxes_xyxy = torch.from_numpy(boxes1)
            boxes_xyxy_rescale = self.rescale(self.img.shape[1:], boxes_xyxy, self.img_src.shape).round()
            prediction_list = []
            class_ids = class_ids.cpu().numpy()
            boxes_xyxy_rescale = boxes_xyxy_rescale.cpu().numpy()
            for n in range(boxes.shape[0]):
                xy = boxes_xyxy_rescale[n]
                c = class_ids[n]
                # p = scores[n]
                p = confidences[n]
                Location = {'Left': int(xy[0]), 'Top': int(xy[1]), 'Weight': int(xy[2]),
                            'Height': int(xy[3])}
                i = {'Layer': c, 'Probablity': round(p, 4), 'Location': Location}
                prediction = [i['Layer'], i['Probablity'], i['Location']['Left'], i['Location']['Top'],
                              i['Location']['Weight'], i['Location']['Height']]
                print(prediction)
                prediction_list.append(prediction)

            return prediction_list

bottle_name = 'v10_bottle5_240925'
bottle_plus_name = 'v10_bottle_plus11_240925'
yolov8_wt = str(r"D:\workspace_py\pxys-model-rest\weights\bottle{}.pt".format(bottle_name))
yolov8_plus_wt = str(r"D:\workspace_py\pxys-model-rest\weights\bottle{}.pt".format(bottle_plus_name))

triton_inferer = TritonInferer(model_name='bottle_plus_onnx')

image_folder = r'E:\data\数据库\9月瓶\新品由柑汁\obj_train_data\images'

for filename in os.listdir(image_folder):
    if not (filename.endswith('.jpg') or filename.endswith('.png')):
        continue
    image_path = os.path.join(image_folder, filename)

    with Image.open(image_path) as img:

        img = img.convert('RGB')

        byte_arr = io.BytesIO()

        img.save(byte_arr, format='PNG')

        payload = byte_arr.getvalue()
    predictions = triton_inferer.predict(payload)
    print(predictions)

```

Expected behavior
The scores and class_ids should not all be 0; the converted models should keep roughly the same accuracy as the original yolov8n.pt model.