riskycheng / YoloV8s_DetectionDoorLock

Box door detection with YOLOv8s, deployed with RKNN (RK3588)
MIT License

The export operation that converts the .pt model to ONNX: are you using the official export function? #1

Closed: wenbindu closed this issue 7 months ago

wenbindu commented 10 months ago
  1. Some blogs say that the .pt model should be exported to ONNX with the parameter opset=12, but I found that you did not set that parameter.
  2. Could you provide a script for exporting the RKNN model from ONNX? Thank you so much. I ran into some errors during the conversion steps.
riskycheng commented 7 months ago

Sorry for the late response. Issue 1: it really depends on the capabilities of the inference framework (see the export sketch at the end of this reply). Issue 2: you can run the following to convert the ONNX model to RKNN (verified with toolkit versions 1.5.2 and 1.6.0; you need the RKNN toolkit / rknn.api set up in your environment):

import cv2
import numpy as np
from rknn.api import RKNN

ONNX_MODEL = './best.onnx'
RKNN_MODEL = './best.rknn'
DATASET = './datasets.txt'
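# datasets.txt lists calibration image paths (one per line), used when quantization is enabled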

QUANTIZE_ON = True

CLASSES = {0:'box', 1:'box_open'} 

nmsThresh = 0.45
objectThresh = 0.5

model_h = 640
model_w = 640 

color_palette = np.random.uniform(0, 255, size=(len(CLASSES), 3))

def letterbox(im, new_shape=(640, 640), color=(114, 114, 114)):
    # Resize and pad image while meeting stride-multiple constraints
    shape = im.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])

    # Compute padding
    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding

    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return im, ratio, (dw, dh)

def draw_detections(img, box, score, class_id):
    """
    Draws bounding boxes and labels on the input image based on the detected objects.

    Args:
        img: The input image to draw detections on.
        box: Detected bounding box.
        score: Corresponding detection score.
        class_id: Class ID for the detected object.

    Returns:
        None
    """

    # Extract the coordinates of the bounding box
    x1, y1, w, h = box

    # Retrieve the color for the class ID
    color = color_palette[class_id]

    # Draw the bounding box on the image
    cv2.rectangle(img, (int(x1), int(y1)), (int(x1 + w), int(y1 + h)), color, 2)

    # Create the label text with class name and score
    label = f'{CLASSES[class_id]}: {score:.2f}'

    # Calculate the dimensions of the label text
    (label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)

    # Calculate the position of the label text
    label_x = x1
    label_y = y1 - 10 if y1 - 10 > label_height else y1 + 10

    # Draw a filled rectangle as the background for the label text
    cv2.rectangle(img, (label_x, label_y - label_height), (label_x + label_width, label_y + label_height), color,
                  cv2.FILLED)

    # Draw the label text on the image
    cv2.putText(img, label, (label_x, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def postprocess(input_image, outputs):
    img_h, img_w = input_image.shape[:2]
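    # With the ONNX outputs selected in export_rknn(), outputs[0] carries the decoded boxes
    # (cx, cy, w, h per candidate) and outputs[1] carries the raw class scores (pre-sigmoid)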
    boxes0 = np.transpose(np.squeeze(outputs[0]))
    scores0 = np.transpose(np.squeeze(outputs[1]))

    if len(scores0.shape) == 1:
        scores0 = np.expand_dims(scores0, axis=1)
    scores = sigmoid(scores0)
    max_scores = np.max(scores, axis=1)  # select the max score for multiple classes
    max_indices = np.argmax(scores, axis=1)

    t = np.where(max_scores >= objectThresh)[0]

    boxes = boxes0[t]
    scores = max_scores[t]
    class_ids = max_indices[t]

    # high -> low ordered by score
    sorted_indices = np.argsort(scores)[::-1]
    boxes = boxes[sorted_indices]
    scores = scores[sorted_indices]
    class_ids = class_ids[sorted_indices]

    print(boxes)
    print(scores)
    print(class_ids)

    # Get the number of rows in the outputs array
    rows = boxes.shape[0]

    # Lists to store the bounding boxes, scores, and class IDs of the detections
    boxes_ = []
    scores_ = []
    class_ids_ = []

    # Calculate the scaling factors for the bounding box coordinates
    x_factor = img_w / model_w
    y_factor = img_h / model_h

    # Iterate over each remaining detection (already filtered by objectThresh above)
    for i in range(rows):
        # Per-row max score and its class ID were already computed above
        max_score = scores[i]
        class_id = class_ids[i]

        # Extract the bounding box (center x, center y, width, height) from the current row
        x, y, w, h = boxes[i]

        # Calculate the scaled top-left corner and size of the bounding box
        left = int((x - w / 2) * x_factor)
        top = int((y - h / 2) * y_factor)
        width = int(w * x_factor)
        height = int(h * y_factor)

        # Add the class ID, score, and box coordinates to the respective lists
        class_ids_.append(class_id)
        scores_.append(max_score)
        boxes_.append([left, top, width, height])

    print(boxes_)
    print(scores_)
    print(class_ids_)

    # Apply non-maximum suppression to filter out overlapping bounding boxes
    indices = cv2.dnn.NMSBoxes(boxes_, scores_, score_threshold=objectThresh, nms_threshold=nmsThresh)

    # Iterate over the selected indices after non-maximum suppression
    for i in indices:
        # Get the box, score, and class ID corresponding to the index
        box = boxes_[i]
        score = scores_[i]
        class_id = class_ids_[i]

        # Draw the detection on the input image
        draw_detections(input_image, box, score, class_id)
    return input_image

def export_rknn():
    rknn = RKNN(verbose=True)

    rknn.config(
        # see:ultralytics/yolo/data/utils.py
        mean_values=[[0, 0, 0]],
        std_values=[[255, 255, 255]],
        # TODO: adjust the following values
        # mean_values=[[123.675, 116.28, 103.53]],  # IMAGENET_MEAN = 0.485, 0.456, 0.406
        # std_values=[[58.395, 57.12, 57.375]],  # IMAGENET_STD = 0.229, 0.224, 0.225
        quantized_algorithm='normal',
        quantized_method='channel',
        # optimization_level=2,
        compress_weight=False,  # compress model to reduce the rknn model size
        # single_core_mode=True,
        # model_pruning=False,  # pruning model to reduce the model size
        target_platform='rk3588'
    )
    rknn.load_onnx(
        model=ONNX_MODEL,
        outputs=[
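            # Note: these node names come from this particular YOLOv8 ONNX export (Detect head
            # model.22): the Mul output is the decoded boxes and the Split output is the raw
            # class scores; a differently exported graph may expose different node names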
            '/model.22/Mul_2_output_0', '/model.22/Split_output_1',
        ]
    )
    rknn.build(do_quantization=QUANTIZE_ON, dataset=DATASET, rknn_batch_size=1)
    rknn.export_rknn(RKNN_MODEL)

    # evaluation purposes
    # rknn.accuracy_analysis(
    #     inputs=['/home/workspace/onnx2rknn_YOLOv8/onnx_model/official/zidane.jpg'],
    #     output_dir="./snapshot",
    #     target=None
    # )

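    # With no target specified, init_runtime() below runs inference on the PC simulator;
    # pass target='rk3588' to run on a connected board instead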
    rknn.init_runtime()
    return rknn

if __name__ == '__main__':
    # data preparation
    img_path = './images/video_08_00_frame_000001.jpg'
    orig_img = cv2.imread(img_path)
    # img = cv2.cvtColor(orig_img, cv2.COLOR_BGR2RGB)
    img = orig_img
    img_h, img_w = img.shape[:2]
    resized_img, ratio, (dw, dh) = letterbox(img, new_shape=(model_h, model_w))  # padding resize
    # resized_img = cv2.resize(img, (model_w, model_h), interpolation=cv2.INTER_LINEAR) # direct resize
    input_tensor = np.expand_dims(resized_img, axis=0)  # add batch dimension (NHWC)

    # export to RKNN model
    rknn = export_rknn()

    # sample inference
    outputs = rknn.inference(inputs=[input_tensor], data_format="nhwc")

    # post processing model output
    result_img = postprocess(resized_img, outputs)

    cv2.imwrite('./images/video_08_00_frame_000001_res.jpg', result_img)

    rknn.release()
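
On Issue 1: if your converter does need a pinned opset, a minimal .pt-to-ONNX export sketch using the standard Ultralytics API could look like the following (opset=12, imgsz and simplify here are illustrative values, not necessarily the exact settings used for this repo):

from ultralytics import YOLO

model = YOLO('best.pt')   # trained YOLOv8s weights
model.export(
    format='onnx',        # writes best.onnx next to the weights
    opset=12,             # pin the ONNX opset if the downstream toolkit requires it
    imgsz=640,            # match model_w / model_h in the conversion script above
    simplify=True         # run the ONNX simplifier on the exported graph
)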