alibaba / MNN

MNN is a blazing fast, lightweight deep learning framework, battle-tested by business-critical use cases in Alibaba
http://www.mnn.zone/

yolov11-obb detection demo #3054

Open sungerk opened 2 days ago

sungerk commented 2 days ago

I am referencing this code: https://github.com/wangzhaode/mnn-yolo

When I convert yolo11n.pt to an MNN model myself, the returned tensor is fine: cx, cy, w, h are absolute coordinates and can be parsed directly.

But when I convert yolo11n-obb.pt to ONNX and then to an MNN model, the tensor returned at inference time is different. Normally it should contain cx, cy, w, h, confidence, angle. Debugging shows the returned shape is [6, 8400], but the values look like relative coordinates, and converting them to absolute coordinates does not seem to come out right. How should this output be parsed?
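
Based on the script that ended up working (posted below), the [6, 8400] tensor appears to be laid out row-wise as cx, cy, w, h, confidence, angle, with the box coordinates normalized to the 640x640 letterboxed input and the angle in radians. A minimal decoding sketch under that assumption (decode_obb_output, pad_w, pad_h and scale are illustrative names; the padding and scale factor come from the letterbox preprocessing):

import numpy as np

def decode_obb_output(output, pad_w, pad_h, scale, input_size=640, conf_thres=0.25):
    # output: [6, 8400]; rows are cx, cy, w, h, confidence, angle (radians),
    # with coordinates assumed to be normalized to the letterboxed input.
    cx = (output[0] * input_size - pad_w) / scale  # map back to the original image
    cy = (output[1] * input_size - pad_h) / scale
    w = output[2] * input_size / scale
    h = output[3] * input_size / scale
    conf = output[4]
    angle = output[5]
    keep = conf > conf_thres
    return np.stack((cx, cy, w, h, angle), axis=-1)[keep], conf[keep]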

sungerk commented 2 days ago
import argparse
import numpy as np
import MNN
import MNN.numpy as mnn_np  # MNN's NumPy-compatible module
import MNN.cv as mnn_cv2
import cv2
import time  # used to measure inference time

# The helper functions defined earlier are unchanged
def mnn_tensor_to_numpy(mnn_tensor):
    return np.array(mnn_tensor.read())

def _get_covariance_matrix(boxes):
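    # Covariance-matrix terms (a, b, c) of the Gaussian representation of each
    # rotated box, derived from its width, height and angle; used by batch_probiou.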
    gbbs = np.concatenate((np.power(boxes[:, 2:4], 2) / 12, boxes[:, 4:]), axis=-1)
    a, b, c = np.split(gbbs, [1, 2], axis=-1)
    cov_matrix_1 = a * np.cos(c) ** 2 + b * np.sin(c) ** 2
    cov_matrix_2 = a * np.sin(c) ** 2 + b * np.cos(c) ** 2
    cov_matrix_3 = a * np.cos(c) * np.sin(c) - b * np.sin(c) * np.cos(c)
    return (cov_matrix_1, cov_matrix_2, cov_matrix_3)

def batch_probiou(obb1, obb2, eps=1e-7):
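    # Pairwise probabilistic IoU (ProbIoU) between two sets of rotated boxes
    # (cx, cy, w, h, angle); returns a matrix of shape (len(obb1), len(obb2)).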
    x1, y1 = obb1[:, 0].reshape(-1, 1), obb1[:, 1].reshape(-1, 1)
    x2, y2 = obb2[:, 0].reshape(1, -1), obb2[:, 1].reshape(1, -1)
    a1, b1, c1 = _get_covariance_matrix(obb1)
    a2, b2, c2 = _get_covariance_matrix(obb2)
    a2 = a2.reshape(1, -1)
    b2 = b2.reshape(1, -1)
    c2 = c2.reshape(1, -1)
    t1 = (
        ((a1 + a2) * np.power(y1 - y2, 2) + (b1 + b2) * np.power(x1 - x2, 2))
        / ((a1 + a2) * (b1 + b2) - np.power(c1 + c2, 2) + eps)
    ) * 0.25
    t2 = (((c1 + c2) * (x2 - x1) * (y1 - y2)) / ((a1 + a2) * (b1 + b2) - np.power(c1 + c2, 2) + eps)) * 0.5
    t3 = (
        np.log(
            ((a1 + a2) * (b1 + b2) - np.power(c1 + c2, 2))
            / (4 * np.sqrt((a1 * b1 - np.power(c1, 2)).clip(0) * (a2 * b2 - np.power(c2, 2)).clip(0)) + eps)
            + eps
        )
        * 0.5
    )
    bd = t1 + t2 + t3
    bd = np.clip(bd, eps, 100.0)
    hd = np.sqrt(1.0 - np.exp(-bd) + eps)
    return 1 - hd

def nms_rotated(boxes, scores, threshold=0.45):
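    # Vectorized rotated NMS (adapted from the ultralytics implementation): keep a box
    # only if its ProbIoU with every higher-scoring box is below the threshold.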
    if len(boxes) == 0:
        return np.empty((0,), dtype=np.int8)

    sorted_idx = np.argsort(scores)[::-1]
    boxes = boxes[sorted_idx]
    ious = batch_probiou(boxes, boxes)
    ious = np.triu(ious, k=1)
    pick = np.where(ious.max(axis=0) < threshold)[0]

    return sorted_idx[pick]

def preprocess_image(img_path, target_size=640):
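    # Letterbox preprocessing: resize keeping the aspect ratio, scale pixels to [0, 1],
    # pad to a square target_size x target_size canvas, and convert to NC4HW4 for MNN.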
    original_image = mnn_cv2.imread(img_path)
    ih, iw, _ = original_image.shape
    scale = min(target_size / ih, target_size / iw)
    nh, nw = int(ih * scale), int(iw * scale)
    image = mnn_cv2.resize(original_image, (nw, nh), 0., 0., mnn_cv2.INTER_LINEAR, -1, [0., 0., 0.],
                           [1. / 255., 1. / 255., 1. / 255.])

    pad_h = (target_size - nh) // 2
    pad_w = (target_size - nw) // 2
    image = mnn_np.pad(image, [[pad_h, target_size - nh - pad_h], [pad_w, target_size - nw - pad_w], [0, 0]],
                       'constant')

    input_var = mnn_np.expand_dims(image, 0)
    input_var = MNN.expr.convert(input_var, MNN.expr.NC4HW4)

    return input_var, original_image, pad_h, pad_w, scale

def inference(model, img, precision, backend, thread):
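    # Load the MNN module, run a forward pass, decode the OBB output, apply rotated NMS,
    # and draw the resulting rotated boxes on the original image.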
    config = {}
    config['precision'] = precision
    config['backend'] = backend
    config['numThread'] = thread
    rt = MNN.nn.create_runtime_manager((config,))
    net = MNN.nn.load_module_from_file(model, [], [], runtime_manager=rt)

    input_var, original_image, pad_h, pad_w, scale = preprocess_image(img)

    start_time = time.time()
    output_var = net.forward(input_var)
    output_var = MNN.expr.convert(output_var, MNN.expr.NCHW)
    output_var = output_var.squeeze()
    end_time = time.time()
    inference_time = (end_time - start_time) * 1000
    print(f"Inference time: {inference_time:.2f} ms")

    output_var_np = mnn_tensor_to_numpy(output_var)

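    # The OBB head output is [6, 8400]: cx, cy, w, h, confidence, angle (radians).
    # The coordinates appear to be normalized to the 640x640 letterboxed input,
    # so scale by 640, then undo the letterbox padding and the resize factor.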
    cx = (output_var_np[0] * 640 - pad_w) / scale
    cy = (output_var_np[1] * 640 - pad_h) / scale
    w = output_var_np[2] * 640 / scale
    h = output_var_np[3] * 640 / scale
    probs = output_var_np[4]

    mask = probs > 0.25
    cx = cx[mask]
    cy = cy[mask]
    w = w[mask]
    h = h[mask]
    probs = probs[mask]
    angle = output_var_np[5][mask]

    boxes = np.stack((cx, cy, w, h, angle), axis=-1)

    keep_indices = nms_rotated(boxes, probs, threshold=0.45)
    boxes = boxes[keep_indices]
    probs = probs[keep_indices]
    new_Image = cv2.imread(img)

    for i in range(len(probs)):
        cx = boxes[i, 0]
        cy = boxes[i, 1]
        w = boxes[i, 2]
        h = boxes[i, 3]
        angle = boxes[i, 4]

        center = (int(cx), int(cy))
        cv2.circle(new_Image, center, 5, (0, 0, 255), -1)

        rect = ((float(cx), float(cy)),
                (float(w), float(h)),
                np.degrees(float(angle)))
        print(rect)
        box_points = cv2.boxPoints(rect)
        box_points = np.intp(box_points)
        cv2.drawContours(new_Image, [box_points], 0, (0, 255, 0), 2)

    cv2.imwrite('result.jpg', new_Image)
    print("Result image saved as 'result.jpg'")

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    test_image_path = '/Users/sungerk/a.jpg'
    parser.add_argument('--model', type=str, default='./obb.mnn', help='the yolo11-obb MNN model path')
    parser.add_argument('--img', type=str, default=test_image_path, help='the input image path')
    parser.add_argument('--precision', type=str, default='normal', help='inference precision: normal, low, high, lowBF')
    parser.add_argument('--backend', type=str, default='CPU',
                        help='inference backend: CPU, OPENCL, OPENGL, NN, VULKAN, METAL, TRT, CUDA, HIAI')
    parser.add_argument('--thread', type=int, default=4, help='inference using thread: int')
    args = parser.parse_args()
    inference(args.model, args.img, args.precision, args.backend, args.thread)

Got it working.