Open sungerk opened 2 days ago
import argparse
import numpy as np
import MNN
import MNN.numpy as mnn_np # MNN 的 NumPy
import MNN.cv as mnn_cv2
import cv2
import time # 导入time模块
# The helper functions defined earlier are kept unchanged.
def mnn_tensor_to_numpy(mnn_tensor):
    """Return the contents of *mnn_tensor* as a NumPy array.

    Args:
        mnn_tensor: Any object exposing a ``read()`` method whose result
            ``np.array`` can consume (presumably an MNN ``Var`` -- verify
            against the caller).

    Returns:
        A new ``np.ndarray`` built from ``mnn_tensor.read()``.
    """
    return np.array(mnn_tensor.read())
def _get_covariance_matrix(boxes):
    """Return the covariance-matrix terms of rotated boxes.

    Args:
        boxes: Array-like of shape (N, 5). Columns 2 and 3 are squared and
            divided by 12 to form the variance terms; column 4 is the
            rotation angle and is fed to ``np.cos`` / ``np.sin``, so it must
            be in radians. Columns 0 and 1 are not used here.

    Returns:
        A tuple ``(cov_xx, cov_yy, cov_xy)`` of three arrays, each of shape
        (N, 1).
    """
    boxes = np.asarray(boxes)
    # Pack (w^2 / 12, h^2 / 12, angle) side by side, then split per column.
    gbbs = np.concatenate((np.power(boxes[:, 2:4], 2) / 12, boxes[:, 4:]), axis=-1)
    a, b, c = np.split(gbbs, [1, 2], axis=-1)

    # Evaluate the trigonometric terms once instead of once per use.
    cos_c = np.cos(c)
    sin_c = np.sin(c)

    cov_xx = a * cos_c ** 2 + b * sin_c ** 2
    cov_yy = a * sin_c ** 2 + b * cos_c ** 2
    cov_xy = a * cos_c * sin_c - b * sin_c * cos_c
    return (cov_xx, cov_yy, cov_xy)
def batch_probiou(obb1, obb2, eps=1e-7):
    """Return the pairwise probabilistic IoU between two sets of rotated boxes.

    Each box is a row ``(cx, cy, w, h, angle)`` with the angle in radians
    (it is passed to ``np.cos`` / ``np.sin`` by ``_get_covariance_matrix``).

    Args:
        obb1: Array-like of shape (N, 5).
        obb2: Array-like of shape (M, 5).
        eps: Small constant added to denominators and to the ``log`` / ``sqrt``
            arguments to avoid division by zero and invalid values.

    Returns:
        Array of shape (N, M); entry ``[i, j]`` is the similarity between
        ``obb1[i]`` and ``obb2[j]``, computed as ``1 - hd`` where ``hd`` is
        derived from the clipped distance ``bd`` below.
    """
    obb1 = np.asarray(obb1)
    obb2 = np.asarray(obb2)

    # Centres: column vectors for obb1, row vectors for obb2, so every
    # arithmetic expression below broadcasts to an (N, M) matrix.
    x1, y1 = obb1[:, 0].reshape(-1, 1), obb1[:, 1].reshape(-1, 1)
    x2, y2 = obb2[:, 0].reshape(1, -1), obb2[:, 1].reshape(1, -1)

    a1, b1, c1 = _get_covariance_matrix(obb1)
    a2, b2, c2 = _get_covariance_matrix(obb2)
    a2 = a2.reshape(1, -1)
    b2 = b2.reshape(1, -1)
    c2 = c2.reshape(1, -1)

    # The determinant of the summed covariance is shared by all three terms;
    # compute it once.
    sum_a = a1 + a2
    sum_b = b1 + b2
    sum_c = c1 + c2
    det_sum = sum_a * sum_b - np.power(sum_c, 2)

    t1 = (
        (sum_a * np.power(y1 - y2, 2) + sum_b * np.power(x1 - x2, 2))
        / (det_sum + eps)
    ) * 0.25
    t2 = ((sum_c * (x2 - x1) * (y1 - y2)) / (det_sum + eps)) * 0.5

    # Individual determinants are clipped at zero so the square root stays real.
    det1 = (a1 * b1 - np.power(c1, 2)).clip(0)
    det2 = (a2 * b2 - np.power(c2, 2)).clip(0)
    t3 = np.log(det_sum / (4 * np.sqrt(det1 * det2) + eps) + eps) * 0.5

    bd = np.clip(t1 + t2 + t3, eps, 100.0)
    hd = np.sqrt(1.0 - np.exp(-bd) + eps)
    return 1 - hd
def nms_rotated(boxes, scores, threshold=0.45):
    """Run non-maximum suppression on rotated boxes.

    Args:
        boxes: Array-like of shape (N, 5) with rows ``(cx, cy, w, h, angle)``.
        scores: Array-like of shape (N,) with one confidence per box.
        threshold: A box is kept only if its similarity (from
            ``batch_probiou``) to every higher-scored box is below this value.

    Returns:
        1-D ``np.intp`` array of indices into the original ``boxes`` of the
        boxes to keep, ordered by descending score. Empty when ``boxes`` is
        empty.
    """
    boxes = np.asarray(boxes)
    scores = np.asarray(scores)
    if len(boxes) == 0:
        # Use the platform index dtype so the empty result matches the dtype
        # produced by the non-empty path below.
        return np.empty((0,), dtype=np.intp)

    sorted_idx = np.argsort(scores)[::-1]
    boxes = boxes[sorted_idx]
    ious = batch_probiou(boxes, boxes)
    # Keep only the strict upper triangle: column j then holds the similarity
    # of box j to each higher-scored box i < j (and zeros elsewhere).
    ious = np.triu(ious, k=1)
    pick = np.where(ious.max(axis=0) < threshold)[0]
    return sorted_idx[pick]
def preprocess_image(img_path, target_size=640):
    """Load an image and letterbox it into a square network input.

    The image is resized so that it fits inside ``target_size`` x
    ``target_size`` while keeping its aspect ratio, then padded on both sides
    of each spatial axis to reach exactly that size.

    Args:
        img_path: Path of the image file, read with ``MNN.cv.imread``.
        target_size: Side length of the square network input.

    Returns:
        A tuple ``(input_var, original_image, pad_h, pad_w, scale)``:
        the batched tensor converted to ``NC4HW4``, the image as read from
        disk, the padding added before the image along height and width, and
        the resize factor applied to the original image.
    """
    original_image = mnn_cv2.imread(img_path)
    ih, iw, _ = original_image.shape

    # One factor for both axes keeps the aspect ratio.
    scale = min(target_size / ih, target_size / iw)
    nh, nw = int(ih * scale), int(iw * scale)

    # NOTE(review): the trailing arguments are presumably (code, mean, norm),
    # i.e. no colour conversion, zero mean and a 1/255 scale -- confirm
    # against the MNN.cv.resize documentation.
    image = mnn_cv2.resize(original_image, (nw, nh), 0., 0., mnn_cv2.INTER_LINEAR, -1, [0., 0., 0.],
                           [1. / 255., 1. / 255., 1. / 255.])

    # Centre the resized image; any odd leftover pixel goes to the far side.
    pad_h = (target_size - nh) // 2
    pad_w = (target_size - nw) // 2
    image = mnn_np.pad(image, [[pad_h, target_size - nh - pad_h], [pad_w, target_size - nw - pad_w], [0, 0]],
                       'constant')

    input_var = mnn_np.expand_dims(image, 0)
    input_var = MNN.expr.convert(input_var, MNN.expr.NC4HW4)
    return input_var, original_image, pad_h, pad_w, scale
def inference(model, img, precision, backend, thread):
    """Run an oriented-bounding-box model on one image and draw the results.

    The detections that survive confidence filtering and rotated NMS are drawn
    onto a copy of the image, which is written to ``result.jpg`` in the
    current working directory.

    Args:
        model: Path of the MNN model file.
        img: Path of the input image.
        precision: Value stored under the ``'precision'`` runtime config key.
        backend: Value stored under the ``'backend'`` runtime config key.
        thread: Value stored under the ``'numThread'`` runtime config key.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``img``.
        OSError: If ``result.jpg`` cannot be written.
    """
    # One source of truth for the network input size: it is used both for
    # letterboxing and for mapping the outputs back to image coordinates.
    input_size = 640
    conf_threshold = 0.25
    iou_threshold = 0.45

    config = {
        'precision': precision,
        'backend': backend,
        'numThread': thread,
    }
    rt = MNN.nn.create_runtime_manager((config,))
    net = MNN.nn.load_module_from_file(model, [], [], runtime_manager=rt)

    input_var, _, pad_h, pad_w, scale = preprocess_image(img, input_size)

    # NOTE(review): if MNN evaluates expressions lazily, the real work may
    # only happen when the tensor is read below -- confirm that this window
    # actually covers the model execution.
    start_time = time.time()
    output_var = net.forward(input_var)
    output_var = MNN.expr.convert(output_var, MNN.expr.NCHW)
    output_var = output_var.squeeze()
    end_time = time.time()
    inference_time = (end_time - start_time) * 1000
    print(f"Inference time: {inference_time:.2f} ms")

    # Rows are read as (cx, cy, w, h, confidence, angle), one column per
    # candidate.
    output = mnn_tensor_to_numpy(output_var)
    raw_cx, raw_cy, raw_w, raw_h, scores, angles = (output[i] for i in range(6))

    keep_mask = scores > conf_threshold
    # Assumes the model emits coordinates normalised by the input size --
    # TODO confirm against the exported model. Scale them to input pixels,
    # remove the letterbox padding, then undo the resize.
    boxes = np.stack(
        (
            (raw_cx[keep_mask] * input_size - pad_w) / scale,
            (raw_cy[keep_mask] * input_size - pad_h) / scale,
            raw_w[keep_mask] * input_size / scale,
            raw_h[keep_mask] * input_size / scale,
            angles[keep_mask],
        ),
        axis=-1,
    )
    scores = scores[keep_mask]

    keep_indices = nms_rotated(boxes, scores, threshold=iou_threshold)
    boxes = boxes[keep_indices]

    canvas = cv2.imread(img)
    if canvas is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {img}")

    for box_cx, box_cy, box_w, box_h, box_angle in boxes:
        center = (int(box_cx), int(box_cy))
        cv2.circle(canvas, center, 5, (0, 0, 255), -1)
        # The angle is converted from radians to degrees for cv2.boxPoints.
        rect = ((float(box_cx), float(box_cy)),
                (float(box_w), float(box_h)),
                np.degrees(float(box_angle)))
        print(rect)
        box_points = np.intp(cv2.boxPoints(rect))
        cv2.drawContours(canvas, [box_points], 0, (0, 255, 0), 2)

    # cv2.imwrite returns False on failure; do not report success blindly.
    if not cv2.imwrite('result.jpg', canvas):
        raise OSError("Failed to write 'result.jpg'")
    print("Result image saved as 'result.jpg'")
if __name__ == "__main__":
    # Command-line entry point: parse the options and run one inference.
    parser = argparse.ArgumentParser()
    # NOTE: machine-specific default kept for backward compatibility; pass
    # --img explicitly on any other machine.
    test_image_path = '/Users/sungerk/a.jpg'
    parser.add_argument('--model', type=str, default='./obb.mnn', help='the YOLO OBB MNN model path')
    parser.add_argument('--img', type=str, default=test_image_path, help='the input image path')
    parser.add_argument('--precision', type=str, default='normal', help='inference precision: normal, low, high, lowBF')
    parser.add_argument('--backend', type=str, default='CPU',
                        help='inference backend: CPU, OPENCL, OPENGL, NN, VULKAN, METAL, TRT, CUDA, HIAI')
    parser.add_argument('--thread', type=int, default=4, help='inference using thread: int')
    args = parser.parse_args()
    inference(args.model, args.img, args.precision, args.backend, args.thread)
搞定了
参考这个代码 https://github.com/wangzhaode/mnn-yolo
用 yolo11n.pt 自己转成 MNN 模型后,返回的张量是正常的:cx、cy、w、h 都是绝对坐标,可以直接解析。
但是把 yolo11n-obb.pt 先转成 ONNX 模型、再转成 MNN 模型后进行推理测试,返回的张量就不对了。正常来说每个检测应该是 cx, cy, w, h, confidence, angle;我 debug 发现返回的结构是 [6, 8400],但数值好像是相对坐标,转成绝对坐标后好像又不对。应该如何解析?