derronqi / yolov8-face

yolov8 face detection with landmark
GNU General Public License v3.0

Can't load my own .onnx model exported from the official yolov8n-face.pt; how do I correctly export .pt to .onnx? #21

Closed: fn-hide closed this issue 6 months ago

fn-hide commented 8 months ago

I exported the official yolov8n-face.pt model to .onnx like this:

from ultralytics import YOLO

pt_model = YOLO('../../../.deepface/weights/yolov8n-face.pt')
pt_model.export(format='onnx', opset=12)

Then I load the exported .onnx model like this:

onnx_model = YOLOv8_face('../../../.deepface/weights/yolov8n-face.onnx')

But I got this error:

---------------------------------------------------------------------------
error                                     Traceback (most recent call last)
Cell In[45], line 4
      1 # onnx_model = YOLOv8('../../../.deepface/weights/yolov8n-face.onnx')
      2 # onnx_model = YOLOv8('c:/Users/febri/Downloads/yolov8n-face.onnx')
      3 # onnx_model = YOLOv8_face('c:/Users/febri/Downloads/yolov8n-face.onnx')
----> 4 onnx_model = YOLOv8_face('../../../.deepface/weights/yolov8n-face.onnx')

File c:\Users\febri\projects\EATS_AI\development\YOLOv8_OpenCV\main.py:13, in YOLOv8_face.__init__(self, path, conf_thres, iou_thres)
     11 self.num_classes = len(self.class_names)
     12 # Initialize model
---> 13 self.net = cv2.dnn.readNet(path)
     14 self.input_height = 640
     15 self.input_width = 640

error: OpenCV(4.8.1) D:\a\opencv-python\opencv-python\opencv\modules\dnn\src\onnx\onnx_importer.cpp:1083: error: (-2:Unspecified error) in function 'cv::dnn::dnn4_v20230620::ONNXImporter::handleNode'
> Node [Floor@ai.onnx]:(onnx_node!/model.10/Floor) parse error: OpenCV(4.8.1) D:\a\opencv-python\opencv-python\opencv\modules\dnn\src\layers\elementwise_layers.cpp:261: error: (-215:Assertion failed) src.size == dst.size && src.type() == dst.type() && src.isContinuous() && dst.isContinuous() && src.type() == CV_32F in function 'cv::dnn::ElementWiseLayer<struct cv::dnn::FloorFunctor>::forward'

Am I doing something wrong when exporting the .pt model to .onnx?

Hab2Verer commented 6 months ago

Any updates?

fn-hide commented 6 months ago

Sorry, the mistake was in my own code after exporting to ONNX, not in the export itself. yolov8n-face.pt also predicts keypoints (the face landmarks), and I needed to parse them, which my first code did not do.
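
For anyone double-checking their own export, a quick way to confirm the landmark values are actually carried in the graph outputs is to run the model once and print the output shapes. A minimal sketch, assuming onnxruntime is installed and the usual 1x3x640x640 input (this snippet is only for inspection, it is not part of the detection code):

import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession('yolov8n-face.onnx', providers=['CPUExecutionProvider'])
inp = sess.get_inputs()[0]
print('input:', inp.name, inp.shape)

# run once on a blank image purely to inspect the output tensors
dummy = np.zeros((1, 3, 640, 640), dtype=np.float32)
outputs = sess.run(None, {inp.name: dummy})
for meta, out in zip(sess.get_outputs(), outputs):
    print('output:', meta.name, out.shape)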

Hab2Verer commented 6 months ago

@fn-hide Can you provide the code you used to export to ONNX, and show how you use the ONNX model?

fn-hide commented 6 months ago

As usual, you can export the .pt model to .onnx with:

from ultralytics import YOLO

model = YOLO('yolov8n-face.pt')
model.export(format='onnx', opset=12)
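
Before handing the file to OpenCV, you can also sanity-check it programmatically. A minimal sketch using the onnx package (assuming it is installed) that verifies the graph and lists the declared inputs, outputs, and opset:

import onnx

onnx_model = onnx.load('yolov8n-face.onnx')
onnx.checker.check_model(onnx_model)
print('opset:', [o.version for o in onnx_model.opset_import])
print('inputs:', [i.name for i in onnx_model.graph.input])
print('outputs:', [o.name for o in onnx_model.graph.output])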

If you're not sure about the model, you can validate the exported file with Netron: just upload your .onnx model and you can view the architecture. I got the inference code from hpc230. Here is the code to use the exported model:

import argparse
import math
import time

import cv2 as cv
import numpy as np


class YOLOv8nFace:
    def __init__(self, path, conf_thres=0.45, iou_thres=0.5):
        self.conf_threshold = conf_thres
        self.iou_threshold = iou_thres
        self.class_names = ['face']
        self.num_classes = len(self.class_names)

        # Initialize model
        self.net = cv.dnn.readNet(path)
        self.input_height = 640
        self.input_width = 640
        self.reg_max = 16

        self.project = np.arange(self.reg_max)
        self.strides = (8, 16, 32)
        self.feats_hw = [
            (math.ceil(self.input_height / self.strides[i]), math.ceil(self.input_width / self.strides[i]))
            for i in range(len(self.strides))
        ]
        self.anchors = self.make_anchors(self.feats_hw)

    def make_anchors(self, feats_hw, grid_cell_offset=0.5):
        """Generate anchors from features."""
        anchor_points = {}
        for i, stride in enumerate(self.strides):
            h, w = feats_hw[i]
            x = np.arange(0, w) + grid_cell_offset                                                             # shift x
            y = np.arange(0, h) + grid_cell_offset                                                             # shift y
            sx, sy = np.meshgrid(x, y)
            # sy, sx = np.meshgrid(y, x)
            anchor_points[stride] = np.stack((sx, sy), axis=-1).reshape(-1, 2)
        return anchor_points

    @staticmethod
    def softmax(x, axis=1):
        x_exp = np.exp(x)
        # for a column vector, use axis=0
        x_sum = np.sum(x_exp, axis=axis, keepdims=True)
        s = x_exp / x_sum
        return s

    def resize_image(self, srcimg, keep_ratio=True):
        top, left, newh, neww = 0, 0, self.input_width, self.input_height
        if keep_ratio and srcimg.shape[0] != srcimg.shape[1]:
            hw_scale = srcimg.shape[0] / srcimg.shape[1]
            if hw_scale > 1:
                newh, neww = self.input_height, int(self.input_width / hw_scale)
                img = cv.resize(srcimg, (neww, newh), interpolation=cv.INTER_AREA)
                left = int((self.input_width - neww) * 0.5)
                img = cv.copyMakeBorder(
                    img, 0, 0, left, self.input_width - neww - left, cv.BORDER_CONSTANT, value=(0, 0, 0)
                )                                                                                           # add border
            else:
                newh, neww = int(self.input_height * hw_scale), self.input_width
                img = cv.resize(srcimg, (neww, newh), interpolation=cv.INTER_AREA)
                top = int((self.input_height - newh) * 0.5)
                img = cv.copyMakeBorder(
                    img, top, self.input_height - newh - top, 0, 0, cv.BORDER_CONSTANT, value=(0, 0, 0)
                )
        else:
            img = cv.resize(srcimg, (self.input_width, self.input_height), interpolation=cv.INTER_AREA)
        return img, newh, neww, top, left

    def detect(self, srcimg):
        input_img, newh, neww, padh, padw = self.resize_image(cv.cvtColor(srcimg, cv.COLOR_BGR2RGB))
        scale_h, scale_w = srcimg.shape[0] / newh, srcimg.shape[1] / neww
        input_img = input_img.astype(np.float32) / 255.0

        blob = cv.dnn.blobFromImage(input_img)
        self.net.setInput(blob)
        outputs = self.net.forward(self.net.getUnconnectedOutLayersNames())
        # if isinstance(outputs, tuple):
        #     outputs = list(outputs)
        # if float(cv.__version__[:3])>=4.7:
        #     outputs = [outputs[2], outputs[0], outputs[1]]  # this reordering is needed with OpenCV 4.7, not with 4.5
        # Perform inference on the image
        det_bboxes, det_conf, det_classid, landmarks = self.post_process(outputs, scale_h, scale_w, padh, padw)

        # return bounding boxes in xywh format
        return det_bboxes, det_conf, det_classid, landmarks

    def post_process(self, preds, scale_h, scale_w, padh, padw):
        bboxes, scores, landmarks = [], [], []
        for i, pred in enumerate(preds):
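            # each element of preds is the raw output of one detection head (NCHW); its stride is recovered from the spatial size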
            stride = int(self.input_height / pred.shape[2])
            pred = pred.transpose((0, 2, 3, 1))

            box = pred[..., :self.reg_max * 4]
            cls = 1 / (1 + np.exp(-pred[..., self.reg_max * 4:-15])).reshape((-1, 1))
            kpts = pred[..., -15:].reshape((-1, 15))                                   # x1,y1,score1, ..., x5,y5,score5

            # tmp = box.reshape(self.feats_hw[i][0], self.feats_hw[i][1], 4, self.reg_max)
            tmp = box.reshape(-1, 4, self.reg_max)
            bbox_pred = self.softmax(tmp, axis=-1)
            bbox_pred = np.dot(bbox_pred, self.project).reshape((-1, 4))

            bbox = self.distance2bbox(self.anchors[stride], bbox_pred,
                                      max_shape=(self.input_height, self.input_width)) * stride
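            # keypoint decoding: x/y offsets are relative to the anchor centres and scaled by stride; every third value is a sigmoid confidence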
            kpts[:, 0::3] = (kpts[:, 0::3] * 2.0 + (self.anchors[stride][:, 0].reshape((-1, 1)) - 0.5)) * stride
            kpts[:, 1::3] = (kpts[:, 1::3] * 2.0 + (self.anchors[stride][:, 1].reshape((-1, 1)) - 0.5)) * stride
            kpts[:, 2::3] = 1 / (1 + np.exp(-kpts[:, 2::3]))

            bbox -= np.array([[padw, padh, padw, padh]])                                # broadcasting removes the letterbox padding
            bbox *= np.array([[scale_w, scale_h, scale_w, scale_h]])
            kpts -= np.tile(np.array([padw, padh, 0]), 5).reshape((1, 15))
            kpts *= np.tile(np.array([scale_w, scale_h, 1]), 5).reshape((1, 15))

            bboxes.append(bbox)
            scores.append(cls)
            landmarks.append(kpts)

        bboxes = np.concatenate(bboxes, axis=0)
        scores = np.concatenate(scores, axis=0)
        landmarks = np.concatenate(landmarks, axis=0)

        bboxes_wh = bboxes.copy()
        bboxes_wh[:, 2:4] = bboxes[:, 2:4] - bboxes[:, 0:2]                                                       # xywh
        class_ids = np.argmax(scores, axis=1)
        confidences = np.max(scores, axis=1)                                                      # max_class_confidence

        mask = confidences > self.conf_threshold
        bboxes_wh = bboxes_wh[mask]                                          # keep only detections above the confidence threshold
        confidences = confidences[mask]
        class_ids = class_ids[mask]
        landmarks = landmarks[mask]

        indices = cv.dnn.NMSBoxes(
            bboxes_wh.tolist(), confidences.tolist(), self.conf_threshold, self.iou_threshold
        )
        if len(indices) > 0:
            mlvl_bboxes = bboxes_wh[indices]
            confidences = confidences[indices]
            class_ids = class_ids[indices]
            landmarks = landmarks[indices]
            return mlvl_bboxes, confidences, class_ids, landmarks
        else:
            return np.array([]), np.array([]), np.array([]), np.array([])

    @staticmethod
    def distance2bbox(points, distance, max_shape=None):
        x1 = points[:, 0] - distance[:, 0]
        y1 = points[:, 1] - distance[:, 1]
        x2 = points[:, 0] + distance[:, 2]
        y2 = points[:, 1] + distance[:, 3]
        if max_shape is not None:
            x1 = np.clip(x1, 0, max_shape[1])
            y1 = np.clip(y1, 0, max_shape[0])
            x2 = np.clip(x2, 0, max_shape[1])
            y2 = np.clip(y2, 0, max_shape[0])
        return np.stack([x1, y1, x2, y2], axis=-1)

    @staticmethod
    def draw_detections(image, boxes, scores, kpts):
        for box, score, kp in zip(boxes, scores, kpts):
            x, y, w, h = box.astype(int)
            # Draw rectangle
            cv.rectangle(image, (x, y), (x + w, y + h), (0, 0, 255), thickness=3)
            cv.putText(
                image,
                "face:" + str(round(score, 2)),
                (x, y - 5),
                cv.FONT_HERSHEY_SIMPLEX,
                1,
                (0, 0, 255),
                thickness=2
            )

            print()
            print('YOLOv8nFace -> draw_detections()')
            for i in range(5):
                x, y, kp_conf = int(kp[i * 3]), int(kp[i * 3 + 1]), kp[i * 3 + 2]
                print(f"{i}. x: {x}, y: {y}, conf: {kp_conf:.2f}")
                cv.circle(image, (x, y), 4, (0, 255, 0), thickness=-1)
                cv.putText(
                    image,
                    str(i),
                    (x, y - 10),
                    cv.FONT_HERSHEY_SIMPLEX,
                    .5,
                    (255, 0, 0),
                    thickness=2
                )
            else:
                print()
        return image

if __name__ == '__main__':
    tic = time.perf_counter()

    # --- YOLOv8nFace Run Example
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--imgpath',
        type=str,
        default='images.jpeg',
        help="image path"
    )
    parser.add_argument(
        '--modelpath',
        type=str,
        default='yolov8n-face.onnx',
        help="onnx filepath"
    )
    parser.add_argument('--confThreshold', default=0.45, type=float, help='class confidence')
    parser.add_argument('--nmsThreshold', default=0.5, type=float, help='nms iou thresh')
    args = parser.parse_args()

    # Initialize YOLOv8_Face object detector
    YOLOv8_Face_detector = YOLOv8nFace(args.modelpath, conf_thres=args.confThreshold, iou_thres=args.nmsThreshold)
    source_image = cv.imread(args.imgpath)

    # Detect Objects
    result_boxes, result_scores, result_classids, result_kpts = YOLOv8_Face_detector.detect(source_image)

    # Draw detections
    dstimg = YOLOv8_Face_detector.draw_detections(source_image, result_boxes, result_scores, result_kpts)
    # cv.imwrite('result.jpg', dstimg)
    window_name = 'YOLOv8n-Face Inference using ONNX with OpenCV DNN'
    cv.namedWindow(window_name, 0)
    cv.imshow(window_name, dstimg)
    cv.waitKey(0)
    cv.destroyAllWindows()
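
For completeness, the boxes come back in xywh format, so cropping the detected faces out of the original image is straightforward. A small sketch (assuming the class above is available as YOLOv8nFace; not part of the script itself):

detector = YOLOv8nFace('yolov8n-face.onnx')
frame = cv.imread('images.jpeg')
boxes, scores, class_ids, kpts = detector.detect(frame)
for (x, y, w, h) in boxes.astype(int):
    face = frame[max(y, 0):y + h, max(x, 0):x + w]    # one cropped face, BGR
    # use `face` here, e.g. save it or pass it to a recognition model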

Hope this helps.

Hab2Verer commented 6 months ago

@fn-hide Thanks, this was helpful.