cqu20160901 / yolov8seg_rknn_Cplusplus

yolov8seg 瑞芯微 rknn 板端 C++部署,使用平台 rk3588。
BSD 3-Clause "New" or "Revised" License
18 stars 5 forks source link

在预处理阶段增加了letterbox,请问后处理需要在哪个位置修改? #5

Open liuqinglong110 opened 10 months ago

liuqinglong110 commented 10 months ago

作者您好,我根据你之前的代码,在前处理阶段增加了letterbox处理:

        // ************************************************************************************* //
        // Letterbox pre-processing: wrap the raw frame in a cv::Mat, resize it to the model
        // input size with resize_uniform(), and hand the resulting pixels to input 0.
        // NOTE(review): assumes `data` is a packed 8-bit, 3-channel buffer of
        // img_height x img_width pixels — confirm against the caller.
        cv::Mat image = cv::Mat(img_height, img_width, CV_8UC3, data);
        cv::Mat resized_img;
        // Receives the placement (offset and size) of the scaled image inside resized_img.
        object_rect effect_roi;
        resize_uniform(image, resized_img, cv::Size(app_ctx.model_width, app_ctx.model_height), effect_roi);
        // Dumps the letterboxed frame to disk so it can be inspected.
        cv::imwrite("resized_img.jpg", resized_img);
        // NOTE(review): inputs[0].buf aliases resized_img's pixel storage, so resized_img
        // has to stay alive for as long as the buffer is used.
        inputs[0].buf = resized_img.data;
        // ************************************************************************************* //

被调用函数实现如下:

  /**
   * Resize src_img to the dimensions of dst_img using the RGA im2d API (imresize).
   *
   * dst_img must already be allocated: its cols/rows define the target size.
   * When the two images already have the same size, src_img is simply copied.
   *
   * Both RGA buffers are described as packed RGB888, so src_img has to be an
   * 8-bit, 3-channel image. On any failure (empty image, unsupported type,
   * imresize error) a message is printed and dst_img is left untouched, so the
   * caller never receives a half-written or all-black result silently.
   *
   * @param src_img  image to resize (read only in practice).
   * @param dst_img  pre-allocated destination; receives the resized pixels.
   */
  void resizeWithRGA(cv::Mat &src_img, cv::Mat &dst_img)
    {
        if (src_img.empty() || dst_img.empty())
        {
            printf("resizeWithRGA: empty source or destination image\n");
            return;
        }

        const int img_width = src_img.cols;
        const int img_height = src_img.rows;
        const int dst_width = dst_img.cols;
        const int dst_height = dst_img.rows;

        // No resize is needed when the source resolution equals the destination resolution.
        if (img_width == dst_width && img_height == dst_height)
        {
            src_img.copyTo(dst_img);
            return;
        }

        // The buffers handed to RGA below are declared as RK_FORMAT_RGB_888; any other
        // layout would make RGA read past the end of the source pixels.
        if (src_img.type() != CV_8UC3)
        {
            printf("resizeWithRGA: source image must be CV_8UC3\n");
            return;
        }

        // RGA is given a bare pointer plus width/height, so the rows must be packed.
        cv::Mat packed_src = src_img.isContinuous() ? src_img : src_img.clone();

        // Scratch destination owned by cv::Mat (released automatically on every return
        // path). It is always 3 channels, matching the RGB888 description given to RGA.
        cv::Mat resized(dst_height, dst_width, CV_8UC3, cv::Scalar(0, 0, 0));

        rga_buffer_t src = wrapbuffer_virtualaddr(static_cast<void *>(packed_src.data), img_width, img_height, RK_FORMAT_RGB_888);
        rga_buffer_t dst = wrapbuffer_virtualaddr(static_cast<void *>(resized.data), dst_width, dst_height, RK_FORMAT_RGB_888);

        const IM_STATUS status = imresize(src, dst);
        if (status != IM_STATUS_SUCCESS)
        {
            printf("resizeWithRGA: imresize failed, status = %d\n", static_cast<int>(status));
            return;
        }

        resized.copyTo(dst_img);
    }

    /**
     * Letterbox resize: scale src to fit inside dst_size while keeping its aspect
     * ratio, centre it, and fill the remaining border with the value 114.
     *
     * dst is (re)created as a CV_8UC3 image of dst_size. The scaling itself is
     * delegated to resizeWithRGA().
     * NOTE(review): src is presumably expected to be CV_8UC3 as well — confirm with callers.
     *
     * @param src          image to letterbox.
     * @param dst          output image of size dst_size.
     * @param dst_size     target (model input) size.
     * @param effect_area  receives the rectangle of dst that holds the scaled image:
     *                     x/y are the padding offsets, width/height the scaled size.
     * @return 0 on success, -1 when src or dst_size has a non-positive dimension
     *         (dst and effect_area are left untouched in that case).
     */
    int resize_uniform(cv::Mat& src, cv::Mat& dst, cv::Size dst_size, object_rect& effect_area)
    {
        const int w = src.cols;
        const int h = src.rows;
        const int dst_w = dst_size.width;
        const int dst_h = dst_size.height;

        // Reject degenerate sizes up front; they would otherwise cause a division by zero.
        if (w <= 0 || h <= 0 || dst_w <= 0 || dst_h <= 0) {
            printf("resize_uniform: invalid size, src %dx%d, dst %dx%d\n", w, h, dst_w, dst_h);
            return -1;
        }

        dst = cv::Mat(cv::Size(dst_w, dst_h), CV_8UC3, cv::Scalar(114, 114, 114));

        // Compare the aspect ratios by cross-multiplication (w/h vs dst_w/dst_h) so the
        // decision and the scaled size are exact instead of subject to float rounding.
        const long long src_cross = static_cast<long long>(w) * dst_h;
        const long long dst_cross = static_cast<long long>(dst_w) * h;

        int tmp_w = dst_w;
        int tmp_h = dst_h;
        if (src_cross > dst_cross) {
            // Source is relatively wider: fill the width, pad top and bottom.
            tmp_h = static_cast<int>(dst_cross / w);
        }
        else if (src_cross < dst_cross) {
            // Source is relatively taller: fill the height, pad left and right.
            tmp_w = static_cast<int>(src_cross / h);
        }
        // Extreme aspect ratios could round the short side down to zero pixels.
        if (tmp_w < 1) {
            tmp_w = 1;
        }
        if (tmp_h < 1) {
            tmp_h = 1;
        }

        // Centre the scaled image; the offsets are what post-processing has to undo.
        const int offset_x = (dst_w - tmp_w) / 2;
        const int offset_y = (dst_h - tmp_h) / 2;

        if (tmp_w == dst_w && tmp_h == dst_h) {
            // Same aspect ratio: resize straight into dst with RGA, no padding needed.
            resizeWithRGA(src, dst);
        }
        else {
            cv::Mat tmp = cv::Mat(cv::Size(tmp_w, tmp_h), CV_8UC3, cv::Scalar(114, 114, 114));
            resizeWithRGA(src, tmp);  // Resize with RGA.
            // Copy through a region-of-interest view so row strides are handled by OpenCV.
            tmp.copyTo(dst(cv::Rect(offset_x, offset_y, tmp_w, tmp_h)));
        }

        effect_area.x = offset_x;
        effect_area.y = offset_y;
        effect_area.width = tmp_w;
        effect_area.height = tmp_h;
        return 0;
    }

经过测试,缩放的图片是符合预期的。 我用结构体来保存letterbox的信息

    /**
     * Letterbox placement information: the rectangle of the padded image that
     * holds the scaled source picture.
     *
     * Members default to zero so that a plain `object_rect r;` declaration never
     * carries indeterminate values.
     */
    struct object_rect {
        int x = 0;       // horizontal offset of the scaled image inside the padded image
        int y = 0;       // vertical offset of the scaled image inside the padded image
        int width = 0;   // width of the scaled image
        int height = 0;  // height of the scaled image
    };

我正在修改后处理过程中的对应代码,以适应letterbox带来的影响。我在GetResultRectYolov8::GetConvDetectionResult函数中修改了几次,结果总是不对。按照我的理解,主要就是在box的生成过程中处理letterbox带来的偏移量,不用单独处理mask,因为每个mask都是由一系列参数组合生成的。还请作者给指点一下,我保存在结构体中的letterbox信息应该在后处理的哪些地方使用。感谢。