shalvenlaw / OpenVINO_yolov8

C++ inference of YOLOv8-exported ONNX models with OpenVINO, covering image classification, object detection, and semantic segmentation; the pipeline includes image preprocessing, inference, and NMS.

Have you tested the detection inference on video? #3

Open pcycccccc opened 7 months ago

pcycccccc commented 7 months ago

Hi, I've recently been running YOLOv8 inference with OpenVINO on video input. During detection I found that the boxes drawn on one frame do not disappear on the next frame. My post-processing/drawing code is no different from the OpenCV DNN version (I just wrote a separate box-drawing function), and the OpenCV DNN video output displays normally, but the OpenVINO output looks wrong. I can't find the cause of the bug, so I'm not sure whether it's a problem in OpenVINO itself. I'd like to ask for your advice, and look forward to your reply!

shalvenlaw commented 7 months ago

I've also run real-time inference on video and camera streams and never hit this problem, so it's unlikely to be an OpenVINO bug. How about showing me your code?

pcycccccc commented 7 months ago

> I've also run real-time inference on video and camera streams and never hit this problem, so it's unlikely to be an OpenVINO bug. How about showing me your code?

Sure, thanks a lot!

YOLOv8.cpp code:

```cpp
// (standard-library #include directives omitted: the header names were stripped by Markdown rendering)
#include <openvino/openvino.hpp>
#include <opencv2/opencv.hpp>
#include "yolov8.h"

using namespace std;
using namespace cv;
using namespace dnn;

void convert(const cv::Mat& input, cv::Mat& output, const bool normalize, const bool exchangeRB)
{
    input.convertTo(output, CV_32F);
    if (normalize) {
        output = output / 255.0; // normalize to [0, 1]
    }
    if (exchangeRB) {
        cv::cvtColor(output, output, cv::COLOR_BGR2RGB);
    }
}

float fill_tensor_data_image(ov::Tensor& input_tensor, const cv::Mat& input_image)
{
    /// letterbox transform: keep the aspect ratio, scale input_image and place it at the top-left of blob_image
    const ov::Shape tensor_shape = input_tensor.get_shape();
    const size_t num_channels = tensor_shape[1];
    const size_t height = tensor_shape[2];
    const size_t width = tensor_shape[3];
    // scale factor
    const float scale = std::min(height / float(input_image.rows), width / float(input_image.cols));
    const cv::Matx23f matrix{
        scale, 0.0, 0.0,
        0.0, scale, 0.0,
    };
    cv::Mat blob_image;
    // Pick the conversion order based on scale. This only buys a little speed
    // (mainly for the channel swap) at the cost of readability; if you don't
    // care about the speedup, either of the first two branches alone works.
    if (scale > 1.0 + FLT_EPSILON) {
        // upscaling: swap channels first, then scale
        convert(input_image, blob_image, true, true);
        cv::warpAffine(blob_image, blob_image, matrix, cv::Size(width, height));
    } else if (scale < 1.0 - FLT_EPSILON) {
        // downscaling: scale first, then swap channels
        cv::warpAffine(input_image, blob_image, matrix, cv::Size(width, height));
        convert(blob_image, blob_image, true, true);
    } else {
        convert(input_image, blob_image, true, true);
    }
    // cv::imshow("input_image", input_image);
    // cv::imshow("blob_image", blob_image);
    // cv::waitKey(0);

    /// copy the image data into input_tensor
    float* const input_tensor_data = input_tensor.data<float>();
    // the image is HWC, the model input node expects CHW
    for (size_t c = 0; c < num_channels; c++) {
        for (size_t h = 0; h < height; h++) {
            for (size_t w = 0; w < width; w++) {
                input_tensor_data[c * width * height + h * width + w] = blob_image.at<cv::Vec<float, 3>>(h, w)[c];
            }
        }
    }
    return 1 / scale;
}

bool Yolov8Detect::LoadModel(const string& xmlName, string& device)
{
    ov::Core core;
    //std::shared_ptr<ov::Model> model = core.read_model(xmlName); // needed when inferring from an ONNX model
    // -------- Get OpenVINO runtime version --------
    std::cout << ov::get_openvino_version().description << ':' << ov::get_openvino_version().buildNumber << std::endl; // OpenVINO 2023.2.0 here

    /// load and compile the model
    compiled_model = core.compile_model(xmlName, device);

    infer_request = compiled_model.create_infer_request();
    input_tensor = infer_request.get_input_tensor();

    return true;
}

bool Yolov8Detect::YoloInfer(const Mat& src, vector<Output>& outputs)
{
    const float factor = fill_tensor_data_image(input_tensor, src);
    // Start inference
    infer_request.infer();

    /// process the inference results
    const ov::Tensor output = infer_request.get_output_tensor();
    const ov::Shape output_shape = output.get_shape();
    const float* output_buffer = output.data<const float>();

    // parse the results
    const int out_rows = output_shape[1]; // rows of the "output" node
    const int out_cols = output_shape[2]; // cols of the "output" node

    //std::cout << "out_rows:" << out_rows << std::endl; // here rows == 6 == 4 + 2: 4 for (x, y, w, h), 2 for the number of classes
    //std::cout << "out_cols:" << out_cols << std::endl; // here cols == 8400, the number of candidate boxes the model outputs

    const cv::Mat det_output(out_rows, out_cols, CV_32F, (float*)output_buffer);

    std::vector<cv::Rect> boxes;
    std::vector<int> class_ids;
    std::vector<float> confidences;

    // Output layout is [84, 8400]: each column is one candidate box (so at most
    // 8400 boxes), the first 4 rows are cx, cy, ow, oh, the remaining rows are
    // per-class confidences.
    for (int i = 0; i < det_output.cols; ++i) {
        const cv::Mat classes_scores = det_output.col(i).rowRange(4, 4 + class_names.size());
        cv::Point class_id_point;
        double score;
        cv::minMaxLoc(classes_scores, nullptr, &score, nullptr, &class_id_point);

        // confidence between 0 and 1
        if (score > _classThreshold) {
            const float cx = det_output.at<float>(0, i);
            const float cy = det_output.at<float>(1, i);
            const float ow = det_output.at<float>(2, i);
            const float oh = det_output.at<float>(3, i);
            int left = static_cast<int>((cx - 0.5 * ow) * factor);
            int top = static_cast<int>((cy - 0.5 * oh) * factor);
            int width = static_cast<int>(ow * factor);
            int height = static_cast<int>(oh * factor);
            boxes.push_back(Rect(left, top, width, height));
            class_ids.push_back(class_id_point.y);
            confidences.push_back(score);
        }
    }
    // NMS: drop redundant overlapping boxes with lower confidence
    std::vector<int> nms_result;
    cv::dnn::NMSBoxes(boxes, confidences, _classThreshold, _nmsThreshold, nms_result);
    Rect holeImgRect(0, 0, src.cols, src.rows);
    for (int i = 0; i < nms_result.size(); ++i) {
        int idx = nms_result[i];
        Output result;
        result._id = class_ids[idx];
        result._confidence = confidences[idx];
        result._box = boxes[idx] & holeImgRect;
        outputs.push_back(result);
    }
    return !outputs.empty();
}

void Yolov8Detect::DrawPred(Mat& img, vector<Output> result, std::vector<std::string> classNames)
{
    for (int i = 0; i < result.size(); i++) {
        int left = result[i]._box.x;
        int top = result[i]._box.y;
        rectangle(img, result[i]._box, Scalar(0, 255, 0), 2, 8);
        string label = classNames[result[i]._id] + ":" + to_string(result[i]._confidence);
        int baseLine;
        Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
        top = max(top, labelSize.height);
        putText(img, label, Point(left, top), FONT_HERSHEY_SIMPLEX, 1, Scalar(0, 255, 0), 2);
    }
}
```

YOLOv8.hpp code:

```cpp
// (standard-library #include directives omitted: the header names were stripped by Markdown rendering)
#include <openvino/openvino.hpp> // openvino header file
#include <opencv2/opencv.hpp>    // opencv header file

using namespace std;
using namespace cv;
using namespace dnn;

struct Output {
    int _id;           // class id of the result
    float _confidence; // confidence of the result
    cv::Rect _box;     // bounding box
};

class Yolov8Detect {
public:
    Yolov8Detect() {}
    ~Yolov8Detect() {}

    bool LoadModel(const string& xmlName, string& device);
    bool YoloInfer(const Mat& src, vector<Output>& output);
    void DrawPred(cv::Mat& img, std::vector<Output> result, std::vector<std::string> classNames);

    // If you change the network input size, you must also change the input size
    // of the segmentation branch, otherwise segmentation will be wrong.
    // Confirm via the network structure at https://netron.app/
    // A 960*960 network corresponds to a 240*240 segmentation input;
    // a 640*640 network corresponds to a 160*160 segmentation input.
    const int _netWidth = 640;  // ONNX input width
    const int _netHeight = 640; // ONNX input height

    float _classThreshold = 0.25;
    float _nmsThreshold = 0.45;

    ov::InferRequest infer_request;
    ov::CompiledModel compiled_model;
    ov::Tensor input_tensor;
    // class names; change this for your own model
    std::vector<std::string> class_names = { "person" };
};
```

My code in main.cpp:

include "HK_camera.h"

include "yolov8.h"

include

include

include

include

include

include

include

include <opencv2/opencv.hpp>

using namespace std; using namespace cv; using namespace dnn; //const int limit = 1000; struct Job { cv::Mat image; }; //std::queue jobs1,jobs2,jobs3; std::queue jobs1, jobs2; std::mutex lock1, lock2; std::condition_variable cv1, cv2; vector result1, result2; Yolov8Detect yolo1, yolo2; //Yolov8 task_detect_onnx; //Net net1, net2, net3; int limit = 10; bool stop = false;

void GrabImage1(HK_camera name) { cv::Mat image1; while (cv::waitKey(1) < 0) { name.grabRGBImage1(&image1); //resize(image1, image1, Size(640, 480)); //cv::imshow("jk", image1); //waitKey(1); std::unique_lock l1(lock1); cv1.wait(l1, [&]() { return jobs1.size() < limit; }); jobs1.push(image1); Sleep(20); } }

void GrabImage2(HK_camera name) { cv::Mat image2; while (cv::waitKey(1) < 0) { name.grabRGBImage2(&image2); //resize(image2, image2, Size(640, 480)); //cv::imshow("jk2", image2); //waitKey(1); std::unique_lock l2(lock2); cv2.wait(l2, [&]() { return jobs2.size() < limit; }); jobs2.push(image2); Sleep(20); } }

void DetectPerson1() {

int alarm_num1, danger_num1;
Mat img_camera;
Mat mask_img;
while (true)
{
    if (stop)
        break; //不加线程无法退出
    if (!jobs1.empty())
    {
        std::lock_guard<std::mutex> l1(lock1);
        auto job_cameraImage = jobs1.front();
        jobs1.pop();
        cv1.notify_all();
        img_camera = job_cameraImage;
        //int alarm_num1, danger_num1;
        mask_img = img_camera.clone();

        if (yolo1.YoloInfer(img_camera,result1))
        {
            yolo1.DrawPred(img_camera, result1, yolo1.class_names);
            std::cout << "number of alarm person: " << alarm_num1 << ", number of danger person: " << danger_num1 << std::endl;
        }
        else
        {
            std::cout << "No body!------------1" << std::endl;
        }
        resize(img_camera, img_camera, Size(640, 480));
        imshow("jk1", img_camera);
        waitKey(1);
        Sleep(20);
    }
    std::this_thread::yield(); //不加线程无法退出
}

};

void DetectPerson2() {

int alarm_num2, danger_num2;
Mat img_camera;
Mat mask_img;
while (true)
{
    if (stop)
        break; //不加线程无法退出
    if (!jobs2.empty())
    {
        std::lock_guard<std::mutex> l2(lock2);
        auto job_cameraImage = jobs2.front();
        jobs2.pop();
        cv2.notify_all();
        img_camera = job_cameraImage;
        //int alarm_num1, danger_num1;
        mask_img = img_camera.clone();

        if (yolo2.YoloInfer(img_camera, result2))
        {
            yolo2.DrawPred(img_camera, result2, yolo1.class_names);
            std::cout << "number of alarm person: " << alarm_num2 << ", number of danger person: " << danger_num2 << std::endl;
        }
        else
        {
            std::cout << "No body!------------2" << std::endl;
        }
        resize(img_camera, img_camera, Size(640, 480));
        imshow("jk2", img_camera);
        waitKey(1);
        Sleep(20);
    }
    std::this_thread::yield(); //不加线程无法退出
}

};

int main() { //注意修改模型的路径 static const std::string model_file = "E:/myvs/YOLOv8-CPP-Inference/Detect/openvino/model/chunxin_f32_640.xml"; string device = "CPU";

bool initSegflag1 = yolo1.LoadModel(model_file, device);
if (initSegflag1 == true)
{
    cout << "Load model OK!" << endl;
}
else
{
    cout << "Load model Faild!" << endl;
    return -1;
}
bool initSegflag2 = yolo2.LoadModel(model_file, device);
if (initSegflag2 == true)
{
    cout << "Load model OK!" << endl;
}
else
{
    cout << "Load model Faild!" << endl;
    return -1;
}

HK_camera cam1, cam2;
cam1.Init();
if (cam1.Init() == true)
{
    cout << "init HikSDK success" << endl;
}
else
{
    cout << "init HikSDK fail" << endl;
}
LONG lUserID1 = -1;
LONG lUserID2 = -2;

MyStruct structdata1;
structdata1.lUserId = lUserID1;
structdata1.UserlRealPlayHandle = -1;

MyStruct structdata2;
structdata2.lUserId = lUserID2;
structdata2.UserlRealPlayHandle = -2;

//注册两个相机及开启预览
cam1.Login1("192.168.1.64", "admin", "jkrobot2022", 8000, (void*)(&structdata1));
//cam2.Login2("192.168.1.65", "admin", "jkrobot2022", 8000, (void*)(&structdata2));

Sleep(5000);
//开启线程
//相机一采图和推理
std::thread OpenCam1(GrabImage1, cam1);
std::thread Detect1(DetectPerson1);
//相机二采图和推理
//std::thread OpenCam2(GrabImage2, cam2);

// std::thread Detect2(DetectPerson2);

OpenCam1.join();
Detect1.join();

//OpenCam2.join();
//Detect2.join();

return 0;

}

In main I open two cameras and run inference on each separately. Compared with my earlier OpenCV DNN video-stream version, this part of main.cpp is essentially unchanged, and the logic inside the inference threads is identical; the only real difference is the inference backend (OpenCV DNN vs OpenVINO). The OpenCV DNN version displays correctly, but with OpenVINO the boxes accumulate from frame to frame.

pcycccccc commented 7 months ago

Because my algorithm uses a collision check, you can skip the DrawPolygon drawing code and focus mainly on the OpenVINO YOLOv8 inference code and how it is called. I suspect the problem is most likely in the inference code (yolov8.cpp), but I really can't tell where the logic goes wrong. Hoping you can point me in the right direction!

pcycccccc commented 7 months ago

Sorry for the trouble: it was my fault. In main I used the global variables vector<Output> result1, result2 to collect the results but never cleared them after each detection. Changing them to local variables fixed it!!! No need to look into it anymore!!!
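For anyone hitting the same symptom, here is a minimal self-contained sketch of the mechanism just described (fake_infer and the int vectors are illustrative stand-ins for YoloInfer and vector<Output>): a global result vector that is never cleared carries one frame's boxes into the next, while a frame-local vector does not.

```cpp
#include <cstdio>
#include <vector>

// Stand-in for YoloInfer: appends one detection per call, just as the real
// code does with outputs.push_back(result).
void fake_infer(std::vector<int>& outputs) { outputs.push_back(1); }

std::vector<int> result_global; // the problematic global from main.cpp

int main() {
    for (int frame = 0; frame < 3; ++frame) {
        fake_infer(result_global); // never cleared: prints 1, 2, 3 boxes
        std::printf("global: frame %d -> %zu boxes\n", frame, result_global.size());

        std::vector<int> result_local; // the fix: fresh vector per frame
        fake_infer(result_local);      // always exactly 1 box
        std::printf("local:  frame %d -> %zu boxes\n", frame, result_local.size());
    }
}
```

Equivalently, if the vector must stay global, calling result1.clear() before every YoloInfer call has the same effect.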

shalvenlaw commented 7 months ago

OK. But note the fill_tensor_data_image function: my earlier version has a bug, and the program crashes if the input image is 640*480. You can change it like this:

```cpp
/*!
 * \brief fill_tensor_data_image Fill image data into a network input node that expects an image
 * \param input_tensor tensor of the input node
 * \param input_image the input image data
 * \return the scale factor used to fit input_image into input_tensor
 */
float fill_tensor_data_image(ov::Tensor &input_tensor, const cv::Mat &input_image)
{
    /// letterbox transform: keep the aspect ratio, scale input_image and place it at the top-left of blob_image
    const ov::Shape tensor_shape = input_tensor.get_shape();
    const size_t num_channels = tensor_shape[1];
    const size_t height = tensor_shape[2];
    const size_t width = tensor_shape[3];
    // scale factor
    const float scale = std::min(height / float(input_image.rows),
                                 width / float(input_image.cols));
    const cv::Matx23f matrix{
        scale, 0.0, 0.0,
        0.0, scale, 0.0,
    };
    cv::Mat blob_image;
    // Pick the conversion order based on scale; this only buys a little speed
    // (mainly for the channel swap). If you don't care about the speedup,
    // either branch alone works.
    if (scale < 1.0 - FLT_EPSILON) {
        // downscaling: scale first, then swap channels
        cv::warpAffine(input_image, blob_image, matrix, cv::Size(width, height));
        convert(blob_image, blob_image, true, true);
    } else {
        // upscaling: swap channels first, then scale
        convert(input_image, blob_image, true, true);
        cv::warpAffine(blob_image, blob_image, matrix, cv::Size(width, height));
    }

    /// copy the image data into input_tensor
    float *const input_tensor_data = input_tensor.data<float>();
    // the image is HWC, the model input node expects CHW
    for (size_t c = 0; c < num_channels; c++) {
        for (size_t h = 0; h < height; h++) {
            for (size_t w = 0; w < width; w++) {
                input_tensor_data[c * width * height + h * width + w] = blob_image.at<cv::Vec<float, 3>>(h, w)[c];
            }
        }
    }
    return 1 / scale;
}
```

The key change is removing the final else branch of the old three-way chain (if (scale > 1.0 + FLT_EPSILON) / else if (scale < 1.0 - FLT_EPSILON) / else): for a 640*480 input into a 640*640 tensor, scale works out to exactly 1.0, so the old final else converted the image without ever calling warpAffine, leaving blob_image at 640*480 while the HWC-to-CHW copy loop still walked the full 640*640 tensor and read out of bounds.
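A minimal standalone sketch of the failure condition, with the 640*640 model input and a 640*480 frame taken from the discussion above:

```cpp
#include <algorithm>
#include <cstdio>

int main() {
    const float tensor_h = 640, tensor_w = 640; // model input (NCHW H, W)
    const float img_h = 480, img_w = 640;       // camera frame
    // Same formula as fill_tensor_data_image:
    const float scale = std::min(tensor_h / img_h, tensor_w / img_w); // == 1.0 exactly
    // In the old three-branch version, scale == 1.0 fell into the last "else",
    // which converted the image without warpAffine, so blob_image stayed at
    // 640x480 while the CHW copy loop still iterated 640 rows -> crash.
    std::printf("scale = %.3f\n", scale);
    return 0;
}
```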

pcycccccc commented 7 months ago

Got it, understood! Thanks a lot!