Open pcycccccc opened 7 months ago
我之前也有通过视频和摄像头取流实时推理, 没发现这些问题呢, 估计不会是openvino的bug, 要不把你的代码给我看一下?
我之前也有通过视频和摄像头获取流实时推理,没发现这些问题呢,估计不会是openvino的bug,要不要把你的代码给我看一下?
好的,十分感谢!
YOLOv8.Cpp代码:
using namespace std; using namespace cv; using namespace dnn;
void convert(const cv::Mat& input, cv::Mat& output, const bool normalize, const bool exchangeRB) { input.convertTo(output, CV_32F); if (normalize) { output = output / 255.0; // 归一化到[0, 1] } if (exchangeRB) { cv::cvtColor(output, output, cv::COLOR_BGR2RGB); } } float fill_tensor_data_image(ov::Tensor& input_tensor, const cv::Mat& input_image) { /// letterbox变换: 不改变宽高比(aspect ratio), 将input_image缩放并放置到blob_image左上角 const ov::Shape tensor_shape = input_tensor.get_shape(); const size_t num_channels = tensor_shape[1]; const size_t height = tensor_shape[2]; const size_t width = tensor_shape[3]; // 缩放因子 const float scale = std::min(height / float(input_image.rows), width / float(input_image.cols)); const cv::Matx23f matrix{ scale, 0.0, 0.0, 0.0, scale, 0.0, }; cv::Mat blob_image; // 下面根据scale范围进行数据转换, 这只是为了提高一点速度(主要是提高了交换通道的速度), 但可读性很差 // 如果不在意这点速度提升的可以固定一种做法(前两个if分支都可以) if (scale > 1.0 + FLT_EPSILON) { // 要放大, 那么先交换通道再放大 convert(input_image, blob_image, true, true); cv::warpAffine(blob_image, blob_image, matrix, cv::Size(width, height)); } else if (scale < 1.0 - FLT_EPSILON) { // 要缩小, 那么先缩小再交换通道 cv::warpAffine(input_image, blob_image, matrix, cv::Size(width, height)); convert(blob_image, blob_image, true, true); } else { convert(input_image, blob_image, true, true); } // cv::imshow("input_image", input_image); // cv::imshow("blob_image", blob_image); // cv::waitKey(0);
/// 将图像数据填入input_tensor
float* const input_tensor_data = input_tensor.data<float>();
// 原有图片数据为 HWC格式,模型输入节点要求的为 CHW 格式
for (size_t c = 0; c < num_channels; c++) {
for (size_t h = 0; h < height; h++) {
for (size_t w = 0; w < width; w++) {
input_tensor_data[c * width * height + h * width + w] = blob_image.at<cv::Vec<float, 3>>(h, w)[c];
}
}
}
return 1 / scale;
}
bool Yolov8Detect::LoadModel(const string& xmlName, string& device)
{
ov::Core core;
//std::shared_ptr
/// 载入并编译模型
compiled_model = core.compile_model(xmlName, device);
//compiled_model = core.compile_model(xmlName, device);
infer_request = compiled_model.create_infer_request();
input_tensor = infer_request.get_input_tensor();
return true;
}
bool Yolov8Detect::YoloInfer(const Mat& src, vector
/// 处理推理计算结果
// 获得推理结果
const ov::Tensor output = infer_request.get_output_tensor();
const ov::Shape output_shape = output.get_shape();
const float* output_buffer = output.data<const float>();
// 解析推理结果
const int out_rows = output_shape[1]; //获得"output"节点的rows
const int out_cols = output_shape[2]; //获得"output"节点的cols
//std::cout << "out_rows:" << out_rows << std::endl;//这里row的值等于6,6=4+2,4表示的是(x,y,z,h),2表示的是class的类别数
//std::cout << "out_cols:" << out_cols << std::endl;//这里cols的值等于8400,即模型输出的那个张量的值
const cv::Mat det_output(out_rows, out_cols, CV_32F, (float*)output_buffer);
std::vector<cv::Rect> boxes;
std::vector<int> class_ids;
std::vector<float> confidences;
// std::cout << "det_output.cols:" << det_output.cols << std::endl; // 输出格式是[84,8400], 每列代表一个框(即最多有8400个框), 前面4行分别是cx, cy, ow, oh, 后面80行是每个类别的置信度 for (int i = 0; i < det_output.cols; ++i) { const cv::Mat classes_scores = det_output.col(i).rowRange(4, 4+ class_names.size()); cv::Point class_id_point; double score; cv::minMaxLoc(classes_scores, nullptr, &score, nullptr, &class_id_point);
// 置信度 0~1之间
if (score > _classThreshold) {
const float cx = det_output.at<float>(0, i);
const float cy = det_output.at<float>(1, i);
const float ow = det_output.at<float>(2, i);
const float oh = det_output.at<float>(3, i);
//cv::Rect box;
int left = static_cast<int>((cx - 0.5 * ow) * factor);
int top = static_cast<int>((cy - 0.5 * oh) * factor);
int width = static_cast<int>(ow * factor);
int height = static_cast<int>(oh * factor);
boxes.push_back(Rect(left, top, width, height));
class_ids.push_back(class_id_point.y);
confidences.push_back(score);
}
}
// NMS, 消除具有较低置信度的冗余重叠框
std::vector<int> nms_result;
cv::dnn::NMSBoxes(boxes, confidences, _classThreshold, _nmsThreshold, nms_result);
Rect holeImgRect(0, 0, src.cols, src.rows);
//cv::dnn::NMSBoxes(boxes, confidences, conf_threshold, nms_threshold, nms_indexes);
for (int i = 0; i < nms_result.size(); ++i) {
int idx = nms_result[i];
Output result;
result._id = class_ids[idx];
result._confidence = confidences[idx];
result._box = boxes[idx] & holeImgRect;;
outputs.push_back(result);
}
if (outputs.size())
return true;
else
return false;
} void Yolov8Detect::DrawPred(Mat& img, vector
YOLOv8.hpp代码:
using namespace std; using namespace cv; using namespace dnn;
struct Output { int _id; // 结果类别id float _confidence; // 结果置信度 cv::Rect _box; // 矩形框 };
class Yolov8Detect { public: Yolov8Detect() { } ~Yolov8Detect() {}
bool LoadModel(const string& xmlName, string& device);
bool YoloInfer(const Mat& src, vector<Output>& output);
void DrawPred(cv::Mat& img, std::vector<Output> result,std::vector<std::string> classNames);
//若修改网络的输入大小,需要修改分割分支的输入大小,否则会分割不对
//可以通过查看网络结构来确认,网址:https://netron.app/
//960*960的网络大小,对应的分割输入大小为240*240
//640*640的网络大小,对应的分割输入大小为160*160
const int _netWidth = 640; //ONNX图片输入宽度
const int _netHeight = 640; //ONNX图片输入高度
float _classThreshold = 0.25;
float _nmsThreshold = 0.45;
//类别名,自己的模型需要修改此项
ov::InferRequest infer_request;
ov::CompiledModel compiled_model;
ov::Tensor input_tensor;
std::vector<std::string> class_names = { "person"};
};
在main.cpp中我的代码:
using namespace std;
using namespace cv;
using namespace dnn;
//const int limit = 1000;
struct Job
{
cv::Mat image;
};
//std::queue
void GrabImage1(HK_camera name)
{
cv::Mat image1;
while (cv::waitKey(1) < 0)
{
name.grabRGBImage1(&image1);
//resize(image1, image1, Size(640, 480));
//cv::imshow("jk", image1);
//waitKey(1);
std::unique_lock
void GrabImage2(HK_camera name)
{
cv::Mat image2;
while (cv::waitKey(1) < 0)
{
name.grabRGBImage2(&image2);
//resize(image2, image2, Size(640, 480));
//cv::imshow("jk2", image2);
//waitKey(1);
std::unique_lock
void DetectPerson1() {
int alarm_num1, danger_num1;
Mat img_camera;
Mat mask_img;
while (true)
{
if (stop)
break; //不加线程无法退出
if (!jobs1.empty())
{
std::lock_guard<std::mutex> l1(lock1);
auto job_cameraImage = jobs1.front();
jobs1.pop();
cv1.notify_all();
img_camera = job_cameraImage;
//int alarm_num1, danger_num1;
mask_img = img_camera.clone();
if (yolo1.YoloInfer(img_camera,result1))
{
yolo1.DrawPred(img_camera, result1, yolo1.class_names);
std::cout << "number of alarm person: " << alarm_num1 << ", number of danger person: " << danger_num1 << std::endl;
}
else
{
std::cout << "No body!------------1" << std::endl;
}
resize(img_camera, img_camera, Size(640, 480));
imshow("jk1", img_camera);
waitKey(1);
Sleep(20);
}
std::this_thread::yield(); //不加线程无法退出
}
};
void DetectPerson2() {
int alarm_num2, danger_num2;
Mat img_camera;
Mat mask_img;
while (true)
{
if (stop)
break; //不加线程无法退出
if (!jobs2.empty())
{
std::lock_guard<std::mutex> l2(lock2);
auto job_cameraImage = jobs2.front();
jobs2.pop();
cv2.notify_all();
img_camera = job_cameraImage;
//int alarm_num1, danger_num1;
mask_img = img_camera.clone();
if (yolo2.YoloInfer(img_camera, result2))
{
yolo2.DrawPred(img_camera, result2, yolo1.class_names);
std::cout << "number of alarm person: " << alarm_num2 << ", number of danger person: " << danger_num2 << std::endl;
}
else
{
std::cout << "No body!------------2" << std::endl;
}
resize(img_camera, img_camera, Size(640, 480));
imshow("jk2", img_camera);
waitKey(1);
Sleep(20);
}
std::this_thread::yield(); //不加线程无法退出
}
};
int main() { //注意修改模型的路径 static const std::string model_file = "E:/myvs/YOLOv8-CPP-Inference/Detect/openvino/model/chunxin_f32_640.xml"; string device = "CPU";
bool initSegflag1 = yolo1.LoadModel(model_file, device);
if (initSegflag1 == true)
{
cout << "Load model OK!" << endl;
}
else
{
cout << "Load model Faild!" << endl;
return -1;
}
bool initSegflag2 = yolo2.LoadModel(model_file, device);
if (initSegflag2 == true)
{
cout << "Load model OK!" << endl;
}
else
{
cout << "Load model Faild!" << endl;
return -1;
}
HK_camera cam1, cam2;
cam1.Init();
if (cam1.Init() == true)
{
cout << "init HikSDK success" << endl;
}
else
{
cout << "init HikSDK fail" << endl;
}
LONG lUserID1 = -1;
LONG lUserID2 = -2;
MyStruct structdata1;
structdata1.lUserId = lUserID1;
structdata1.UserlRealPlayHandle = -1;
MyStruct structdata2;
structdata2.lUserId = lUserID2;
structdata2.UserlRealPlayHandle = -2;
//注册两个相机及开启预览
cam1.Login1("192.168.1.64", "admin", "jkrobot2022", 8000, (void*)(&structdata1));
//cam2.Login2("192.168.1.65", "admin", "jkrobot2022", 8000, (void*)(&structdata2));
Sleep(5000);
//开启线程
//相机一采图和推理
std::thread OpenCam1(GrabImage1, cam1);
std::thread Detect1(DetectPerson1);
//相机二采图和推理
//std::thread OpenCam2(GrabImage2, cam2);
// std::thread Detect2(DetectPerson2);
OpenCam1.join();
Detect1.join();
//OpenCam2.join();
//Detect2.join();
return 0;
}
main里边我开了两个相机来分别推理,我之前opencv-dnn进行相机视频流推理,main.cpp这部分的代码基本上没有太大的变化,且推理线程内的逻辑是一致的,主要是推理方式的变化(一个是opencvdnn 一个是openvino),opencvdnn显示结果OK,但是用openvino推理的话就会出现框累加的情况
因为我的算法是会用到碰撞算法,所以DrawPolygon画图部分的代码,您可以略过不看,可以主要关注openvino的yolov8推理代码和调用方法。我感觉大概率是在yolov8推理代码(yolov8.cpp)代码里,但是我真的不晓得哪里逻辑有问题,望大佬指点!
打扰了,是我在主函数那边用了全局变量vector result1, result2来装结果,但是每次检测完没有clear,把这个变量修改成局部变量就好了!!!大佬,不用帮我看啦!!!
好的. 不过注意一下fill_tensor_data_image
这个函数, 我之前写的有bug, 如果输入图像是640*480程序会崩溃, 你可以改一下:
/*!
* \brief fill_tensor_data_image 对网络的输入为图片数据的节点进行赋值,实现图片数据输入网络
* \param input_tensor 输入节点的tensor
* \param input_image 输入图片的数据
* \return 缩放因子, 该缩放是为了将input_image塞进input_tensor
*/
float fill_tensor_data_image(ov::Tensor &input_tensor, const cv::Mat &input_image)
{
/// letterbox变换: 不改变宽高比(aspect ratio), 将input_image缩放并放置到blob_image左上角
const ov::Shape tensor_shape = input_tensor.get_shape();
const size_t num_channels = tensor_shape[1];
const size_t height = tensor_shape[2];
const size_t width = tensor_shape[3];
// 缩放因子
const float scale = std::min(height / float(input_image.rows),
width / float(input_image.cols));
const cv::Matx23f matrix{
scale, 0.0, 0.0,
0.0, scale, 0.0,
};
cv::Mat blob_image;
// 下面根据scale范围进行数据转换, 这只是为了提高一点速度(主要是提高了交换通道的速度)
// 如果不在意这点速度提升的可以固定一种做法(两个if分支随便一个都可以)
if (scale < 1.0 - FLT_EPSILON) {
// 要缩小, 那么先缩小再交换通道
cv::warpAffine(input_image, blob_image, matrix, cv::Size(width, height));
convert(blob_image, blob_image, true, true);
} else {
// 要放大, 那么先交换通道再放大
convert(input_image, blob_image, true, true);
cv::warpAffine(blob_image, blob_image, matrix, cv::Size(width, height));
}
/// 将图像数据填入input_tensor
float *const input_tensor_data = input_tensor.data<float>();
// 原有图片数据为 HWC格式,模型输入节点要求的为 CHW 格式
for (size_t c = 0; c < num_channels; c++) {
for (size_t h = 0; h < height; h++) {
for (size_t w = 0; w < width; w++) {
input_tensor_data[c * width * height + h * width + w] = blob_image.at<cv::Vec<float, 3>>(h, w)[c];
}
}
}
return 1 / scale;
}
主要是要去掉if (scale < 1.0 - FLT_EPSILON) {
最后一个else
分支
好的,了解了!谢谢大佬
大佬您好,我最近在用openvino对yolov8进行推理,输入的是视频,但在检测中发现前一帧的框会标注完在下一帧里不会消失,我后处理画框的代码与opencv dnn的处理方式并没有什么不同(单写了个画框函数),opencv-dnn的推理视频显示效果是正常的,但openvino显示有点不正常,我没有找到出现bug的原因是什么,所以我不确定是不是openvino本身的问题,想请教一下您,期待您的回复!