Closed divineSix closed 5 months ago
For reference, this is the output from yolov5's detect.py using yolov5l6.pt.
Closing this issue as it's not related to my inference code, but instead the post process code you have written. That being said, if you provide your full code and model I can take a look.
I've written some very basic post-processing code to read the data from the featureVectors in your code. For replication, I'm using the yolov5l6 model, exported to ONNX from the official ultralytics repository.
This snippet is in main.cpp, just after printing the feature vector.
// POST PROCESSING
std::vector<cv::Rect> bboxes;
std::vector<float> confs; // Overall Conf
std::vector<int> classIds;
std::vector<float> output = featureVectors[0][0];
int dims = 85;
int rows = output.size() / dims;
for (int i=0; i < rows; i++) {
int index = i * dims;
if (output[index+4] <= CONF_THRESH) continue;
int centerX = (int)(output[index]);
int centerY = (int)(output[index+1]);
int width = (int)(output[index+2]);
int height = (int)(output[index+3]);
int left = centerX - width / 2;
int top = centerY - height / 2;
float bestClassProb = 0;
int bestClassId = 5;
for (int j=index+5; j < index+80; j++) {
if (output[j] > bestClassProb) {
bestClassProb = output[j];
bestClassId = j - (index + 5);
}
}
float conf = output[index+4] * bestClassProb;
bboxes.emplace_back(left, top, width, height);
confs.emplace_back(conf);
classIds.emplace_back(bestClassId);
}
std::vector<int> indices;
cv::dnn::NMSBoxes(bboxes, confs, CONF_THRESH, NMS_THRESH, indices);
std::vector<Detection> dets;
for (int idx: indices) {
Detection det;
det.box = cv::Rect(bboxes[idx]);
scaleCoords(cv::Size(640, 640), det.box, cpuImg.size());
det.conf = confs[idx];
det.classId = classIds[idx];
dets.emplace_back(det);
}
visDet(cpuImg, dets);
cv::imwrite("new_output.jpg", cpuImg);
The below snippet is for the related functions.
/// Draws every detection onto `image`: a box outline, a filled label
/// background above the box's top-left corner, and a "cls<id> <conf>%" caption.
///
/// @param image       Image that is drawn on in place.
/// @param detections  Detections to render; only box, conf and classId are read.
void visDet(cv::Mat& image, std::vector<Detection>& detections) {
    const cv::Scalar boxColor(229, 160, 21);
    const cv::Scalar textColor(255, 255, 255);

    for (const Detection& det : detections) {
        // Box outline.
        cv::rectangle(image, det.box, boxColor, 2);

        // Caption text: class id plus confidence as a whole-number percentage.
        const int originX = det.box.x;
        const int originY = det.box.y;
        const int percent = (int)std::round(det.conf * 100);
        const int id = det.classId;
        std::string caption = "cls" + std::to_string(id) + " " + std::to_string(percent) + "%";

        // Measure the caption so the filled background is as wide as the text.
        int baseline = 0;
        const cv::Size textSize = cv::getTextSize(caption, cv::FONT_ITALIC, 0.8, 2, &baseline);

        // Filled background strip (thickness -1) of fixed 25 px height above the box.
        const cv::Point stripTopLeft(originX, originY - 25);
        const cv::Point stripBottomRight(originX + textSize.width, originY);
        cv::rectangle(image, stripTopLeft, stripBottomRight, boxColor, -1);

        // Caption drawn slightly above the box's top edge.
        const cv::Point textOrigin(originX, originY - 3);
        cv::putText(image, caption, textOrigin, cv::FONT_ITALIC, 0.8, textColor, 2);
    }
}
/// Maps a rectangle from the resized image's coordinate frame to the original
/// image's frame, in place.
///
/// The scale factor is the smaller of the height and width ratios between the
/// two sizes; half of the leftover space on each axis is treated as padding and
/// subtracted from the rectangle's origin before dividing by the scale factor.
///
/// @param imageShape          Size of the resized image the rectangle refers to.
/// @param coords              Rectangle to convert; overwritten with the result.
/// @param imageOriginalShape  Size of the original image.
void scaleCoords(const cv::Size& imageShape, cv::Rect& coords, const cv::Size& imageOriginalShape) {
    const float resizedW = static_cast<float>(imageShape.width);
    const float resizedH = static_cast<float>(imageShape.height);
    const float originalW = static_cast<float>(imageOriginalShape.width);
    const float originalH = static_cast<float>(imageOriginalShape.height);

    // Uniform scale factor: the more restrictive of the two axis ratios.
    const float gain = std::min(resizedH / originalH, resizedW / originalW);

    // Padding per side, truncated to whole pixels.
    const int padX = static_cast<int>((resizedW - originalW * gain) / 2.0f);
    const int padY = static_cast<int>((resizedH - originalH * gain) / 2.0f);

    // Remove the padding offset, then undo the scaling; sizes only need unscaling.
    const float shiftedX = static_cast<float>(coords.x - padX);
    const float shiftedY = static_cast<float>(coords.y - padY);
    coords.x = static_cast<int>(std::round(shiftedX / gain));
    coords.y = static_cast<int>(std::round(shiftedY / gain));
    coords.width = static_cast<int>(std::round(static_cast<float>(coords.width) / gain));
    coords.height = static_cast<int>(std::round(static_cast<float>(coords.height) / gain));
}
It'd be great if you can share your own post-processing code as well. I can look into it and understand where I might be going wrong.
I've been looking through the code, and applied the NMS code from here.
The outputs are a little bit strange, looking like they've drifted upwards. How do I get this resolved?