chuanqi305 / MobileNet-SSD

Caffe implementation of Google MobileNet SSD detection network, with pretrained weights on VOC0712 and mAP=0.727.
MIT License
2.03k stars 1.18k forks source link

Working code, but no detections using C++? #181

Open TNemes-3141 opened 4 years ago

TNemes-3141 commented 4 years ago

Hello,

I want to use MobileNetSSD with C++ and therefore got this code from here: "Usage of OpenCV C++ API to perform objection detection using MobileNet and SSD". I modified the code slightly to make it work; you can find it below. My problem is: using the provided MobileNetSSD_deploy.prototxt and mobilenet_iter_73000.caffemodel, I do not get any detections back (only the first default one).

#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/core/utils/trace.hpp>
using namespace cv;
using namespace cv::dnn;
#include <fstream>
#include <iostream>
#include <cstdlib>
using namespace std;
// Directory prefix that main() prepends to the model files, the default test
// image and the output image. It is a hard-coded absolute path that ends with a
// path separator, so file names are appended by plain string concatenation.
#define PROJECT_DIR "C:\\Users\\nemes\\source\\repos\\SSD_with_OpenCV\\x64\\Debug\\"

// Label table: the class id reported by the network in each detection row is
// used as an index into this array (21 entries, index 0 is "background").
string CLASSES[] = {
    /*  0 */ "background",
    /*  1 */ "aeroplane",   "bicycle",     "bird",  "boat",
    /*  5 */ "bottle",      "bus",         "car",   "cat",
    /*  9 */ "chair",       "cow",         "diningtable",
    /* 12 */ "dog",         "horse",       "motorbike",
    /* 15 */ "person",      "pottedplant", "sheep",
    /* 18 */ "sofa",        "train",       "tvmonitor"
};

int main(int argc, char** argv) {
    CV_TRACE_FUNCTION();
    String modelTxt = (string)PROJECT_DIR + "MobileNetSSD_deploy.prototxt";
    String modelBin = (string)PROJECT_DIR + "mobilenet_iter_73000.caffemodel";

    String imageFile = (argc > 1) ? argv[1] : (string)PROJECT_DIR + "test2.jpg";
    Net net = readNetFromCaffe(modelTxt, modelBin); //here is network initialization
    if (net.empty()) {
        cerr << "Can't load network by using the following files: " << endl;
        cerr << "prototxt:   " << modelTxt << endl;
        cerr << "caffemodel: " << modelBin << endl;
        exit(-1);
    }

    Mat img = imread(imageFile);
    if (img.empty()) {
        cerr << "Can't read image from the file: " << imageFile << endl;
        exit(-1);
    }
    else {
        std::cout << "Succesfully read image from file: " << imageFile << endl;
    }

    Mat img2;
    resize(img, img2, Size(300, 300));
    Mat inputBlob = blobFromImage(img2, 0.007843, Size(300, 300), Scalar(127.5, 127.5, 127.5), false);
    net.setInput(inputBlob, "data");
    Mat detection = net.forward("detection_out"); //here the data gets forwardet to the network
    Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>()); //these are the detections

    ostringstream ss;
    float confidenceThreshold = 0;
    std::cout << "Found " << detectionMat.rows << " detection(s):" << endl;
    std::cout << "ID: " << static_cast<int>(detectionMat.at<float>(0, 1)) << " Confidence: " << static_cast<int>(detectionMat.at<float>(0, 2)) << endl;
    for (int i = 0; i < detectionMat.rows; i++) {
        float confidence = detectionMat.at<float>(i, 2);
        if (confidence > confidenceThreshold) {
            int idx = static_cast<int>(detectionMat.at<float>(i, 1));
            int xLeftBottom = static_cast<int>(detectionMat.at<float>(i, 3) * img.cols);
            int yLeftBottom = static_cast<int>(detectionMat.at<float>(i, 4) * img.rows);
            int xRightTop = static_cast<int>(detectionMat.at<float>(i, 5) * img.cols);
            int yRightTop = static_cast<int>(detectionMat.at<float>(i, 6) * img.rows);

            Rect object((int)xLeftBottom, (int)yLeftBottom,
                (int)(xRightTop - xLeftBottom),
                (int)(yRightTop - yLeftBottom));

            rectangle(img, object, Scalar(0, 255, 0), 2);

            std::cout << CLASSES[idx] << ": " << confidence << endl;

            ss.str("");
            ss << confidence;
            String conf(ss.str());
            String label = CLASSES[idx] + ": " + conf;
            int baseLine = 0;
            Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
            putText(img, label, Point(xLeftBottom, yLeftBottom),
                FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 0));
        }
    }
    vector<int> compression_params;
    compression_params.push_back(CV_IMWRITE_PNG_COMPRESSION);
    compression_params.push_back(9);

    cv::imwrite((string)PROJECT_DIR + "detections.png", img, compression_params);

    std::cout << "Ending..." << endl;
    return 0;
}

So this is my only output:

Found 1 detection(s):
ID: -431602080 Confidence: -431602080

That, obviously, can't be right (in the picture, there are some animals and people). The code is a simplification of the official C++ example provided by OpenCV, and that example works. What could be the problem then?

Thanks for the answers in advance!