ShiqiYu / libfacedetection

An open source library for face detection in images. The face detection speed can reach 1000FPS.
Other
12.24k stars 3.05k forks source link

How to predict face landmarks with Caffe model? #271

Open amir-saniyan opened 3 years ago

amir-saniyan commented 3 years ago

The following code detects faces on camera with yufacedetectnet-open-v2.prototxt and yufacedetectnet-open-v2.caffemodel, but how can I predict face landmarks with this Caffe model?

main.cpp:

#include <chrono>
#include <cstdlib>
#include <iostream>
#include <stdexcept>
#include <string>

#include <opencv2/opencv.hpp>

// Model input dimensions. In the code shown here MODEL_INPUT_WIDTH and
// MODEL_INPUT_HEIGHT appear only in a commented-out cv::resize call inside
// main(), and MODEL_INPUT_CHANNEL is not referenced.
#define MODEL_INPUT_WIDTH 320
#define MODEL_INPUT_HEIGHT 240
#define MODEL_INPUT_CHANNEL 3

// Key code compared against the result of cv::waitKey() in main() to leave
// the detection loop (27 is the ASCII code of the Escape key).
#define ESC_KEY_CODE 27

/**
 * Detects faces on a live camera stream with a Caffe model run through OpenCV DNN.
 *
 * Command line:
 *   argv[1]  path to the model .prototxt file
 *   argv[2]  path to the model .caffemodel file
 *   argv[3]  optional camera index (defaults to -1)
 *
 * Each captured frame is fed to the network, every detection whose confidence
 * is at least 0.5 is drawn on the frame, and the frame is shown in a window.
 * The loop ends when the Escape key is pressed.
 *
 * @return EXIT_SUCCESS after the user leaves the loop; EXIT_FAILURE on wrong
 *         usage, an invalid camera index, a camera that cannot be opened, a
 *         frame that cannot be read, or an unexpected network output shape.
 */
int main(int argc, char* argv[])
{
    //----------------------------------------------------------------------------------------------------

    if((argc < 3) || (argc > 4))
    {
        std::cout << "Usage: ./face-detector <model-prototxt> <model-caffemodel> [<camera-index>]" << std::endl;
        std::cout << "Example: ./face-detector yufacedetectnet-open-v2.prototxt yufacedetectnet-open-v2.caffemodel -1" << std::endl;
        return EXIT_FAILURE;
    }

    //----------------------------------------------------------------------------------------------------

    std::string prototxtFileName = argv[1];
    std::string caffemodelFileName = argv[2];

    int cameraIndex = -1;
    if(argc == 4)
    {
        const std::string cameraIndexArgument = argv[3];
        bool isValidCameraIndex = false;

        // std::stoi throws std::invalid_argument / std::out_of_range (both derive
        // from std::logic_error) on bad input; report that instead of terminating.
        try
        {
            std::size_t parsedLength = 0;
            cameraIndex = std::stoi(cameraIndexArgument, &parsedLength);

            // Reject inputs such as "1abc", which std::stoi would accept as 1.
            isValidCameraIndex = (parsedLength == cameraIndexArgument.size());
        }
        catch(const std::logic_error&)
        {
            isValidCameraIndex = false;
        }

        if(!isValidCameraIndex)
        {
            std::cout << "Error: Invalid camera index: " << cameraIndexArgument << std::endl;
            return EXIT_FAILURE;
        }
    }

    //----------------------------------------------------------------------------------------------------

    std::cout << "Loading model..." << std::endl;

    cv::dnn::Net net = cv::dnn::readNetFromCaffe(prototxtFileName, caffemodelFileName);

    net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
    net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);

    std::cout << std::endl;

    //----------------------------------------------------------------------------------------------------

    std::cout << "Opening camera..." << std::endl;

    cv::VideoCapture videoCapture;

    if (!videoCapture.open(cameraIndex))
    {
        std::cout << "Error: Could not open camera: " << cameraIndex << std::endl;
        return EXIT_FAILURE;
    }

    std::cout << std::endl;

    //----------------------------------------------------------------------------------------------------

    std::cout << "Detecting..." << std::endl;

    while (true)
    {
        cv::Mat frame;
        videoCapture >> frame;

        if(frame.empty())
        {
            std::cout << "Error: Could not read camera frame." << std::endl;
            return EXIT_FAILURE;
        }

        //cv::resize(frame, frame, cv::Size(MODEL_INPUT_WIDTH, MODEL_INPUT_HEIGHT));

        // Time blob creation plus the forward pass.
        auto beginTime = std::chrono::steady_clock::now();

        auto input = cv::dnn::blobFromImage(frame, 1.0, cv::Size(), cv::Scalar(), true);

        net.setInput(input, "data");

        auto output = net.forward("detection_out");

        auto endTime = std::chrono::steady_clock::now();

        // Keep the duration's native integer type instead of narrowing it to int.
        const auto timespan = std::chrono::duration_cast<std::chrono::milliseconds>(endTime - beginTime).count();

        std::cout << "Detection time = " << timespan << "ms" << std::endl;

        // The code below indexes output.size[2], output.size[3] and columns 2..6,
        // so make sure the blob really is 4-D with at least 7 values per row.
        if((output.dims != 4) || (output.size[3] < 7))
        {
            std::cout << "Error: Unexpected detection output shape." << std::endl;
            return EXIT_FAILURE;
        }

        // output.size = 1 x 1 x 50 x 7
        // detectionMat.size = 50 x 7
        // detectionMat is a header over output's buffer (no copy), so output must outlive it.
        cv::Mat detectionMat(output.size[2], output.size[3], CV_32F, output.ptr<float>());

        for(int i = 0; i < detectionMat.rows; i++)
        {
            float confidence = detectionMat.at<float>(i, 2);

            if(confidence < 0.5)
            {
                continue;
            }

            // Columns 3..6 are scaled by the frame size and used as the
            // left, top, right and bottom edges of the box.
            int x = static_cast<int>(detectionMat.at<float>(i, 3) * frame.cols);
            int y = static_cast<int>(detectionMat.at<float>(i, 4) * frame.rows);
            int width = static_cast<int>(detectionMat.at<float>(i, 5) * frame.cols + 0.5f) - x;
            int height = static_cast<int>(detectionMat.at<float>(i, 6) * frame.rows + 0.5f) - y;

            cv::putText(frame, std::to_string(confidence), cv::Point(x, y - 3), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 255, 0), 1);

            cv::rectangle(frame, cv::Rect(x, y, width, height), cv::Scalar(0, 255, 0), 2);
        }

        //cv::resize(frame, frame, cv::Size(640, 480));

        cv::imshow("Face Detection", frame);

        if (cv::waitKey(1) == ESC_KEY_CODE)
        {
            break;
        }
    }

    std::cout << std::endl;

    //----------------------------------------------------------------------------------------------------

    std::cout << "Releasing camera..." << std::endl;

    videoCapture.release();

    std::cout << std::endl;

    //----------------------------------------------------------------------------------------------------

    return EXIT_SUCCESS;

    //----------------------------------------------------------------------------------------------------
}

CMakeLists.txt:

# Build script for the face-detector example application.
cmake_minimum_required(VERSION 3.5)

project(face-detector LANGUAGES CXX)

# The executable target is named after the project.
set(APP_NAME "${PROJECT_NAME}")

# OpenCV is mandatory; configuration fails if it cannot be found.
find_package(OpenCV REQUIRED)

add_executable(${APP_NAME} main.cpp)

# Require strict C++11 without compiler-specific extensions.
set_target_properties(${APP_NAME} PROPERTIES
    CXX_STANDARD 11
    CXX_STANDARD_REQUIRED ON
    CXX_EXTENSIONS OFF
)

target_link_libraries(${APP_NAME} PRIVATE ${OpenCV_LIBS})
fengyuentau commented 3 years ago

Facial landmark detection was added in the v3 model, as the ChangeLog says. If you need the model in Caffe format, you could use a converter to convert our PyTorch model to Caffe format.