Open amir-saniyan opened 3 years ago
The following code detects faces in camera frames using yufacedetectnet-open-v2.prototxt and yufacedetectnet-open-v2.caffemodel, but how can I predict face landmarks with this Caffe model?
main.cpp:
#include <chrono> #include <cstdlib> #include <iostream> #include <string> #include <opencv2/opencv.hpp> #define MODEL_INPUT_WIDTH 320 #define MODEL_INPUT_HEIGHT 240 #define MODEL_INPUT_CHANNEL 3 #define ESC_KEY_CODE 27 int main(int argc, char* argv[]) { //---------------------------------------------------------------------------------------------------- if((argc < 3) || (argc > 4)) { std::cout << "Usage: ./face-detector <model-prototxt> <model-caffemodel> [<camera-index>]" << std::endl; std::cout << "Example: ./face-detector yufacedetectnet-open-v2.prototxt yufacedetectnet-open-v2.caffemodel -1" << std::endl; return EXIT_FAILURE; } //---------------------------------------------------------------------------------------------------- std::string prototxtFileName = argv[1]; std::string caffemodelFileName = argv[2]; int cameraIndex = -1; if(argc == 4) { cameraIndex = std::stoi(argv[3]); } //---------------------------------------------------------------------------------------------------- std::cout << "Loading model..." << std::endl; cv::dnn::Net net = cv::dnn::readNetFromCaffe(prototxtFileName, caffemodelFileName); net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV); net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU); std::cout << std::endl; //---------------------------------------------------------------------------------------------------- std::cout << "Opening camera..." << std::endl; cv::VideoCapture videoCapture; if (!videoCapture.open(cameraIndex)) { std::cout << "Error: Could not open camera: " << cameraIndex << std::endl; return EXIT_FAILURE; } std::cout << std::endl; //---------------------------------------------------------------------------------------------------- std::cout << "Detecting..." << std::endl; while (true) { cv::Mat frame; videoCapture >> frame; if(frame.empty()) { std::cout << "Error: Could not read camera frame." 
<< std::endl; return EXIT_FAILURE; } //cv::resize(frame, frame, cv::Size(MODEL_INPUT_WIDTH, MODEL_INPUT_HEIGHT)); auto beginTime = std::chrono::steady_clock::now(); auto input = cv::dnn::blobFromImage(frame, 1.0, cv::Size(), cv::Scalar(), true); net.setInput(input, "data"); auto output = net.forward("detection_out"); auto endTime = std::chrono::steady_clock::now(); int timespan = std::chrono::duration_cast<std::chrono::milliseconds>(endTime - beginTime).count(); std::cout << "Detection time = " << timespan << "ms" << std::endl; // output.size = 1 x 1 x 50 x 7 // detectionMat.size = 50 x 7 cv::Mat detectionMat(output.size[2], output.size[3], CV_32F, output.ptr<float>()); for(int i = 0; i < detectionMat.rows; i++) { float confidence = detectionMat.at<float>(i, 2); if(confidence < 0.5) { continue; } int x = static_cast<int>(detectionMat.at<float>(i, 3) * frame.cols); int y = static_cast<int>(detectionMat.at<float>(i, 4) * frame.rows); int width = static_cast<int>(detectionMat.at<float>(i, 5) * frame.cols + 0.5f) - x; int height = static_cast<int>(detectionMat.at<float>(i, 6) * frame.rows + 0.5f) - y; cv::putText(frame, std::to_string(confidence), cv::Point(x, y - 3), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 255, 0), 1); cv::rectangle(frame, cv::Rect(x, y, width, height), cv::Scalar(0, 255, 0), 2); } //cv::resize(frame, frame, cv::Size(640, 480)); cv::imshow("Face Detection", frame); if (cv::waitKey(1) == ESC_KEY_CODE) { break; } } std::cout << std::endl; //---------------------------------------------------------------------------------------------------- std::cout << "Releasing camera..." << std::endl; videoCapture.release(); std::cout << std::endl; //---------------------------------------------------------------------------------------------------- return EXIT_SUCCESS; //---------------------------------------------------------------------------------------------------- }
CMakeLists.txt:
# Build script for the face-detector executable (single source: main.cpp).
cmake_minimum_required(VERSION 3.5)

project(face-detector LANGUAGES CXX)

# The executable target is named after the project.
set(APP_NAME "${PROJECT_NAME}")

# OpenCV is mandatory; configuration fails if it cannot be found.
find_package(OpenCV REQUIRED)

add_executable(${APP_NAME} main.cpp)

# Require strict C++11 with compiler extensions disabled.
set_target_properties(${APP_NAME} PROPERTIES
    CXX_STANDARD 11
    CXX_STANDARD_REQUIRED ON
    CXX_EXTENSIONS OFF
)

target_link_libraries(${APP_NAME} PRIVATE ${OpenCV_LIBS})
Facial landmark detection was added in the v3 model, as the ChangeLog says. If you need the model in Caffe format, you could use a converter to convert our PyTorch model to Caffe format.
The following code detects faces in camera frames using yufacedetectnet-open-v2.prototxt and yufacedetectnet-open-v2.caffemodel, but how can I predict face landmarks with this Caffe model?
main.cpp:
CMakeLists.txt: