Closed liaoweiguo closed 6 years ago
I haven't worked on getConfidence()
method. It will always return 0. The idea is that getLabel()
will return the name of the recognized person only when it's confident enough.
I added getConfidence(), but it failed to build.
Anyway, the ability to tune the confidence is absolutely necessary. Please consider it when you have time.
class DLibFaceRecognizer
Line 74: std::vector<float> rec_confidences;
recognizer.h DLibFaceRecognizer::rec Line 180
float confidence; for (size_t i = 0; i < face_descriptors.size(); ++i) { for (size_t j = 0; j < rec_face_descriptors.size(); ++j) { confidence = length(face_descriptors[i]-rec_face_descriptors[j]); if (confidence < 0.5) { LOG(INFO) << rec_names[j]<<" FOUND!!!!"; dlib::rectangle r = frects[i]; rec_rects.push_back(r); rec_labels.push_back(rec_names[j]); rec_confidences.push_back(confidence); } } }
recognizer.h
Last Line
inline std::vector
I rebuilt the .so and it works. With the threshold set to 0.6, the library gives a lot of false detections, so exposing the confidence lets the app handle it.
The threshold of 0.6 gives over 99% accuracy as given in this example on the official site. I think the problem is with image quality. It varies with the device hardware. You can try changing some parameters to control image quality as given in readme.
I did everything as suggested above, but I always get a confidence of 0.0. I want to get the best match in the result, which is not possible without a variable that can differentiate the candidates. Please help me resolve my problem.
My code is here which I did till yet as per above suggestion.
recognizer.h
// Created by Gaurav on Feb 23, 2018
using namespace dlib; using namespace std;
// ResNet network copied from dnn_face_recognition_ex.cpp in dlib/examples
template <template <int,template
template <template <int,template
template <int N, template
template <int N, typename SUBNET> using ares = relu<residual<block,N,affine,SUBNET>>; template <int N, typename SUBNET> using ares_down = relu<residual_down<block,N,affine,SUBNET>>;
template
using anet_type = loss_metric<fc_no_bias<128,avg_pool_everything< alevel0< alevel1< alevel2< alevel3< alevel4< max_pool<3,3,2,2,relu<affine<con<32,7,7,2,2, input_rgb_image_sized<150>
;
class DLibFaceRecognizer {
private:
std::string landmark_model;
std::string model_dir_path;
std::string image_dir_path;
std::string dnn_model;
anet_type net;
dlib::shape_predictor sp;
std::unordered_map<int, dlib::full_object_detection> mFaceShapeMap;
dlib::frontal_face_detector face_detector;
std::vector
// One-time setup shared by both constructors: build dlib's HOG frontal-face
// detector and derive the model/image paths from model_dir_path. Does NOT
// load the model weights (the path-taking constructor does that).
inline void init() { LOG(INFO) << "init DLibFaceRecognizer"; face_detector = dlib::get_frontal_face_detector(); landmark_model = model_dir_path + "/shape_predictor_5_face_landmarks.dat"; dnn_model = model_dir_path + "/dlib_face_recognition_resnet_model_v1.dat"; image_dir_path = model_dir_path + "/images"; is_training = false; }
public:
  // Enroll reference faces: read every image in image_dir_path, keep the
  // ones containing exactly one detectable face, and compute a 128-D
  // descriptor for each. Fills rec_face_descriptors / rec_names, where a
  // person's label is the image's filename. Sets is_training while the
  // network runs so concurrent rec()/det() calls return early.
  inline void train() {
    LOG(INFO) << "train DLibFaceRecognizer";
    // NOTE(review): originally declared as `struct dirent entry; DIR dp;`,
    // which cannot compile against opendir()/readdir(); the '*' characters
    // were almost certainly stripped by formatting. Restored pointer types.
    struct dirent* entry;
    DIR* dp = opendir(image_dir_path.c_str());
    if (dp == NULL) {
      LOG(INFO) << ("Opendir: Path does not exist or could not be read.");
      return;  // bug fix: the original fell through and passed NULL to readdir()
    }
    std::vector<matrix<rgb_pixel>> faces;
    std::vector<std::string> names;
    // load images from dlib image directory and extract faces
    while ((entry = readdir(dp))) {
      std::string filename = entry->d_name;
      if (filename == "." || filename == "..") continue;
      cv::Mat file_image = cv::imread(image_dir_path + "/" + filename, CV_LOAD_IMAGE_COLOR);
      // robustness: skip unreadable/non-image files instead of wrapping an
      // empty Mat in dlib::cv_image
      if (file_image.empty()) {
        LOG(INFO) << "Could not read image " << filename;
        continue;
      }
      LOG(INFO) << "Load image " << (entry->d_name);
      dlib::cv_image<dlib::bgr_pixel> img(file_image);
      std::vector<dlib::rectangle> frects = face_detector(img);
      if (frects.size() == 1) {
        auto face = frects[0];
        auto shape = sp(img, face);
        matrix<rgb_pixel> face_chip;
        // 150x150 chip with 25% padding -- the input size anet_type expects
        extract_image_chip(img, get_face_chip_details(shape, 150, 0.25), face_chip);
        faces.push_back(std::move(face_chip));
        names.push_back(filename);
        LOG(INFO) << "Added image " << filename;
      } else if (frects.size() == 0) {
        LOG(INFO) << "No face found in image " << filename;
      } else {
        LOG(INFO) << "More than one face found in image " << filename;
      }
    }
    closedir(dp);
    is_training = true;  // make rec()/det() bail out while the net is busy
    // calculate face descriptors and set global vars
    LOG(INFO) << "Calculating face descriptors " << jniutils::currentDateTime();
    rec_face_descriptors = net(faces);
    LOG(INFO) << "Calculated face descriptors " << jniutils::currentDateTime() << " Size " << rec_face_descriptors.size();
    rec_names = names;
    is_training = false;
  }
// Default-construct with an empty model_dir_path; model weights are NOT
// loaded here, so rec() will not produce matches until models exist.
DLibFaceRecognizer() { init(); }
// Construct from the directory holding the landmark and ResNet model files;
// deserializes both models only when both files actually exist on disk.
DLibFaceRecognizer(const std::string& dlib_rec_example_dir) : model_dir_path(dlib_rec_example_dir) { init(); if (!landmark_model.empty() && jniutils::fileExists(landmark_model) && !dnn_model.empty() && jniutils::fileExists(dnn_model)) { // load the model weights
dlib::deserialize(landmark_model) >> sp; dlib::deserialize(dnn_model) >> net; LOG(INFO) << "Models loaded"; } }
// Recognize enrolled people in `image`. Fills rec_rects / rec_labels /
// rec_confidences (one aligned entry per recognized face) and returns the
// number of recognized faces. The "confidence" is the Euclidean distance
// between descriptors: LOWER means a closer match, 0.6 is dlib's
// recommended decision threshold.
inline int rec(const cv::Mat& image) {
  if (is_training) return 0;
  if (image.empty()) return 0;
  if (image.channels() == 1) { cv::cvtColor(image, image, CV_GRAY2BGR); }
  CHECK(image.channels() == 3);
  dlib::cv_image<dlib::bgr_pixel> img(image);
  std::vector<matrix<rgb_pixel>> faces;
  std::vector<dlib::rectangle> frects = face_detector(img);
  for (const auto& face : frects) {
    auto shape = sp(img, face);
    matrix<rgb_pixel> face_chip;
    extract_image_chip(img, get_face_chip_details(shape, 150, 0.25), face_chip);
    faces.push_back(std::move(face_chip));
  }
  if (faces.empty()) {
    LOG(INFO) << "No faces found in image!";
  }
  LOG(INFO) << "calculating face descriptor in image..." << jniutils::currentDateTime();
  std::vector<matrix<float,0,1>> face_descriptors = net(faces);
  LOG(INFO) << "face descriptors in camera image calculated " << jniutils::currentDateTime() << " Size " << face_descriptors.size();
  // Bug fix: reset ALL result vectors. The original cleared only
  // rec_rects/rec_labels, so rec_confidences kept growing across calls and
  // getRecResultConfidences()[i] no longer lined up with the i-th rect --
  // the "confidence is always 0.0" symptom reported in this issue.
  rec_rects.clear();
  rec_labels.clear();
  rec_confidences.clear();
  for (size_t i = 0; i < face_descriptors.size(); ++i) {
    // Keep only the single best (smallest-distance) enrolled match per
    // detected face, instead of one entry per enrolled image under the
    // threshold as before.
    int best = -1;
    float best_dist = 0.6f;  // only accept matches strictly below 0.6
    for (size_t j = 0; j < rec_face_descriptors.size(); ++j) {
      float dist = length(face_descriptors[i] - rec_face_descriptors[j]);
      if (dist < best_dist) {
        best_dist = dist;
        best = static_cast<int>(j);
      }
    }
    if (best >= 0) {
      LOG(INFO) << rec_names[best] << " FOUND!!!!";
      rec_rects.push_back(frects[i]);
      rec_labels.push_back(rec_names[best]);
      rec_confidences.push_back(best_dist);
    }
  }
  return static_cast<int>(rec_rects.size());
}
// Detect faces only (no recognition). Fills `rects` and returns the count.
// TODO: convert to a gray image to speed up detection -- color is
// unnecessary for face/landmark detection.
virtual inline int det(const cv::Mat& image) {
  if (is_training) return 0;
  if (image.empty()) return 0;
  if (image.channels() == 1) { cv::cvtColor(image, image, CV_GRAY2BGR); }
  CHECK(image.channels() == 3);
  dlib::cv_image<dlib::bgr_pixel> img(image);
  // fix: removed an unused std::vector<matrix<rgb_pixel>> faces local that
  // was constructed and discarded on every call
  rects = face_detector(img);
  return static_cast<int>(rects.size());
}
inline std::vector
//------------------------------ jni_face_rec.cpp
/*
using namespace cv;
extern JNI_VisionDetRet* g_pJNI_VisionDetRet;
namespace {
// Alias for the native recognizer pointer stored in the Java object.
using RecPtr = DLibFaceRecognizer*;
// Caches the jfieldID of the Java-side long field ("mNativeFaceRecContext")
// that stores the native DLibFaceRecognizer pointer, and reads/writes that
// field on a jobject.
class JNI_FaceRec { public: JNI_FaceRec(JNIEnv* env) { jclass clazz = env->FindClass(CLASSNAME_FACE_REC); mNativeContext = env->GetFieldID(clazz, "mNativeFaceRecContext", "J"); env->DeleteLocalRef(clazz); }
RecPtr getRecognizerPtrFromJava(JNIEnv* env, jobject thiz) { RecPtr const p = (RecPtr)env->GetLongField(thiz, mNativeContext); return p; }
void setRecognizerPtrToJava(JNIEnv* env, jobject thiz, jlong ptr) { env->SetLongField(thiz, mNativeContext, ptr); }
jfieldID mNativeContext; };
// Protect getting/setting and creating/deleting pointer between java/native std::mutex gLock;
// NOTE(review): the template arguments of the declarations below were
// stripped by formatting -- presumably std::shared_ptr<JNI_FaceRec> and
// std::lock_guard<std::mutex> as in the upstream jni_face_rec.cpp; the
// bodies of getRecPtr/setRecPtr are also incomplete here. Confirm against
// the original source file.
std::shared_ptr
RecPtr const getRecPtr(JNIEnv* env, jobject thiz) {
std::lock_guard
// The function to set a pointer to java and delete it if newPtr is empty
void setRecPtr(JNIEnv* env, jobject thiz, RecPtr newPtr) {
std::lock_guard
if (newPtr != JAVA_NULL) { DLOG(INFO) << "setMapManager set new ptr : " << newPtr; }
getJNI_FaceRec(env)->setRecognizerPtrToJava(env, thiz, (jlong)newPtr); }
} // end unnamespace
extern "C" {
// NOTE(review): the line below is the tail of the DLIB_FACE_JNI_METHOD
// macro definition whose leading `#define` line was lost in the paste.
Java_com_tzutalin_dlibFaceRec##METHOD_NAME
// No-op class-init hook invoked from the Java static initializer.
void JNIEXPORT DLIB_FACE_JNI_METHOD(jniNativeClassInit)(JNIEnv* env, jclass _this) {}
// Marshal the recognizer's rec() results into a VisionDetRet[] of `size`:
// rect + label + confidence per element. `size` must not exceed the result
// vector sizes (callers pass the value rec() returned).
jobjectArray getRecResult(JNIEnv* env, RecPtr faceRecognizer, const int& size) { LOG(INFO) << "getRecResult"; jobjectArray jDetRetArray = JNI_VisionDetRet::createJObjectArray(env, size); for (int i = 0; i < size; i++) { jobject jDetRet = JNI_VisionDetRet::createJObject(env); env->SetObjectArrayElement(jDetRetArray, i, jDetRet); dlib::rectangle rect = faceRecognizer->getRecResultRects()[i]; std::string label = faceRecognizer->getRecResultLabels()[i]; float confidence = faceRecognizer->getRecResultConfidences()[i]; g_pJNI_VisionDetRet->setRect(env, jDetRet, rect.left(), rect.top(), rect.right(), rect.bottom()); g_pJNI_VisionDetRet->setLabel(env, jDetRet, label); g_pJNI_VisionDetRet->setConfidence(env, jDetRet, confidence); } return jDetRetArray; }
// Marshal det() results: rect only, with the fixed label "face".
jobjectArray getDetResult(JNIEnv* env, RecPtr faceRecognizer, const int& size) { LOG(INFO) << "getDetResult"; jobjectArray jDetRetArray = JNI_VisionDetRet::createJObjectArray(env, size); for (int i = 0; i < size; i++) { jobject jDetRet = JNI_VisionDetRet::createJObject(env); env->SetObjectArrayElement(jDetRetArray, i, jDetRet); dlib::rectangle rect = faceRecognizer->getDetResultRects()[i]; std::string label = "face"; g_pJNI_VisionDetRet->setRect(env, jDetRet, rect.left(), rect.top(), rect.right(), rect.bottom()); g_pJNI_VisionDetRet->setLabel(env, jDetRet, label); } return jDetRetArray; }
// Java entry point: detect faces in an Android Bitmap (converted RGBA->BGR).
JNIEXPORT jobjectArray JNICALL DLIB_FACE_JNI_METHOD(jniBitmapDetect)(JNIEnv* env, jobject thiz, jobject bitmap) { LOG(INFO) << "jniBitmapFaceDet"; cv::Mat rgbaMat; cv::Mat bgrMat; jniutils::ConvertBitmapToRGBAMat(env, bitmap, rgbaMat, true); cv::cvtColor(rgbaMat, bgrMat, cv::COLOR_RGBA2BGR); RecPtr mRecPtr = getRecPtr(env, thiz); jint size = mRecPtr->det(bgrMat); LOG(INFO) << "det face size: " << size; return getDetResult(env, mRecPtr, size); }
// Java entry point: recognize enrolled faces in an Android Bitmap.
JNIEXPORT jobjectArray JNICALL DLIB_FACE_JNI_METHOD(jniBitmapRec)(JNIEnv* env, jobject thiz, jobject bitmap) { LOG(INFO) << "jniBitmapFaceDet"; cv::Mat rgbaMat; cv::Mat bgrMat; jniutils::ConvertBitmapToRGBAMat(env, bitmap, rgbaMat, true); cv::cvtColor(rgbaMat, bgrMat, cv::COLOR_RGBA2BGR); RecPtr mRecPtr = getRecPtr(env, thiz); jint size = mRecPtr->rec(bgrMat); LOG(INFO) << "rec face size: " << size; return getRecResult(env, mRecPtr, size); }
// Create the native recognizer from the model directory and store its
// pointer in the Java object.
jint JNIEXPORT JNICALL DLIB_FACE_JNI_METHOD(jniInit)(JNIEnv* env, jobject thiz, jstring jDirPath) { LOG(INFO) << "jniInit"; std::string dirPath = jniutils::convertJStrToString(env, jDirPath); RecPtr mRecPtr = new DLibFaceRecognizer(dirPath); setRecPtr(env, thiz, mRecPtr); return JNI_OK; }
// Run enrollment on the images directory.
jint JNIEXPORT JNICALL DLIB_FACE_JNI_METHOD(jniTrain)(JNIEnv* env, jobject thiz) { LOG(INFO) << "jniTrain"; RecPtr mRecPtr = getRecPtr(env, thiz); mRecPtr->train(); return JNI_OK; }
// Release the native recognizer (setRecPtr deletes on null).
jint JNIEXPORT JNICALL DLIB_FACE_JNI_METHOD(jniDeInit)(JNIEnv* env, jobject thiz) { LOG(INFO) << "jniDeInit"; setRecPtr(env, thiz, JAVA_NULL); return JNI_OK; }
}
//------------------ VisionDetRet.java
/*
package com.tzutalin.dlib;
/**
import android.graphics.Point;
import java.util.ArrayList;
/**
A VisionDetRet contains all the information identifying the location and confidence value of the detected object in a bitmap.
*/
// Value object returned over JNI: the bounding box, label and confidence of
// one detected/recognized face.
public final class VisionDetRet {
private String mLabel;
// Distance from the native rec() call; LOWER means a closer match
// (0.0 = identical descriptors, 0.6 is the decision threshold).
private float mConfidence;
private int mLeft;
private int mTop;
private int mRight;
private int mBottom;
// NOTE(review): the type parameter was stripped by formatting -- presumably
// ArrayList<Point> for landmark points; confirm against the original file.
private ArrayList
VisionDetRet() { }
// NOTE(review): the javadoc bodies and the getter/setter methods that
// followed each "/**" below were lost in the paste; restore them from the
// original VisionDetRet.java.
/**
/**
/**
/**
/**
/**
/**
/**
/**
/**
 * Human-readable summary of the detection rectangle and label.
 * Bug fix: the original appended {@code mLabel} after "Left:" instead of
 * {@code mLeft}, so the left coordinate was never printed.
 */
@Override
public String toString() {
    StringBuilder sb = new StringBuilder();
    sb.append("Left:");
    sb.append(mLeft);
    sb.append(", Top:");
    sb.append(mTop);
    sb.append(", Right:");
    sb.append(mRight);
    sb.append(", Bottom:");
    sb.append(mBottom);
    sb.append(", Label:");
    sb.append(mLabel);
    return sb.toString();
}
}
So please give me your valuable suggestions on what I can do to get an accurate match within 250-300 images.
VisionDetRet[] detRets = jniBitmapRec(bitmap);
detRets[i].getConfidence());
It returns 0.0, so I cannot return the best face.