gv22ga / dlib-face-recognition-android

Android app to demo dlib face recognition
MIT License

recognize result without confidence #10

Closed liaoweiguo closed 6 years ago

liaoweiguo commented 6 years ago

VisionDetRet[] detRets = jniBitmapRec(bitmap);

detRets[i].getConfidence()

always returns 0.0, so I cannot pick the best face:

        results = mFaceRec.recognize(mCroppedBitmap);
        long endTime = System.currentTimeMillis();
        Log.d(TAG, "Time cost: " + String.valueOf((endTime - startTime) / 1000f) + " sec");

        ArrayList<String> names = new ArrayList<>();
        for(VisionDetRet n:results) {
            names.add(n.getLabel() + n.getConfidence());
        }
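What I want is to pick the single best face once the confidence is filled in, something like this (a sketch, assuming getConfidence() would hold the descriptor distance, so lower is better):

    // Sketch: pick the best match, i.e. the result with the smallest distance.
    VisionDetRet best = null;
    for (VisionDetRet ret : results) {
        if (best == null || ret.getConfidence() < best.getConfidence()) {
            best = ret;
        }
    }
    if (best != null) {
        Log.d(TAG, "Best match: " + best.getLabel() + " (" + best.getConfidence() + ")");
    }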
gv22ga commented 6 years ago

I haven't implemented the getConfidence() method; it will always return 0. The idea is that getLabel() will return the name of the recognized person only when it is confident enough.
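So with the unpatched library, the only usable signal is whether a match was returned at all. A minimal sketch, assuming recognize() returns a list as in the snippet above:

    // rec() only returns entries that passed the internal distance check,
    // so "present in the results" is currently the only match signal.
    if (results.isEmpty()) {
        Log.d(TAG, "No enrolled face matched");
    } else {
        for (VisionDetRet ret : results) {
            Log.d(TAG, "Recognized: " + ret.getLabel());
        }
    }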

liaoweiguo commented 6 years ago

I added getConfidence(), but at first it failed to build.

Anyway, the ability to tune the confidence is absolutely necessary. Please consider it when you have time. Here is the patch:

  1. recognizer.h, class DLibFaceRecognizer, line 74: add a member for the confidences.

    std::vector<float> rec_confidences;

  2. recognizer.h, DLibFaceRecognizer::rec, line 180: record the confidence of every match.

    float confidence;
    for (size_t i = 0; i < face_descriptors.size(); ++i) {
      for (size_t j = 0; j < rec_face_descriptors.size(); ++j) {
        confidence = length(face_descriptors[i] - rec_face_descriptors[j]);
        if (confidence < 0.5) {
          LOG(INFO) << rec_names[j] << " FOUND!!!!";
          dlib::rectangle r = frects[i];
          rec_rects.push_back(r);
          rec_labels.push_back(rec_names[j]);
          rec_confidences.push_back(confidence);
        }
      }
    }

  3. recognizer.h, last line: add a getter.

    inline std::vector<float> getRecResultConfidences() { return rec_confidences; }

  4. jni_face_rec.cpp, line 92: copy the confidence into the Java result object.

    jobjectArray getRecResult(JNIEnv* env, RecPtr faceRecognizer, const int& size) {
      LOG(INFO) << "getRecResult";
      jobjectArray jDetRetArray = JNI_VisionDetRet::createJObjectArray(env, size);
      for (int i = 0; i < size; i++) {
        jobject jDetRet = JNI_VisionDetRet::createJObject(env);
        env->SetObjectArrayElement(jDetRetArray, i, jDetRet);
        dlib::rectangle rect = faceRecognizer->getRecResultRects()[i];
        std::string label = faceRecognizer->getRecResultLabels()[i];
        float confidence = faceRecognizer->getRecResultConfidences()[i];
        g_pJNI_VisionDetRet->setRect(env, jDetRet, rect.left(), rect.top(), rect.right(), rect.bottom());
        g_pJNI_VisionDetRet->setLabel(env, jDetRet, label);
        g_pJNI_VisionDetRet->setConfidence(env, jDetRet, confidence);
      }
      return jDetRetArray;
    }
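Note that the loop in step 2 pushes one entry per enrolled descriptor under the threshold, so a single detected face can come back several times. The app can collapse these on the Java side, for example (a sketch; the rectangle getters on VisionDetRet are assumed):

    // Sketch: keep only the lowest-distance match for each face rectangle.
    // (needs java.util.Map / java.util.HashMap)
    Map<String, VisionDetRet> bestPerFace = new HashMap<>();
    for (VisionDetRet ret : detRets) {
        String key = ret.getLeft() + "," + ret.getTop();  // assumed getters
        VisionDetRet cur = bestPerFace.get(key);
        if (cur == null || ret.getConfidence() < cur.getConfidence()) {
            bestPerFace.put(key, ret);
        }
    }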
liaoweiguo commented 6 years ago

I rebuilt the .so and it works. With the threshold at 0.6 the lib gives a lot of false detections, so having the confidence lets the app filter them out.

gv22ga commented 6 years ago

The threshold of 0.6 gives over 99% accuracy, as shown in this example on the official site. I think the problem is with image quality, which varies with the device hardware. You can try changing some parameters to control image quality as described in the readme.
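Since the usable cutoff depends on the device and image quality, one option is to leave the native threshold at 0.6 and apply a stricter, tunable one in the app (a sketch; STRICT_THRESHOLD is an assumed app-level knob, not part of the library):

    // Sketch: app-level cutoff on top of the native 0.6 threshold.
    final class MatchFilter {
        static final float STRICT_THRESHOLD = 0.5f;  // assumed tuning knob

        static String labelFor(VisionDetRet ret) {
            // Distances above the strict cutoff are treated as unknown.
            return ret.getConfidence() < STRICT_THRESHOLD ? ret.getLabel() : "unknown";
        }
    }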

kuldeepNimawat commented 4 years ago

I did all of this but I always get confidence 0.0. I want to get the best match in the result, which is not possible without a value to differentiate the candidates. Please help me resolve this problem.

kuldeepNimawat commented 4 years ago

Here is the code I have so far, following the suggestions above.

recognizer.h

// Created by Gaurav on Feb 23, 2018

#pragma once

#include <dlib/dnn.h>
#include <dlib/string.h>
#include <jni_common/jni_fileutils.h>
#include <jni_common/jni_utils.h>
#include <dlib/image_processing.h>
#include <dlib/image_processing/frontal_face_detector.h>
#include <dlib/opencv/cv_image.h>
#include <dlib/image_loader/load_image.h>
#include <glog/logging.h>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/opencv.hpp>
// (some original header names were lost in the issue formatting;
// the code below needs at least these)
#include <dirent.h>
#include <string>
#include <unordered_map>
#include <vector>

using namespace dlib;
using namespace std;

// ResNet network copied from dnn_face_recognition_ex.cpp in dlib/examples
template <template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET>
using residual = add_prev1<block<N,BN,1,tag1<SUBNET>>>;

template <template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET>
using residual_down = add_prev2<avg_pool<2,2,2,2,skip1<tag2<block<N,BN,2,tag1<SUBNET>>>>>>;

template <int N, template <typename> class BN, int stride, typename SUBNET>
using block = BN<con<N,3,3,1,1,relu<BN<con<N,3,3,stride,stride,SUBNET>>>>>;

template <int N, typename SUBNET> using ares      = relu<residual<block,N,affine,SUBNET>>;
template <int N, typename SUBNET> using ares_down = relu<residual_down<block,N,affine,SUBNET>>;

template <typename SUBNET> using alevel0 = ares_down<256,SUBNET>;
template <typename SUBNET> using alevel1 = ares<256,ares<256,ares_down<256,SUBNET>>>;
template <typename SUBNET> using alevel2 = ares<128,ares<128,ares_down<128,SUBNET>>>;
template <typename SUBNET> using alevel3 = ares<64,ares<64,ares<64,ares_down<64,SUBNET>>>>;
template <typename SUBNET> using alevel4 = ares<32,ares<32,ares<32,SUBNET>>>;

using anet_type = loss_metric<fc_no_bias<128,avg_pool_everything<
                            alevel0<
                            alevel1<
                            alevel2<
                            alevel3<
                            alevel4<
                            max_pool<3,3,2,2,relu<affine<con<32,7,7,2,2,
                            input_rgb_image_sized<150>
                            >>>>>>>>>>>>;

class DLibFaceRecognizer {
 private:
  std::string landmark_model;
  std::string model_dir_path;
  std::string image_dir_path;
  std::string dnn_model;
  anet_type net;
  dlib::shape_predictor sp;
  std::unordered_map<int, dlib::full_object_detection> mFaceShapeMap;
  dlib::frontal_face_detector face_detector;
  std::vector<dlib::rectangle> rects;
  std::vector<std::string> rec_names;
  std::vector<matrix<float,0,1>> rec_face_descriptors;
  std::vector<dlib::rectangle> rec_rects;
  std::vector<std::string> rec_labels;
  std::vector<float> rec_confidences;
  bool is_training;

inline void init() {
  LOG(INFO) << "init DLibFaceRecognizer";
  face_detector = dlib::get_frontal_face_detector();
  landmark_model = model_dir_path + "/shape_predictor_5_face_landmarks.dat";
  dnn_model = model_dir_path + "/dlib_face_recognition_resnet_model_v1.dat";
  image_dir_path = model_dir_path + "/images";
  is_training = false;
}

public:
inline void train() {
  LOG(INFO) << "train DLibFaceRecognizer";
  struct dirent* entry;
  DIR* dp;

dp = opendir((image_dir_path).c_str());
if (dp == NULL) {
    LOG(INFO) << ("Opendir: Path does not exist or could not be read.");
}

std::vector<matrix<rgb_pixel>> faces;
std::vector<std::string> names;

// load images from dlib image directory and extract faces
while ((entry = readdir(dp))) {
  std::string filename = entry->d_name;
  if (filename=="." || filename=="..") continue;

  cv::Mat file_image = cv::imread(image_dir_path + "/" + filename, CV_LOAD_IMAGE_COLOR);
  LOG(INFO) << "Load image " << (entry->d_name);
  dlib::cv_image<dlib::bgr_pixel> img(file_image);

  std::vector<dlib::rectangle> frects = face_detector(img);
  if (frects.size()==1) {
    auto face = frects[0];
    auto shape = sp(img, face);
    matrix<rgb_pixel> face_chip;
    extract_image_chip(img, get_face_chip_details(shape,150,0.25), face_chip);
    faces.push_back(move(face_chip));
    names.push_back(filename);
    LOG(INFO) << "Added image " << filename;
  } else if (frects.size()==0) {
    LOG(INFO) << "No face found in image " << filename;
  } else {
    LOG(INFO) << "More than one face found in image " << filename;
  }
}
closedir(dp);

is_training = true;
// calculate face descriptors and set global vars
LOG(INFO) << "Calculating face descriptors " << jniutils::currentDateTime();
rec_face_descriptors = net(faces);
LOG(INFO) << "Calculated face descriptors  " << jniutils::currentDateTime()<<" Size "<<rec_face_descriptors.size();
rec_names = names;
is_training = false;

}

DLibFaceRecognizer() { init(); }

DLibFaceRecognizer(const std::string& dlib_rec_example_dir)
    : model_dir_path(dlib_rec_example_dir) {
  init();
  if (!landmark_model.empty() && jniutils::fileExists(landmark_model) &&
      !dnn_model.empty() && jniutils::fileExists(dnn_model)) {
    // load the model weights
    dlib::deserialize(landmark_model) >> sp;
    dlib::deserialize(dnn_model) >> net;
    LOG(INFO) << "Models loaded";
  }
}

inline int rec(const cv::Mat& image) {
  if (is_training) return 0;
  if (image.empty()) return 0;
  if (image.channels() == 1) {
    cv::cvtColor(image, image, CV_GRAY2BGR);
  }
  CHECK(image.channels() == 3);

dlib::cv_image<dlib::bgr_pixel> img(image);

std::vector<matrix<rgb_pixel>> faces;
std::vector<dlib::rectangle> frects = face_detector(img);
for (auto face : frects)
{
  auto shape = sp(img, face);
  matrix<rgb_pixel> face_chip;
  extract_image_chip(img, get_face_chip_details(shape,150,0.25), face_chip);
  faces.push_back(move(face_chip));
}

if (faces.size() == 0)
{
  LOG(INFO) << "No faces found in image!";
}
LOG(INFO) << "calculating face descriptor in image..." << jniutils::currentDateTime();
std::vector<matrix<float,0,1>> face_descriptors = net(faces);
LOG(INFO) << "face descriptors in camera image calculated   "<<jniutils::currentDateTime()<<" Size "<<face_descriptors.size();

rec_rects.clear();
rec_labels.clear();
rec_confidences.clear();  // also reset confidences so indices stay aligned with rects/labels
for (size_t i = 0; i < face_descriptors.size(); ++i) {
  for (size_t j = 0; j < rec_face_descriptors.size(); ++j) {
    float confidence = length(face_descriptors[i] - rec_face_descriptors[j]);
    if (confidence < 0.6) {
      LOG(INFO) << rec_names[j] << " FOUND!!!!";
      dlib::rectangle r = frects[i];
      rec_rects.push_back(r);
      rec_labels.push_back(rec_names[j]);
      rec_confidences.push_back(confidence);
    }
  }
}

return rec_rects.size();

}

virtual inline int det(const cv::Mat& image) {
  if (is_training) return 0;
  if (image.empty()) return 0;
  if (image.channels() == 1) {
    cv::cvtColor(image, image, CV_GRAY2BGR);
  }
  CHECK(image.channels() == 3);
  // TODO: convert to a gray image to speed up detection;
  // a color image is unnecessary for face/landmark detection

dlib::cv_image<dlib::bgr_pixel> img(image);

std::vector<matrix<rgb_pixel>> faces;
rects = face_detector(img);
return rects.size();

}

inline std::vector<dlib::rectangle> getRecResultRects() { return rec_rects; }
inline std::vector<std::string> getRecResultLabels() { return rec_labels; }
inline std::vector<dlib::rectangle> getDetResultRects() { return rects; }
inline std::vector<float> getRecResultConfidences() { return rec_confidences; }
};

//------------------------------ jni_face_rec.cpp

#include <android/bitmap.h>
#include <jni_common/jni_bitmap2mat.h>
#include <jni_common/jni_primitives.h>
#include <jni_common/jni_fileutils.h>
#include <jni_common/jni_utils.h>
// (some original header names were lost in the issue formatting;
// the code below needs at least these)
#include <memory>
#include <mutex>

using namespace cv;

extern JNI_VisionDetRet* g_pJNI_VisionDetRet;

namespace {

#define JAVA_NULL 0

using RecPtr = DLibFaceRecognizer*;

class JNI_FaceRec {
 public:
  JNI_FaceRec(JNIEnv* env) {
    jclass clazz = env->FindClass(CLASSNAME_FACE_REC);
    mNativeContext = env->GetFieldID(clazz, "mNativeFaceRecContext", "J");
    env->DeleteLocalRef(clazz);
  }

  RecPtr getRecognizerPtrFromJava(JNIEnv* env, jobject thiz) {
    RecPtr const p = (RecPtr)env->GetLongField(thiz, mNativeContext);
    return p;
  }

  void setRecognizerPtrToJava(JNIEnv* env, jobject thiz, jlong ptr) {
    env->SetLongField(thiz, mNativeContext, ptr);
  }

  jfieldID mNativeContext;
};

// Protect getting/setting and creating/deleting pointer between java/native
std::mutex gLock;

std::shared_ptr<JNI_FaceRec> getJNI_FaceRec(JNIEnv* env) {
  static std::once_flag sOnceInitflag;
  static std::shared_ptr<JNI_FaceRec> sJNI_FaceRec;
  std::call_once(sOnceInitflag,
                 [env]() { sJNI_FaceRec = std::make_shared<JNI_FaceRec>(env); });
  return sJNI_FaceRec;
}

RecPtr const getRecPtr(JNIEnv* env, jobject thiz) {
  std::lock_guard<std::mutex> lock(gLock);
  return getJNI_FaceRec(env)->getRecognizerPtrFromJava(env, thiz);
}

// The function to set a pointer to java and delete it if newPtr is empty
void setRecPtr(JNIEnv* env, jobject thiz, RecPtr newPtr) {
  std::lock_guard<std::mutex> lock(gLock);
  RecPtr oldPtr = getJNI_FaceRec(env)->getRecognizerPtrFromJava(env, thiz);
  if (oldPtr != JAVA_NULL) {
    DLOG(INFO) << "setMapManager delete old ptr : " << oldPtr;
    delete oldPtr;
  }
  if (newPtr != JAVA_NULL) {
    DLOG(INFO) << "setMapManager set new ptr : " << newPtr;
  }
  getJNI_FaceRec(env)->setRecognizerPtrToJava(env, thiz, (jlong)newPtr);
}

}  // end unnamed namespace

#ifdef __cplusplus
extern "C" {
#endif

#define DLIB_FACE_JNI_METHOD(METHOD_NAME) \
  Java_com_tzutalin_dlib_FaceRec_##METHOD_NAME

void JNIEXPORT DLIB_FACE_JNI_METHOD(jniNativeClassInit)(JNIEnv* env, jclass _this) {}

jobjectArray getRecResult(JNIEnv* env, RecPtr faceRecognizer, const int& size) {
  LOG(INFO) << "getRecResult";
  jobjectArray jDetRetArray = JNI_VisionDetRet::createJObjectArray(env, size);
  for (int i = 0; i < size; i++) {
    jobject jDetRet = JNI_VisionDetRet::createJObject(env);
    env->SetObjectArrayElement(jDetRetArray, i, jDetRet);
    dlib::rectangle rect = faceRecognizer->getRecResultRects()[i];
    std::string label = faceRecognizer->getRecResultLabels()[i];
    float confidence = faceRecognizer->getRecResultConfidences()[i];
    g_pJNI_VisionDetRet->setRect(env, jDetRet, rect.left(), rect.top(), rect.right(), rect.bottom());
    g_pJNI_VisionDetRet->setLabel(env, jDetRet, label);
    g_pJNI_VisionDetRet->setConfidence(env, jDetRet, confidence);
  }
  return jDetRetArray;
}

jobjectArray getDetResult(JNIEnv* env, RecPtr faceRecognizer, const int& size) {
  LOG(INFO) << "getDetResult";
  jobjectArray jDetRetArray = JNI_VisionDetRet::createJObjectArray(env, size);
  for (int i = 0; i < size; i++) {
    jobject jDetRet = JNI_VisionDetRet::createJObject(env);
    env->SetObjectArrayElement(jDetRetArray, i, jDetRet);
    dlib::rectangle rect = faceRecognizer->getDetResultRects()[i];
    std::string label = "face";
    g_pJNI_VisionDetRet->setRect(env, jDetRet, rect.left(), rect.top(), rect.right(), rect.bottom());
    g_pJNI_VisionDetRet->setLabel(env, jDetRet, label);
  }
  return jDetRetArray;
}

JNIEXPORT jobjectArray JNICALL DLIB_FACE_JNI_METHOD(jniBitmapDetect)(JNIEnv* env, jobject thiz, jobject bitmap) {
  LOG(INFO) << "jniBitmapFaceDet";
  cv::Mat rgbaMat;
  cv::Mat bgrMat;
  jniutils::ConvertBitmapToRGBAMat(env, bitmap, rgbaMat, true);
  cv::cvtColor(rgbaMat, bgrMat, cv::COLOR_RGBA2BGR);
  RecPtr mRecPtr = getRecPtr(env, thiz);
  jint size = mRecPtr->det(bgrMat);
  LOG(INFO) << "det face size: " << size;
  return getDetResult(env, mRecPtr, size);
}

JNIEXPORT jobjectArray JNICALL DLIB_FACE_JNI_METHOD(jniBitmapRec)(JNIEnv* env, jobject thiz, jobject bitmap) {
  LOG(INFO) << "jniBitmapFaceDet";
  cv::Mat rgbaMat;
  cv::Mat bgrMat;
  jniutils::ConvertBitmapToRGBAMat(env, bitmap, rgbaMat, true);
  cv::cvtColor(rgbaMat, bgrMat, cv::COLOR_RGBA2BGR);
  RecPtr mRecPtr = getRecPtr(env, thiz);
  jint size = mRecPtr->rec(bgrMat);
  LOG(INFO) << "rec face size: " << size;
  return getRecResult(env, mRecPtr, size);
}

jint JNIEXPORT JNICALL DLIB_FACE_JNI_METHOD(jniInit)(JNIEnv* env, jobject thiz, jstring jDirPath) {
  LOG(INFO) << "jniInit";
  std::string dirPath = jniutils::convertJStrToString(env, jDirPath);
  RecPtr mRecPtr = new DLibFaceRecognizer(dirPath);
  setRecPtr(env, thiz, mRecPtr);
  return JNI_OK;
}

jint JNIEXPORT JNICALL DLIB_FACE_JNI_METHOD(jniTrain)(JNIEnv* env, jobject thiz) {
  LOG(INFO) << "jniTrain";
  RecPtr mRecPtr = getRecPtr(env, thiz);
  mRecPtr->train();
  return JNI_OK;
}

jint JNIEXPORT JNICALL DLIB_FACE_JNI_METHOD(jniDeInit)(JNIEnv* env, jobject thiz) {
  LOG(INFO) << "jniDeInit";
  setRecPtr(env, thiz, JAVA_NULL);
  return JNI_OK;
}

#ifdef __cplusplus
}
#endif

//------------------ VisionDetRet.java

package com.tzutalin.dlib;

import android.graphics.Point;
import java.util.ArrayList;

So please help me with your valuable suggestions: what can I do to get an accurate match among 250-300 images?