gv22ga / dlib-face-recognition-android

Android app to demo dlib face recognition
MIT License

recognize result without confidence #10

Closed liaoweiguo closed 6 years ago

liaoweiguo commented 6 years ago

VisionDetRet[] detRets = jniBitmapRec(bitmap);

detRets[i].getConfidence()

always returns 0.0, so I cannot pick the best face:

        results = mFaceRec.recognize(mCroppedBitmap);
        long endTime = System.currentTimeMillis();
        Log.d(TAG, "Time cost: " + String.valueOf((endTime - startTime) / 1000f) + " sec");

        ArrayList<String> names = new ArrayList<>();
        for(VisionDetRet n:results) {
            names.add(n.getLabel() + n.getConfidence());
        }
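What I want is to pick the single best face once the confidence is filled in, something like this (a sketch, assuming getConfidence() would hold the descriptor distance, so lower is better):

    // Sketch: pick the best match, i.e. the result with the smallest distance.
    VisionDetRet best = null;
    for (VisionDetRet ret : results) {
        if (best == null || ret.getConfidence() < best.getConfidence()) {
            best = ret;
        }
    }
    if (best != null) {
        Log.d(TAG, "Best match: " + best.getLabel() + " (" + best.getConfidence() + ")");
    }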
gv22ga commented 6 years ago

I haven't implemented the getConfidence() method; it will always return 0. The idea is that getLabel() will return the name of the recognized person only when it is confident enough.
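So with the unpatched library, the only usable signal is whether a match was returned at all. A minimal sketch, assuming recognize() returns a list as in the snippet above:

    // rec() only returns entries that passed the internal distance check,
    // so "present in the results" is currently the only match signal.
    if (results.isEmpty()) {
        Log.d(TAG, "No enrolled face matched");
    } else {
        for (VisionDetRet ret : results) {
            Log.d(TAG, "Recognized: " + ret.getLabel());
        }
    }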

liaoweiguo commented 6 years ago

I added getConfidence(), but at first it failed to build.

Anyway, the ability to tune the confidence is absolutely necessary. Please consider it when you have time. Here is the patch:

  1. recognizer.h, class DLibFaceRecognizer, line 74: add a member for the confidences.

    std::vector<float> rec_confidences;

  2. recognizer.h, DLibFaceRecognizer::rec, line 180: record the confidence of every match.

    float confidence;
    for (size_t i = 0; i < face_descriptors.size(); ++i) {
      for (size_t j = 0; j < rec_face_descriptors.size(); ++j) {
        confidence = length(face_descriptors[i] - rec_face_descriptors[j]);
        if (confidence < 0.5) {
          LOG(INFO) << rec_names[j] << " FOUND!!!!";
          dlib::rectangle r = frects[i];
          rec_rects.push_back(r);
          rec_labels.push_back(rec_names[j]);
          rec_confidences.push_back(confidence);
        }
      }
    }

  3. recognizer.h, last line: add a getter.

    inline std::vector<float> getRecResultConfidences() { return rec_confidences; }

  4. jni_face_rec.cpp, line 92: copy the confidence into the Java result object.

    jobjectArray getRecResult(JNIEnv* env, RecPtr faceRecognizer, const int& size) {
      LOG(INFO) << "getRecResult";
      jobjectArray jDetRetArray = JNI_VisionDetRet::createJObjectArray(env, size);
      for (int i = 0; i < size; i++) {
        jobject jDetRet = JNI_VisionDetRet::createJObject(env);
        env->SetObjectArrayElement(jDetRetArray, i, jDetRet);
        dlib::rectangle rect = faceRecognizer->getRecResultRects()[i];
        std::string label = faceRecognizer->getRecResultLabels()[i];
        float confidence = faceRecognizer->getRecResultConfidences()[i];
        g_pJNI_VisionDetRet->setRect(env, jDetRet, rect.left(), rect.top(), rect.right(), rect.bottom());
        g_pJNI_VisionDetRet->setLabel(env, jDetRet, label);
        g_pJNI_VisionDetRet->setConfidence(env, jDetRet, confidence);
      }
      return jDetRetArray;
    }
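Note that the loop in step 2 pushes one entry per enrolled descriptor under the threshold, so a single detected face can come back several times. The app can collapse these on the Java side, for example (a sketch; the rectangle getters on VisionDetRet are assumed):

    // Sketch: keep only the lowest-distance match for each face rectangle.
    // (needs java.util.Map / java.util.HashMap)
    Map<String, VisionDetRet> bestPerFace = new HashMap<>();
    for (VisionDetRet ret : detRets) {
        String key = ret.getLeft() + "," + ret.getTop();  // assumed getters
        VisionDetRet cur = bestPerFace.get(key);
        if (cur == null || ret.getConfidence() < cur.getConfidence()) {
            bestPerFace.put(key, ret);
        }
    }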
liaoweiguo commented 6 years ago

I rebuilt the .so and it works. With the threshold at 0.6 the lib gives a lot of false detections, so having the confidence lets the app filter them out.

gv22ga commented 6 years ago

The threshold of 0.6 gives over 99% accuracy, as shown in this example on the official site. I think the problem is with image quality, which varies with the device hardware. You can try changing some parameters to control image quality as described in the readme.
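Since the usable cutoff depends on the device and image quality, one option is to leave the native threshold at 0.6 and apply a stricter, tunable one in the app (a sketch; STRICT_THRESHOLD is an assumed app-level knob, not part of the library):

    // Sketch: app-level cutoff on top of the native 0.6 threshold.
    final class MatchFilter {
        static final float STRICT_THRESHOLD = 0.5f;  // assumed tuning knob

        static String labelFor(VisionDetRet ret) {
            // Distances above the strict cutoff are treated as unknown.
            return ret.getConfidence() < STRICT_THRESHOLD ? ret.getLabel() : "unknown";
        }
    }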

kuldeepNimawat commented 4 years ago

I did all of this but I always get confidence 0.0. I want to get the best match in the result, which is not possible without a value to differentiate the candidates. Please help me resolve this problem.

kuldeepNimawat commented 4 years ago

Here is the code I have so far, following the suggestions above.

recognizer.h

// Created by Gaurav on Feb 23, 2018

#pragma once

#include <dlib/dnn.h>
#include <dlib/string.h>
#include <jni_common/jni_fileutils.h>
#include <jni_common/jni_utils.h>
#include <dlib/image_processing.h>
#include <dlib/image_processing/frontal_face_detector.h>
#include <dlib/opencv/cv_image.h>
#include <dlib/image_loader/load_image.h>
#include <glog/logging.h>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/opencv.hpp>
// (some original header names were lost in the issue formatting;
// the code below needs at least these)
#include <dirent.h>
#include <string>
#include <unordered_map>
#include <vector>

using namespace dlib;
using namespace std;

// ResNet network copied from dnn_face_recognition_ex.cpp in dlib/examples
template <template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET>
using residual = add_prev1<block<N,BN,1,tag1<SUBNET>>>;

template <template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET>
using residual_down = add_prev2<avg_pool<2,2,2,2,skip1<tag2<block<N,BN,2,tag1<SUBNET>>>>>>;

template <int N, template <typename> class BN, int stride, typename SUBNET>
using block = BN<con<N,3,3,1,1,relu<BN<con<N,3,3,stride,stride,SUBNET>>>>>;

template <int N, typename SUBNET> using ares      = relu<residual<block,N,affine,SUBNET>>;
template <int N, typename SUBNET> using ares_down = relu<residual_down<block,N,affine,SUBNET>>;

template <typename SUBNET> using alevel0 = ares_down<256,SUBNET>;
template <typename SUBNET> using alevel1 = ares<256,ares<256,ares_down<256,SUBNET>>>;
template <typename SUBNET> using alevel2 = ares<128,ares<128,ares_down<128,SUBNET>>>;
template <typename SUBNET> using alevel3 = ares<64,ares<64,ares<64,ares_down<64,SUBNET>>>>;
template <typename SUBNET> using alevel4 = ares<32,ares<32,ares<32,SUBNET>>>;

using anet_type = loss_metric<fc_no_bias<128,avg_pool_everything<
                            alevel0<
                            alevel1<
                            alevel2<
                            alevel3<
                            alevel4<
                            max_pool<3,3,2,2,relu<affine<con<32,7,7,2,2,
                            input_rgb_image_sized<150>
                            >>>>>>>>>>>>;

class DLibFaceRecognizer {
 private:
  std::string landmark_model;
  std::string model_dir_path;
  std::string image_dir_path;
  std::string dnn_model;
  anet_type net;
  dlib::shape_predictor sp;
  std::unordered_map<int, dlib::full_object_detection> mFaceShapeMap;
  dlib::frontal_face_detector face_detector;
  std::vector<dlib::rectangle> rects;
  std::vector<std::string> rec_names;
  std::vector<matrix<float,0,1>> rec_face_descriptors;
  std::vector<dlib::rectangle> rec_rects;
  std::vector<std::string> rec_labels;
  std::vector<float> rec_confidences;
  bool is_training;

inline void init() {
  LOG(INFO) << "init DLibFaceRecognizer";
  face_detector = dlib::get_frontal_face_detector();
  landmark_model = model_dir_path + "/shape_predictor_5_face_landmarks.dat";
  dnn_model = model_dir_path + "/dlib_face_recognition_resnet_model_v1.dat";
  image_dir_path = model_dir_path + "/images";
  is_training = false;
}

public:
inline void train() {
  LOG(INFO) << "train DLibFaceRecognizer";
  struct dirent* entry;
  DIR* dp;

dp = opendir((image_dir_path).c_str());
if (dp == NULL) {
    LOG(INFO) << ("Opendir: Path does not exist or could not be read.");
}

std::vector<matrix<rgb_pixel>> faces;
std::vector<std::string> names;

// load images from dlib image directory and extract faces
while ((entry = readdir(dp))) {
  std::string filename = entry->d_name;
  if (filename=="." || filename=="..") continue;

  cv::Mat file_image = cv::imread(image_dir_path + "/" + filename, CV_LOAD_IMAGE_COLOR);
  LOG(INFO) << "Load image " << (entry->d_name);
  dlib::cv_image<dlib::bgr_pixel> img(file_image);

  std::vector<dlib::rectangle> frects = face_detector(img);
  if (frects.size()==1) {
    auto face = frects[0];
    auto shape = sp(img, face);
    matrix<rgb_pixel> face_chip;
    extract_image_chip(img, get_face_chip_details(shape,150,0.25), face_chip);
    faces.push_back(move(face_chip));
    names.push_back(filename);
    LOG(INFO) << "Added image " << filename;
  } else if (frects.size()==0) {
    LOG(INFO) << "No face found in image " << filename;
  } else {
    LOG(INFO) << "More than one face found in image " << filename;
  }
}
closedir(dp);

is_training = true;
// calculate face descriptors and set global vars
LOG(INFO) << "Calculating face descriptors " << jniutils::currentDateTime();
rec_face_descriptors = net(faces);
LOG(INFO) << "Calculated face descriptors  " << jniutils::currentDateTime()<<" Size "<<rec_face_descriptors.size();
rec_names = names;
is_training = false;

}

DLibFaceRecognizer() { init(); }

DLibFaceRecognizer(const std::string& dlib_rec_example_dir)
    : model_dir_path(dlib_rec_example_dir) {
  init();
  if (!landmark_model.empty() && jniutils::fileExists(landmark_model) &&
      !dnn_model.empty() && jniutils::fileExists(dnn_model)) {
    // load the model weights
    dlib::deserialize(landmark_model) >> sp;
    dlib::deserialize(dnn_model) >> net;
    LOG(INFO) << "Models loaded";
  }
}

inline int rec(const cv::Mat& image) {
  if (is_training) return 0;
  if (image.empty()) return 0;
  if (image.channels() == 1) {
    cv::cvtColor(image, image, CV_GRAY2BGR);
  }
  CHECK(image.channels() == 3);

dlib::cv_image<dlib::bgr_pixel> img(image);

std::vector<matrix<rgb_pixel>> faces;
std::vector<dlib::rectangle> frects = face_detector(img);
for (auto face : frects)
{
  auto shape = sp(img, face);
  matrix<rgb_pixel> face_chip;
  extract_image_chip(img, get_face_chip_details(shape,150,0.25), face_chip);
  faces.push_back(move(face_chip));
}

if (faces.size() == 0)
{
  LOG(INFO) << "No faces found in image!";
}
LOG(INFO) << "calculating face descriptor in image..." << jniutils::currentDateTime();
std::vector<matrix<float,0,1>> face_descriptors = net(faces);
LOG(INFO) << "face descriptors in camera image calculated   "<<jniutils::currentDateTime()<<" Size "<<face_descriptors.size();

rec_rects.clear();
rec_labels.clear();
rec_confidences.clear();  // also reset confidences so indices stay aligned with rects/labels
for (size_t i = 0; i < face_descriptors.size(); ++i) {
  for (size_t j = 0; j < rec_face_descriptors.size(); ++j) {
    float confidence = length(face_descriptors[i] - rec_face_descriptors[j]);
    if (confidence < 0.6) {
      LOG(INFO) << rec_names[j] << " FOUND!!!!";
      dlib::rectangle r = frects[i];
      rec_rects.push_back(r);
      rec_labels.push_back(rec_names[j]);
      rec_confidences.push_back(confidence);
    }
  }
}

return rec_rects.size();

}

virtual inline int det(const cv::Mat& image) {
  if (is_training) return 0;
  if (image.empty()) return 0;
  if (image.channels() == 1) {
    cv::cvtColor(image, image, CV_GRAY2BGR);
  }
  CHECK(image.channels() == 3);
  // TODO: convert to a gray image to speed up detection;
  // a color image is unnecessary for face/landmark detection

dlib::cv_image<dlib::bgr_pixel> img(image);

std::vector<matrix<rgb_pixel>> faces;
rects = face_detector(img);
return rects.size();

}

inline std::vector<dlib::rectangle> getRecResultRects() { return rec_rects; }
inline std::vector<std::string> getRecResultLabels() { return rec_labels; }
inline std::vector<dlib::rectangle> getDetResultRects() { return rects; }
inline std::vector<float> getRecResultConfidences() { return rec_confidences; }
};

//------------------------------ jni_face_rec.cpp

#include <android/bitmap.h>
#include <jni_common/jni_bitmap2mat.h>
#include <jni_common/jni_primitives.h>
#include <jni_common/jni_fileutils.h>
#include <jni_common/jni_utils.h>
// (some original header names were lost in the issue formatting;
// the code below needs at least these)
#include <memory>
#include <mutex>

using namespace cv;

extern JNI_VisionDetRet* g_pJNI_VisionDetRet;

namespace {

#define JAVA_NULL 0

using RecPtr = DLibFaceRecognizer*;

class JNI_FaceRec {
 public:
  JNI_FaceRec(JNIEnv* env) {
    jclass clazz = env->FindClass(CLASSNAME_FACE_REC);
    mNativeContext = env->GetFieldID(clazz, "mNativeFaceRecContext", "J");
    env->DeleteLocalRef(clazz);
  }

  RecPtr getRecognizerPtrFromJava(JNIEnv* env, jobject thiz) {
    RecPtr const p = (RecPtr)env->GetLongField(thiz, mNativeContext);
    return p;
  }

  void setRecognizerPtrToJava(JNIEnv* env, jobject thiz, jlong ptr) {
    env->SetLongField(thiz, mNativeContext, ptr);
  }

  jfieldID mNativeContext;
};

// Protect getting/setting and creating/deleting pointer between java/native
std::mutex gLock;

std::shared_ptr<JNI_FaceRec> getJNI_FaceRec(JNIEnv* env) {
  static std::once_flag sOnceInitflag;
  static std::shared_ptr<JNI_FaceRec> sJNI_FaceRec;
  std::call_once(sOnceInitflag,
                 [env]() { sJNI_FaceRec = std::make_shared<JNI_FaceRec>(env); });
  return sJNI_FaceRec;
}

RecPtr const getRecPtr(JNIEnv* env, jobject thiz) {
  std::lock_guard<std::mutex> lock(gLock);
  return getJNI_FaceRec(env)->getRecognizerPtrFromJava(env, thiz);
}

// The function to set a pointer to java and delete it if newPtr is empty
void setRecPtr(JNIEnv* env, jobject thiz, RecPtr newPtr) {
  std::lock_guard<std::mutex> lock(gLock);
  RecPtr oldPtr = getJNI_FaceRec(env)->getRecognizerPtrFromJava(env, thiz);
  if (oldPtr != JAVA_NULL) {
    DLOG(INFO) << "setMapManager delete old ptr : " << oldPtr;
    delete oldPtr;
  }
  if (newPtr != JAVA_NULL) {
    DLOG(INFO) << "setMapManager set new ptr : " << newPtr;
  }
  getJNI_FaceRec(env)->setRecognizerPtrToJava(env, thiz, (jlong)newPtr);
}

}  // end unnamed namespace

#ifdef __cplusplus
extern "C" {
#endif

#define DLIB_FACE_JNI_METHOD(METHOD_NAME) \
  Java_com_tzutalin_dlib_FaceRec_##METHOD_NAME

void JNIEXPORT DLIB_FACE_JNI_METHOD(jniNativeClassInit)(JNIEnv* env, jclass _this) {}

jobjectArray getRecResult(JNIEnv* env, RecPtr faceRecognizer, const int& size) {
  LOG(INFO) << "getRecResult";
  jobjectArray jDetRetArray = JNI_VisionDetRet::createJObjectArray(env, size);
  for (int i = 0; i < size; i++) {
    jobject jDetRet = JNI_VisionDetRet::createJObject(env);
    env->SetObjectArrayElement(jDetRetArray, i, jDetRet);
    dlib::rectangle rect = faceRecognizer->getRecResultRects()[i];
    std::string label = faceRecognizer->getRecResultLabels()[i];
    float confidence = faceRecognizer->getRecResultConfidences()[i];
    g_pJNI_VisionDetRet->setRect(env, jDetRet, rect.left(), rect.top(), rect.right(), rect.bottom());
    g_pJNI_VisionDetRet->setLabel(env, jDetRet, label);
    g_pJNI_VisionDetRet->setConfidence(env, jDetRet, confidence);
  }
  return jDetRetArray;
}

jobjectArray getDetResult(JNIEnv* env, RecPtr faceRecognizer, const int& size) {
  LOG(INFO) << "getDetResult";
  jobjectArray jDetRetArray = JNI_VisionDetRet::createJObjectArray(env, size);
  for (int i = 0; i < size; i++) {
    jobject jDetRet = JNI_VisionDetRet::createJObject(env);
    env->SetObjectArrayElement(jDetRetArray, i, jDetRet);
    dlib::rectangle rect = faceRecognizer->getDetResultRects()[i];
    std::string label = "face";
    g_pJNI_VisionDetRet->setRect(env, jDetRet, rect.left(), rect.top(), rect.right(), rect.bottom());
    g_pJNI_VisionDetRet->setLabel(env, jDetRet, label);
  }
  return jDetRetArray;
}

JNIEXPORT jobjectArray JNICALL DLIB_FACE_JNI_METHOD(jniBitmapDetect)(JNIEnv* env, jobject thiz, jobject bitmap) {
  LOG(INFO) << "jniBitmapFaceDet";
  cv::Mat rgbaMat;
  cv::Mat bgrMat;
  jniutils::ConvertBitmapToRGBAMat(env, bitmap, rgbaMat, true);
  cv::cvtColor(rgbaMat, bgrMat, cv::COLOR_RGBA2BGR);
  RecPtr mRecPtr = getRecPtr(env, thiz);
  jint size = mRecPtr->det(bgrMat);
  LOG(INFO) << "det face size: " << size;
  return getDetResult(env, mRecPtr, size);
}

JNIEXPORT jobjectArray JNICALL DLIB_FACE_JNI_METHOD(jniBitmapRec)(JNIEnv* env, jobject thiz, jobject bitmap) {
  LOG(INFO) << "jniBitmapFaceDet";
  cv::Mat rgbaMat;
  cv::Mat bgrMat;
  jniutils::ConvertBitmapToRGBAMat(env, bitmap, rgbaMat, true);
  cv::cvtColor(rgbaMat, bgrMat, cv::COLOR_RGBA2BGR);
  RecPtr mRecPtr = getRecPtr(env, thiz);
  jint size = mRecPtr->rec(bgrMat);
  LOG(INFO) << "rec face size: " << size;
  return getRecResult(env, mRecPtr, size);
}

jint JNIEXPORT JNICALL DLIB_FACE_JNI_METHOD(jniInit)(JNIEnv* env, jobject thiz, jstring jDirPath) {
  LOG(INFO) << "jniInit";
  std::string dirPath = jniutils::convertJStrToString(env, jDirPath);
  RecPtr mRecPtr = new DLibFaceRecognizer(dirPath);
  setRecPtr(env, thiz, mRecPtr);
  return JNI_OK;
}

jint JNIEXPORT JNICALL DLIB_FACE_JNI_METHOD(jniTrain)(JNIEnv* env, jobject thiz) {
  LOG(INFO) << "jniTrain";
  RecPtr mRecPtr = getRecPtr(env, thiz);
  mRecPtr->train();
  return JNI_OK;
}

jint JNIEXPORT JNICALL DLIB_FACE_JNI_METHOD(jniDeInit)(JNIEnv* env, jobject thiz) {
  LOG(INFO) << "jniDeInit";
  setRecPtr(env, thiz, JAVA_NULL);
  return JNI_OK;
}

#ifdef __cplusplus
}
#endif

//------------------ VisionDetRet.java

package com.tzutalin.dlib;

import android.graphics.Point;
import java.util.ArrayList;

So please help me with your valuable suggestions: what can I do to get an accurate match among 250-300 images?