Can you post the exact source code for both programs (Python and C++)?
The C++ code is below:
```cpp
#include <iostream>

#include <dlib/image_processing/frontal_face_detector.h>
#include <dlib/image_processing.h>
#include <dlib/image_processing/render_face_detections.h>
#include <dlib/gui_widgets.h>
#include <dlib/image_io.h>
#include <dlib/matrix.h>
#include <dlib/geometry/vector.h>
#include <dlib/dnn.h>

using namespace dlib;
using namespace std;

typedef matrix<double,0,1> cv;

// this code is copied from the dlib Python interface
class face_recognition_model_v1
{
public:

    face_recognition_model_v1(const std::string& model_filename)
    {
        deserialize(model_filename) >> net;
    }

    matrix<double,0,1> compute_face_descriptor (
        matrix<rgb_pixel> img,
        const full_object_detection& face,
        const int num_jitters
    )
    {
        std::vector<full_object_detection> faces(1, face);
        return compute_face_descriptors(img, faces, num_jitters)[0];
    }

    std::vector<matrix<double,0,1>> compute_face_descriptors (
        matrix<rgb_pixel> img,
        const std::vector<full_object_detection>& faces,
        const int num_jitters
    )
    {
        for (auto& f : faces)
        {
            if (f.num_parts() != 68 && f.num_parts() != 5)
                throw dlib::error("The full_object_detection must use the iBUG 300W 68 point face landmark style or dlib's 5 point style.");
        }

        std::vector<chip_details> dets;
        for (auto& f : faces)
            dets.push_back(get_face_chip_details(f, 150, 0.25));
        dlib::array<matrix<rgb_pixel>> face_chips;
        extract_image_chips(img, dets, face_chips);

        std::vector<matrix<double,0,1>> face_descriptors;
        face_descriptors.reserve(face_chips.size());

        if (num_jitters <= 1)
        {
            // extract descriptors and convert from float vectors to double vectors
            for (auto& d : net(face_chips, 16))
                face_descriptors.push_back(matrix_cast<double>(d));
        }
        else
        {
            // extract descriptors from several jittered copies of each chip and average them
            for (auto& fimg : face_chips)
                face_descriptors.push_back(matrix_cast<double>(mean(mat(net(jitter_image(fimg, num_jitters), 16)))));
        }
        return face_descriptors;
    }

private:

    dlib::rand rnd;

    std::vector<matrix<rgb_pixel>> jitter_image(
        const matrix<rgb_pixel>& img,
        const int num_jitters
    )
    {
        std::vector<matrix<rgb_pixel>> crops;
        for (int i = 0; i < num_jitters; ++i)
            crops.push_back(dlib::jitter_image(img, rnd));
        return crops;
    }

    template <template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET>
    using residual = add_prev1<block<N,BN,1,tag1<SUBNET>>>;

    template <template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET>
    using residual_down = add_prev2<avg_pool<2,2,2,2,skip1<tag2<block<N,BN,2,tag1<SUBNET>>>>>>;

    template <int N, template <typename> class BN, int stride, typename SUBNET>
    using block = BN<con<N,3,3,1,1,relu<BN<con<N,3,3,stride,stride,SUBNET>>>>>;

    template <int N, typename SUBNET> using ares      = relu<residual<block,N,affine,SUBNET>>;
    template <int N, typename SUBNET> using ares_down = relu<residual_down<block,N,affine,SUBNET>>;

    template <typename SUBNET> using alevel0 = ares_down<256,SUBNET>;
    template <typename SUBNET> using alevel1 = ares<256,ares<256,ares_down<256,SUBNET>>>;
    template <typename SUBNET> using alevel2 = ares<128,ares<128,ares_down<128,SUBNET>>>;
    template <typename SUBNET> using alevel3 = ares<64,ares<64,ares<64,ares_down<64,SUBNET>>>>;
    template <typename SUBNET> using alevel4 = ares<32,ares<32,ares<32,SUBNET>>>;

    using anet_type = loss_metric<fc_no_bias<128,avg_pool_everything<
                                alevel0<
                                alevel1<
                                alevel2<
                                alevel3<
                                alevel4<
                                max_pool<3,3,2,2,relu<affine<con<32,7,7,2,2,
                                input_rgb_image_sized<150>
                                >>>>>>>>>>>>;
    anet_type net;
};

// main: compute a face descriptor in C++
int main(int argc, char** argv) {
    // test with the same image as the Python code; it contains only one face
    std::string img_path = "/data/service/face_rec/face_yy_sample/399879996/1804c26f4110409b5f768c85cd0588c24bbd726f39cf.jpg";

    // face detector
    dlib::frontal_face_detector detector = dlib::get_frontal_face_detector();

    std::string sp_path = "/home/xy/anaconda2/lib/python2.7/site-packages/face_recognition_models/models/shape_predictor_5_face_landmarks.dat";
    dlib::shape_predictor sp;
    dlib::deserialize(sp_path) >> sp;

    std::string face_rec_path = "/home/xy/anaconda2/lib/python2.7/site-packages/face_recognition_models/models/dlib_face_recognition_resnet_model_v1.dat";
    face_recognition_model_v1 face_encoder = face_recognition_model_v1(face_rec_path);

    // Now we will ask the shape_predictor to tell us the pose of
    // each face we detected.
    std::vector<dlib::full_object_detection> shapes;

    dlib::matrix<dlib::rgb_pixel> img;
    dlib::load_image(img, img_path);

    std::vector<dlib::rectangle> dets = detector(img, 1);
    std::cout << "Number of faces detected: " << dets.size() << std::endl;
    // Number of faces detected: 1

    dlib::full_object_detection shape = sp(img, dets[0]);  // only one face
    std::cout << trans(face_encoder.compute_face_descriptor(img, shape, 1)) << std::endl;
    // output below; only the first 10 elements are shown
    // -0.0446148 0.117586 0.00275135 -0.0249811 -0.0658778 -0.023674 -0.0102546 -0.101928 0.112667 -0.0391978 .........
    return 0;
}
```
The Python code, based on the dlib example:
```python
# coding:utf-8
import dlib
import face_recognition


def test_img_encoding():
    # the same image as in the C++ code, with only one face
    img_path = "/data/service/face_rec/face_yy_sample/399879996/1804c26f4110409b5f768c85cd0588c24bbd726f39cf.jpg"
    predictor_path = "/home/xy/anaconda2/lib/python2.7/site-packages/face_recognition_models/models/shape_predictor_5_face_landmarks.dat"
    face_rec_model_path = "/home/xy/anaconda2/lib/python2.7/site-packages/face_recognition_models/models/dlib_face_recognition_resnet_model_v1.dat"

    # Load all the models we need: a detector to find the faces, a shape predictor
    # to find face landmarks so we can precisely localize the face, and finally the
    # face recognition model.
    detector = dlib.get_frontal_face_detector()
    sp = dlib.shape_predictor(predictor_path)
    facerec = dlib.face_recognition_model_v1(face_rec_model_path)

    img = face_recognition.load_image_file(img_path)

    # dets = detector(img, 1)
    dets = detector(img, 0)  # do not upsample the image (note: the C++ code above upsamples once)
    for k, d in enumerate(dets):
        shape = sp(img, d)
        face_descriptor = facerec.compute_face_descriptor(img, shape)
        print(" ".join(str(item) for item in face_descriptor))
        # -0.0440603867173 0.123068407178 0.00691157858819 -0.0269216317683 -0.0613840222359 -0.0236160680652 -0.0131134930998 -0.104631096125 0.112277835608 -0.0405800752342 .......


if __name__ == '__main__':
    test_img_encoding()
```
As seen above, the two 128-dimensional output vectors are not the same.
Am I doing something wrong?
Thank you very much for your reply.
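A minimal sketch to quantify the difference, using only the first 10 components printed above (the full 128-dimensional vectors would be needed for the exact distance):

```python
import math

# first 10 components printed by the C++ and Python programs above
cpp_vec = [-0.0446148, 0.117586, 0.00275135, -0.0249811, -0.0658778,
           -0.023674, -0.0102546, -0.101928, 0.112667, -0.0391978]
py_vec = [-0.0440603867173, 0.123068407178, 0.00691157858819, -0.0269216317683,
          -0.0613840222359, -0.0236160680652, -0.0131134930998, -0.104631096125,
          0.112277835608, -0.0405800752342]

# Euclidean distance over the shown components; for reference, dlib treats
# full-descriptor distances below 0.6 as the same person
dist = math.sqrt(sum((a - b) ** 2 for a, b in zip(cpp_vec, py_vec)))
print(dist)
```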
@ageitgey Can you give any ideas? Thank you.
@Jayhello did you figure it out?
+1
I wish there were more guidance on this. There are still unanswered questions: https://stackoverflow.com/questions/52554798/dlib-python-face-encoding-vs-c-face-encoding
Can you try loading an image in C++ and Python and verifying that the image arrays are equal? It's possible the image loading code is doing something slightly different in each case.
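For example, something like this in Python would do it (a sketch; the path is a placeholder, and it assumes a dlib build new enough to expose `dlib.load_rgb_image`, which goes through the same C++ image loading code as `dlib::load_image`):

```python
import numpy as np
import dlib
import face_recognition

img_path = "test.jpg"  # placeholder; use the same image as above

# face_recognition loads images through PIL, while dlib.load_rgb_image uses
# dlib's own (C++) image loading, i.e. the same decoder the C++ program uses
img_pil = face_recognition.load_image_file(img_path)
img_dlib = np.asarray(dlib.load_rgb_image(img_path))

print(img_pil.shape, img_dlib.shape)
print("identical:", np.array_equal(img_pil, img_dlib))
print("max abs difference:", np.abs(img_pil.astype(int) - img_dlib.astype(int)).max())
```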
I am loading the image with PIL in Python and with cv2 in C++. Maybe this is causing my matrices to be different.
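One thing worth checking: cv2 loads images in BGR channel order, while PIL (and dlib) use RGB, so the channels have to be swapped before computing the descriptor. A quick check in Python (sketch, hypothetical path):

```python
import cv2
import numpy as np
from PIL import Image

img_path = "test.jpg"  # hypothetical path

img_cv2 = cv2.imread(img_path)                             # OpenCV loads BGR
img_pil = np.asarray(Image.open(img_path).convert("RGB"))  # PIL loads RGB

print("equal without conversion:", np.array_equal(img_cv2, img_pil))
print("equal after BGR->RGB:",
      np.array_equal(cv2.cvtColor(img_cv2, cv2.COLOR_BGR2RGB), img_pil))
```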
@ageitgey I already checked a few pixels and they were fine, but I should do an exhaustive test. I'll try that as soon as I can and get back to you. I know that differences in colour model when loading images can cause the pixel values to differ (e.g. https://stackoverflow.com/questions/39649292/imageio-reading-slightly-different-rgb-values-than-other-methods/).
Description
I use the face_recognition Python package to compute the 128-dimensional face encoding.
I know the corresponding code in C++: http://dlib.net/dnn_face_recognition_ex.cpp.html
The models are downloaded when face_recognition is installed, e.g. "/home/xy/anaconda2/lib/python2.7/site-packages/face_recognition_models/models/dlib_face_recognition_resnet_model_v1.dat".
I have also changed num_jitters to 1 in C++. For the same image, the C++ and Python encoding outputs are shown above; a sketch of a Python call that matches the C++ parameters follows below.
Can you give any advice?
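A sketch of a Python call matching the C++ parameters (hypothetical paths; note that in the C++ code above, `num_jitters <= 1` takes the non-jittered path, so 0 and 1 produce the same descriptor):

```python
import dlib
import face_recognition

img_path = "test.jpg"  # hypothetical path
sp = dlib.shape_predictor("shape_predictor_5_face_landmarks.dat")
facerec = dlib.face_recognition_model_v1("dlib_face_recognition_resnet_model_v1.dat")

img = face_recognition.load_image_file(img_path)
detector = dlib.get_frontal_face_detector()
det = detector(img, 1)[0]  # upsample once, as the C++ code does
shape = sp(img, det)

# pass num_jitters explicitly, matching the third argument of the C++ call
desc = facerec.compute_face_descriptor(img, shape, 1)
print(list(desc)[:10])
```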