Closed Rob58329 closed 5 months ago
Please use the latest core (either github with get.py or dev release 3.0.0-rc3). Camera example works fine and you should also have fast detection without the web server.
Here is an example sketch that runs on the latest core and only does face detection (takes 70-120ms depending on format and image)
#include "esp_timer.h"
#include "esp_camera.h"
#include "img_converters.h"
#include <vector>
#include "human_face_detect_msr01.hpp"
#include "human_face_detect_mnp01.hpp"
#define TWO_STAGE 1 /*<! 1: detect by two-stage which is more accurate but slower(with keypoints). */
/*<! 0: detect by one-stage which is less accurate but faster(without keypoints). */
// Camera signal-to-GPIO assignments; setup() copies each of these into the
// matching camera_config_t pin field.
// NOTE(review): -1 presumably marks a line that is not wired on this board — confirm.
#define PWDN_GPIO_NUM -1
#define RESET_GPIO_NUM -1
// Clock output to the sensor (config.pin_xclk).
#define XCLK_GPIO_NUM 15
// SCCB control bus: SIOD -> config.pin_sccb_sda, SIOC -> config.pin_sccb_scl.
#define SIOD_GPIO_NUM 4
#define SIOC_GPIO_NUM 5
// Parallel data lines: Y2..Y9 are assigned to config.pin_d0..pin_d7 in that order.
#define Y2_GPIO_NUM 11
#define Y3_GPIO_NUM 9
#define Y4_GPIO_NUM 8
#define Y5_GPIO_NUM 10
#define Y6_GPIO_NUM 12
#define Y7_GPIO_NUM 18
#define Y8_GPIO_NUM 17
#define Y9_GPIO_NUM 16
// Sync and pixel-clock lines (config.pin_vsync / pin_href / pin_pclk).
#define VSYNC_GPIO_NUM 6
#define HREF_GPIO_NUM 7
#define PCLK_GPIO_NUM 13
bool face_detect() {
int64_t fr_start = esp_timer_get_time();
camera_fb_t *fb = esp_camera_fb_get();
if (!fb) {
Serial.println("Camera capture failed");
return false;
}
size_t out_len, out_width, out_height;
uint8_t *out_buf;
bool detected = false;
if (fb->format == PIXFORMAT_RGB565) {
#if TWO_STAGE
HumanFaceDetectMSR01 s1(0.1F, 0.5F, 10, 0.2F);
HumanFaceDetectMNP01 s2(0.5F, 0.3F, 5);
std::list<dl::detect::result_t> &candidates = s1.infer((uint16_t *)fb->buf, {(int)fb->height, (int)fb->width, 3});
std::list<dl::detect::result_t> &results = s2.infer((uint16_t *)fb->buf, {(int)fb->height, (int)fb->width, 3}, candidates);
#else
HumanFaceDetectMSR01 s1(0.3F, 0.5F, 10, 0.2F);
std::list<dl::detect::result_t> &results = s1.infer((uint16_t *)fb->buf, {(int)fb->height, (int)fb->width, 3});
#endif
if (results.size() > 0) {
detected = true;
}
esp_camera_fb_return(fb);
} else {
out_len = fb->width * fb->height * 3;
out_width = fb->width;
out_height = fb->height;
out_buf = (uint8_t *)malloc(out_len);
if (!out_buf) {
Serial.println("out_buf malloc failed");
return false;
}
bool s = fmt2rgb888(fb->buf, fb->len, fb->format, out_buf);
esp_camera_fb_return(fb);
if (!s) {
free(out_buf);
Serial.println("To rgb888 failed");
return false;
}
#if TWO_STAGE
HumanFaceDetectMSR01 s1(0.1F, 0.5F, 10, 0.2F);
HumanFaceDetectMNP01 s2(0.5F, 0.3F, 5);
std::list<dl::detect::result_t> &candidates = s1.infer((uint8_t *)out_buf, {(int)out_height, (int)out_width, 3});
std::list<dl::detect::result_t> &results = s2.infer((uint8_t *)out_buf, {(int)out_height, (int)out_width, 3}, candidates);
#else
HumanFaceDetectMSR01 s1(0.3F, 0.5F, 10, 0.2F);
std::list<dl::detect::result_t> &results = s1.infer((uint8_t *)out_buf, {(int)out_height, (int)out_width, 3});
#endif
if (results.size() > 0) {
detected = true;
}
free(out_buf);
}
int64_t fr_end = esp_timer_get_time();
Serial.printf("FACE: %lums %s\n", (uint32_t)((fr_end - fr_start) / 1000), detected ? "DETECTED " : "");
return true;
}
/**
 * Arduino entry point: open the serial port, fill in the camera configuration,
 * initialise the camera driver and apply the sensor adjustments used for face
 * detection. Returns early (leaving the camera unusable) if the driver fails
 * to initialise or no sensor handle is available.
 */
void setup() {
  Serial.begin(115200);
  Serial.setDebugOutput(true);
  Serial.println();

  // Value-initialise so that every field not assigned below starts at zero
  // instead of holding whatever happened to be on the stack.
  camera_config_t config = {};
  config.ledc_channel = LEDC_CHANNEL_0;
  config.ledc_timer = LEDC_TIMER_0;
  config.pin_d0 = Y2_GPIO_NUM;
  config.pin_d1 = Y3_GPIO_NUM;
  config.pin_d2 = Y4_GPIO_NUM;
  config.pin_d3 = Y5_GPIO_NUM;
  config.pin_d4 = Y6_GPIO_NUM;
  config.pin_d5 = Y7_GPIO_NUM;
  config.pin_d6 = Y8_GPIO_NUM;
  config.pin_d7 = Y9_GPIO_NUM;
  config.pin_xclk = XCLK_GPIO_NUM;
  config.pin_pclk = PCLK_GPIO_NUM;
  config.pin_vsync = VSYNC_GPIO_NUM;
  config.pin_href = HREF_GPIO_NUM;
  config.pin_sccb_sda = SIOD_GPIO_NUM;
  config.pin_sccb_scl = SIOC_GPIO_NUM;
  config.pin_pwdn = PWDN_GPIO_NUM;
  config.pin_reset = RESET_GPIO_NUM;
  config.xclk_freq_hz = 20000000;
  config.frame_size = FRAMESIZE_UXGA;
  //config.pixel_format = PIXFORMAT_JPEG; // for streaming
  config.pixel_format = PIXFORMAT_RGB565;  // for face detection/recognition
  config.grab_mode = CAMERA_GRAB_WHEN_EMPTY;
  config.fb_location = CAMERA_FB_IN_PSRAM;
  config.jpeg_quality = 12;
  config.fb_count = 1;

  // if PSRAM IC present, init with UXGA resolution and higher JPEG quality
  // for larger pre-allocated frame buffer.
  if (config.pixel_format == PIXFORMAT_JPEG) {
    if (psramFound()) {
      config.jpeg_quality = 10;
      config.fb_count = 2;
      config.grab_mode = CAMERA_GRAB_LATEST;
    } else {
      // Limit the frame size when PSRAM is not available
      config.frame_size = FRAMESIZE_SVGA;
      config.fb_location = CAMERA_FB_IN_DRAM;
    }
  } else {
    // Best option for face detection/recognition
    config.frame_size = FRAMESIZE_240X240;
    config.fb_count = 2;
  }

  // camera init
  esp_err_t err = esp_camera_init(&config);
  if (err != ESP_OK) {
    Serial.printf("Camera init failed with error 0x%x\n", err);
    return;
  }

  sensor_t *s = esp_camera_sensor_get();
  if (!s) {
    // Without a sensor handle none of the adjustments below can be applied.
    Serial.println("Camera sensor not available");
    return;
  }

  // initial sensors are flipped vertically and colors are a bit saturated
  if (s->id.PID == OV3660_PID) {
    s->set_vflip(s, 1);        // flip it back
    s->set_brightness(s, 1);   // up the brightness just a bit
    s->set_saturation(s, -2);  // lower the saturation
  }

  // drop down frame size for higher initial frame rate
  if (config.pixel_format == PIXFORMAT_JPEG) {
    s->set_framesize(s, FRAMESIZE_QVGA);
  }

  // Face detection only works on upright faces. Whether the image needs
  // flipping depends on the camera module: remove this line if your module
  // already delivers the picture the right way up.
  s->set_vflip(s, 1);
}
// Arduino main loop: run one capture-and-detect pass per iteration.
void loop() {
  // The success flag is not needed here; face_detect() reports over Serial itself.
  (void)face_detect();
}
@me-no-dev
Very many thanks for the above info. It looks like my recent “github.com/espressif/arduino-esp32” of 11May24 had got corrupted somehow, as the “Examples/ESP32/Camera/CameraWebServer - FACE_DETECT” does work fine using today’s “github.com/espressif/arduino-esp32” (23May24).
Thank you also for your above example sketch. Note that I had to comment-out the “s->set_vflip(s, 1);” line to get it to work with my OV2640 camera, as Face-Detect only works if the face is the correct-way-up, and the current OV2640 camera I am using does NOT need the image inverting. (I have other OV2640 cameras that do require this though.) It might be worth adding a comment regarding this after the “s->set_vflip(s, 1);” line?
With the github software of 23May24 my above sketch actually runs much better than it did on the 16Jan23 github software, and is now pretty much as good as the example-WiFi-Face-Detect, which surprised me as using the 16Jan23 software my above sketch was not nearly as good as the example-WiFi-Face-Detect. I’m not sure why this should be, but I’m happy now using the 23May24 github software!
I seem to get a little better Face-Detection-results using my above sketch (running the Face-Detect on Core0 with priority [configMAX_PRIORITIES-2] and no-wifi), than when using your above sketch running all on Core1. By “Face-Detection-results” I mean I can (in good natural light) pretty reliably detect a face from anywhere between 30cm and 60cm from the camera, and at an angle of about +/- 22 degrees (about 45degrees in total) horizontally from dead-centre on my ~65degrees FOV camera. (It doesn’t work nearly as well in poor light.)
(cf. With your above sketch [all on Core1] I can only get reliable detection between perhaps 30cm and 45cm, and over an angle of perhaps +/- 15degrees, BUT this is rather subjective, so I’m not 100% sure.)
PS. Can you tell me if there is a simpler way to get the centre of the first detected face than calling “draw_face_boxes” and using “x = (int)prediction->box[0];” etc with the formulas:
mid_x = x + (w/2);
mid_y = y + (h/2);
PPS. It might be worth adding your above sketch to the “Examples/ESP32/Camera” folder next to the CameraWebServer.ino example?
@me-no-dev The example you attached is very useful for me, thank you so much. I’m trying to use face recognition without the web browser GUI as well. Do you have an already made example code that I can use? Thank you in advance
no, but you basically need to run the code that is in the single image http handler. That is all.
Thank you again. I'm not sure I understood you correctly, can you please mention the function name as it can be found on the original code? Thanks
Thank you so much @me-no-dev! Thanks to you, and Brent Rubell from Adafruit, I was able to write code that does what I wanted it to do - face detection and recognition without a web GUI. I'll attach it here in case anyone is interested in it. Anyway, I have 2 errors I couldn't resolve by myself:
/* This code is based on:
This code demonstrates face detection and recognition on the Xiao ESP32S3 Sense. Short blink for face detection, long blink for face recognition. To enroll a new face, short D10 to GND and release. Then point the camera at the face until a long blink or a Serial print indicates that the face has been enrolled.
Notes:
Tools configurations for uploading: a. Partition scheme: Maximum App b. PSRAM: OPI PSRAM
Written by Omri David */
// Name of this sketch; setup() prints it to Serial at start-up.
String sketch_name = "offline_face_detection_recognition9_organized";
//Xiao esp32s3 sense definitions:
//for enrolling new faces uint8_t ENROLL_BUTTON = D10;
// The number of faces to save // NOTE - these faces are saved to the ESP32's flash memory and survive between // reboots
// Threshold (0.0 - 1.0) to determine whether the face detected is a positive // match NOTE - This value is adjustable, you may "tune" it for either a more // confident match
// True if you want to save faces to flash memory and load them on boot, False // otherwise
/* FR and FD */
// pointer to the camera's framebuffer camera_fb_t *fb = NULL;
// Recognizer model // S8 model - faster but less accurate FaceRecognition112V1S8 recognizer; // Use two-stage fd and weights HumanFaceDetectMSR01 s1(0.1F, 0.5F, 10, 0.2F); HumanFaceDetectMNP01 s2(0.5F, 0.3F, 5); bool is_enrolling = false; // 0: not enrolling, 1: enrolling
/**
 * @return int The ID of the recognized face.
 */
static int run_face_recognition(fb_data_t fb,
std::list
Tensor
int enrolled_count = recognizer.get_enrolled_id_num();
if (enrolled_count < FACE_ID_SAVE_NUMBER && is_enrolling) { int id = recognizer.enroll_id(tensor, landmarks, "", true); (void)id;
Serial.printf("Enrolled ID: %d/n", id); digitalWrite(LED_BUILTIN, LOW); // turn the LED on (HIGH is the voltage level) delay(5000); // wait for a second digitalWrite(LED_BUILTIN, HIGH); // turn the LED off by making the voltage LOW delay(10); is_enrolling = false; } else if (enrolled_count >= FACE_ID_SAVE_NUMBER && is_enrolling) { Serial.println( "ERROR: Already enrolled the maximum number of faces, can not " "enroll more!"); is_enrolling = false; }
face_info_t recognize = recognizer.recognize(tensor, landmarks); if (recognize.id >= 0 && recognize.similarity >= FR_CONFIDENCE_THRESHOLD) { // Face was recognized, print out to serial and blink led // Face was recognized, print out to serial Serial.printf("\t\t\t\tRecognized ID: %d\n", recognize.id); Serial.printf("\t\t\t\twith similarity of: %0.2f\n", recognize.similarity); digitalWrite(LED_BUILTIN, LOW); // turn the LED on (HIGH is the voltage level) delay(1000); // wait for a second digitalWrite(LED_BUILTIN, HIGH); // turn the LED off by making the voltage LOW delay(10); } else if (recognizer.get_enrolled_id_num() > 0) { // Face was not recognized but we have enrolled faces Serial.print("Unknown face - face not recognized as an enrolled face!\n"); Serial.printf("This face has a similarity of: %0.2f\n", recognize.similarity); digitalWrite(LED_BUILTIN, LOW); // turn the LED on (HIGH is the voltage level) delay(10); // wait for a second digitalWrite(LED_BUILTIN, HIGH); // turn the LED off by making the voltage LOW delay(10); } else { // Face was not recognized and we have no faces enrolled Serial.println("Face not recognized, but no face enrolled!"); } return recognize.id; }
bool initCamera() { camera_config_t config; config.ledc_channel = LEDC_CHANNEL_0; config.ledc_timer = LEDC_TIMER_0; config.pin_d0 = Y2_GPIO_NUM; config.pin_d1 = Y3_GPIO_NUM; config.pin_d2 = Y4_GPIO_NUM; config.pin_d3 = Y5_GPIO_NUM; config.pin_d4 = Y6_GPIO_NUM; config.pin_d5 = Y7_GPIO_NUM; config.pin_d6 = Y8_GPIO_NUM; config.pin_d7 = Y9_GPIO_NUM; config.pin_xclk = XCLK_GPIO_NUM; config.pin_pclk = PCLK_GPIO_NUM; config.pin_vsync = VSYNC_GPIO_NUM; config.pin_href = HREF_GPIO_NUM; config.pin_sccb_sda = SIOD_GPIO_NUM; config.pin_sccb_scl = SIOC_GPIO_NUM; config.pin_pwdn = PWDN_GPIO_NUM; config.pin_reset = RESET_GPIO_NUM; config.xclk_freq_hz = 20000000; config.grab_mode = CAMERA_GRAB_WHEN_EMPTY; config.fb_location = CAMERA_FB_IN_PSRAM; config.frame_size = FRAMESIZE_QVGA; //FRAMESIZE_240X240; config.pixel_format = PIXFORMAT_RGB565; config.fb_count = 2;
// Initialize the camera esp_err_t err = esp_camera_init(&config); if (err != ESP_OK) { Serial.printf("ERROR: Camera init failed with code 0x%x", err); return false; }
// Configure the camera's sensor sensor_t *s = esp_camera_sensor_get(); // s->set_brightness(s, -1); //causing more: cam_hal: EV-VSYNC-OVF // s->set_contrast(s, 1); //causing more: cam_hal: EV-VSYNC-OVF s->set_vflip(s, 1); s->set_hmirror(s, 0);
return true; }
void setup() { Serial.begin(115200); Serial.setDebugOutput(true); Serial.println("sketch name: " + sketch_name);
pinMode(LED_BUILTIN, OUTPUT); digitalWrite(LED_BUILTIN, HIGH); pinMode(ENROLL_BUTTON, INPUT_PULLUP);
if (!initCamera()) { Serial.println("Camera init failed!"); }
// Initialize face recognition filter and partition ra_filter_init(&ra_filter, 20); recognizer.set_partition(ESP_PARTITION_TYPE_DATA, ESP_PARTITION_SUBTYPE_ANY, "fr");
// Optionally load face ids from flash partition recognizer.set_ids_from_flash();
Serial.println("start detecting and recognizing faces"); }
void loop() { // If the enroll button is pressed, enroll a new face if (digitalRead(ENROLL_BUTTON) == LOW) { is_enrolling = true; Serial.println("Enrolling face.."); digitalWrite(LED_BUILTIN, LOW); // turn the LED on (HIGH is the voltage level) delay(10); // wait for a second digitalWrite(LED_BUILTIN, HIGH); // turn the LED off by making the voltage LOW delay(10); }
// capture from the camera into the frame buffer
fb = esp_camera_fb_get();
if (!fb) {
Serial.printf("ERROR: Camera capture failed\n");
} else {
// Face detection:
std::list
size_t out_len, out_width, out_height;
uint8_t *out_buf;
bool s;
out_len = fb->width * fb->height * 3;
out_width = fb->width;
out_height = fb->height;
out_buf = (uint8_t *)malloc(out_len);
if (!out_buf) {
log_e("out_buf malloc failed");
}
//convert to rgb888 for better perfomances
s = fmt2rgb888(fb->buf, fb->len, fb->format, out_buf);
free(out_buf);
if (!s) {
free(out_buf);
log_e("conversion to rgb888 failed");
}
esp_camera_fb_return(fb);
fb_data_t rfb;
rfb.width = out_width;
rfb.height = out_height;
rfb.data = out_buf;
rfb.bytes_per_pixel = 3;
rfb.format = FB_BGR888;
// Face recognition is SLOW! So, only attempt it if we are enrolling a
// new face, or have previously enrolled a face
if (recognizer.get_enrolled_id_num() > 0 || is_enrolling) {
face_id = run_face_recognition(&rfb, &results);
} else {
face_id = 0;
Serial.println("no enrolled faces -> not running face recognition");
}
}
} // Release the framebuffer esp_camera_fb_return(fb); }
ra_filter.h (H file):
// SPDX-FileCopyrightText: 2018 me-no-dev for Espressif Systems // // SPDX-License-Identifier: LGPL-2.1-or-later // // Modified by Brent Rubell for Adafruit Industries
// RA Filtering: running average over the most recent `size` samples.
typedef struct {
  size_t size;   // number of values used for filtering
  size_t index;  // current value index
  size_t count;  // value count
  int sum;       // sum of the values currently stored
  int *values;   // array to be filled with values
} ra_filter_t;

static ra_filter_t ra_filter;

/**
 * Reset `filter` and allocate zeroed storage for `sample_size` samples.
 *
 * @param filter       filter state to initialise (must not be NULL)
 * @param sample_size  number of samples the running average spans
 * @return `filter` on success, NULL when the sample storage cannot be allocated
 */
static ra_filter_t *ra_filter_init(ra_filter_t *filter, size_t sample_size) {
  memset(filter, 0, sizeof(ra_filter_t));

  filter->values = (int *)malloc(sample_size * sizeof(int));
  if (!filter->values) {
    return NULL;
  }
  memset(filter->values, 0, sample_size * sizeof(int));

  filter->size = sample_size;
  return filter;
}

/**
 * Push `value` into the filter and return the average of the samples seen so
 * far (at most `size` of them).
 *
 * @param filter  filter state set up by ra_filter_init()
 * @param value   new sample
 * @return the running average, or `value` unchanged when the filter has no
 *         sample storage
 */
static int ra_filter_run(ra_filter_t *filter, int value) {
  // No storage, or a zero-sized window: pass the value through. The size check
  // also avoids the modulo-by-zero and out-of-bounds write a zero-sized
  // window would otherwise cause.
  if (!filter->values || filter->size == 0) {
    return value;
  }
  // Replace the oldest sample and keep the sum in step with the stored values.
  filter->sum -= filter->values[filter->index];
  filter->values[filter->index] = value;
  filter->sum += filter->values[filter->index];
  filter->index++;
  filter->index = filter->index % filter->size;
  if (filter->count < filter->size) {
    filter->count++;
  }
  return filter->sum / filter->count;
}
Partitions.csv file:
![image](https://github.com/user-attachments/assets/7fb15a1f-9de8-4849-aec6-83c0a233cdb8)
Board
Firebeetle2 ESP32-S3
Device Description
Firebeetle2 ESP32-S3 with OV2640 camera attached.
Hardware Configuration
OV2640 68degree standard camera (note that face-detection seems to work even more poorly with wide-angle cameras!) I also use an active-buzzer on pin A5, but this is not necessary as "Got Face" is also printed to the Serial-terminal.
Version
latest master (checkout manually)
IDE Name
Arduino IDE v1.8.19
Operating System
Windows10
Flash frequency
80MHz
PSRAM enabled
yes
Upload speed
921600
Description
Face-Detection on ESP32-S3 does not work nearly as well using the below sketch WITHOUT wifi, as it does for the default "Examples/ESP32/Camera/CameraWebServer" wifi example.
This is using an identical camera-setup (OV2640 68degree standard camera, PIXFORMAT_RGB565, FRAMESIZE_240X240), and identical "TWO_STAGE" face-detection, and running the below "look_for_faces()" task on Core0, which is where I believe the equivalent "app_httpd.cpp" task is run for the wifi-example.
NB. CONFIG_ESP_FACE_DETECT_ENABLED=1 enabled CONFIG_ESP_FACE_RECOGNITION_ENABLED=0 (ie. detection NOT recognition)
Specifically, the below sketch only finds a face if the face is approx 30cm to 45cm away and pretty much in the exact centre of the frame; whereas the "Examples/ESP32/Camera/CameraWebServer" wifi example with identical setup will find a face pretty reliably between 30cm and 60cm away, and almost anywhere from -22degrees to +22degrees (45degrees total) horizontally from the centre of the frame.
NB. I am using "github.com/espressif/arduino-esp32" as at 16Jan23.
[As the "Examples/ESP32/Camera/CameraWebServer - FACE_DETECT" doesn't work on the current github when using the above ESP32-S3. On the current github 11May24 you even have to modify the example sketch by moving the WiFi.begin() above the esp_camera_init() or else it won't even connect to the wifi, but even then face-detection doesn't work. (Arduino-on-core1, Events-on-core1-or-core0.)] <- Ignore this as I suspect my github 11May24 software was corrupted.
But note that this below (non-wifi) sketch runs the same on both github v16Jan23 and github v11May24.
Sketch
Debug Message
Other Steps to Reproduce
No response
I have checked existing issues, online documentation and the Troubleshooting Guide