espressif / arduino-esp32

Arduino core for the ESP32
GNU Lesser General Public License v2.1
13.65k stars 7.41k forks source link

Face-Detecton on ESP32-S3 without Wifi / Webserver is very poor #9671

Closed Rob58329 closed 5 months ago

Rob58329 commented 5 months ago

Board

Firebeetle2 ESP32-S3

Device Description

Firebeetle2 ESP32-S3 with OV2640 camera attached.

Hardware Configuration

OV2640 68degree standard camera (note that face-detection seems to work even more poorly with wide-angle cameras!) I also use an active-buzzer on pin A5, but this is not necessary as "Got Face" is also printed to the Serial-terminal.

Version

latest master (checkout manually)

IDE Name

Arduino IDE v1.8.19

Operating System

Windows10

Flash frequency

80MHz

PSRAM enabled

yes

Upload speed

921600

Description

Face-Detection on ESP32-S3 does not work nearly as well using the sketch below WITHOUT wifi as it does with the default "Examples/ESP32/Camera/CameraWebServer" wifi example.

This is with an identical camera-setup (OV2640 68degree standard camera, PIXFORMAT_RGB565, FRAMESIZE_240X240) and identical "TWO_STAGE" face-detection, running the "look_for_faces()" task below on Core0, which is where I believe the equivalent "app_httpd.cpp" task is run for the wifi-example.

NB. CONFIG_ESP_FACE_DETECT_ENABLED=1 enabled CONFIG_ESP_FACE_RECOGNITION_ENABLED=0 (ie. detection NOT recognition)

Specifically, the sketch below only finds a face if the face is approx 30cm to 45cm away and pretty much in the exact centre of the frame; whereas the "Examples/ESP32/Camera/CameraWebServer" wifi example with identical setup finds a face pretty reliably between 30cm and 60cm away, and almost anywhere from -22degrees to +22degrees (45degrees total) horizontally from the centre of the frame.

NB. I am using "github.com/espressif/arduino-esp32" as at 16Jan23.

[_as the "Examples/ESP32/Camera/CameraWebServer - FACE_DETECT" doesn't work on the current github when using the above ESP32-S3. On the current github 11May24 you even have to modify the example sketch by moving the WiFi.begin() above the esp_camerainit() or else it wont even connect to the wifi, but even then face-detection doesn't work. (Arduino-on-core1, Events-on-core1-or-core0.)] <- Ignore this as I suspect my github 11May24 software was corrupted.

But note that this below (non-wifi) sketch runs the same on both github v16Jan23 and github v11May24.

Sketch

// Note that this below (non-wifi) sketch runs the same on both github v16Jan23 and gitub v11May24.

// Refuse to build unless the board definition declares PSRAM
// (the camera frame buffer is placed in PSRAM by initialiseCamera()).
#ifndef BOARD_HAS_PSRAM
  #error This sketch needs psram
#endif

// Must be non-zero: look_for_faces() only implements the two-stage detector
// and hits #error for any other value.
#define TWO_STAGE 1
#include "esp_camera.h"
// WiFi.h is only needed so that setup() can switch WiFi off.
#include <WiFi.h>

// Pin driving the active buzzer used by beep().
#define BEEP_PIN_active A5
// Camera wiring for the FireBeetle 2 ESP32-S3 with an OV2640 attached.
#define CAMERA_WIRING_FB2_0V2640
#ifdef CAMERA_WIRING_FB2_0V2640
  // NOTE(review): -1 presumably means "line not wired" to the camera driver - confirm.
  #define PWDN_GPIO_NUM     -1
  #define RESET_GPIO_NUM    -1
  #define XCLK_GPIO_NUM     45
  #define SIOD_GPIO_NUM     1
  #define SIOC_GPIO_NUM     2
  // Y2..Y9 are mapped to the driver's data pins d0..d7 in initialiseCamera().
  #define Y9_GPIO_NUM       48
  #define Y8_GPIO_NUM       46
  #define Y7_GPIO_NUM       8 // A3
  #define Y6_GPIO_NUM       7 // D5
  #define Y5_GPIO_NUM       4 // A0
  #define Y4_GPIO_NUM       41
  #define Y3_GPIO_NUM       40
  #define Y2_GPIO_NUM       39
  #define VSYNC_GPIO_NUM    6 // A2
  #define HREF_GPIO_NUM     42
  #define PCLK_GPIO_NUM     5 // A1

  // AXP313A driver object; setup() uses it to switch on the camera power.
  #include "DFRobot_AXP313A.h"
  DFRobot_AXP313A axp;

#endif 

#include "human_face_detect_msr01.hpp"
#include "human_face_detect_mnp01.hpp"

// Handle of the look_for_faces task: written by xTaskCreatePinnedToCore() in
// setup() and read by loop() to report the task's stack high-water mark.
TaskHandle_t My_Task; 

void setup() {
  WiFi.mode(WIFI_OFF); // Necessary as otherwise seems to effect core0 speed
  pinMode(BEEP_PIN_active, OUTPUT); beep(3); // 100*5 = 1secs
  Serial.begin(115200);
  Serial.println("Start up\n--------");
  Serial.print("setup/loop running on core="); Serial.println(xPortGetCoreID());

  psramInit();
  if (psramFound()) {
    int spare = ESP.getFreePsram();
    Serial.printf("PSRAM=%i/%ikB\n", spare/1024, ESP.getPsramSize()/1024);
    if (spare == 0) Serial.println("Err: PSRAM issue");
  } else Serial.println("Err: PSRAM issue");

  #ifdef CAMERA_WIRING_FB2_0V2640
    Serial.println("Do axp.begin...");
    while (axp.begin()!=0) {Serial.println("\nERR: axp.begin fail\n"); delay(1000);}
    Serial.println("D0 axp.enableCameraPower...");
    axp.enableCameraPower(axp.eOV2640); //Enable the power for camera
    delay(2000);
  #endif

  if (!initialiseCamera()) Serial.println("Err: initialiseCamera FAILED");
  else{
    Serial.print("Camera OK, staring look_for_faces...\n");
    xTaskCreatePinnedToCore(
    look_for_faces
    ,  "look_for_faces"
    ,  (2048+512)  // Stack size // 2048->uxTaskGetStackHighWaterMark=32, with occasional crashes
    ,  NULL
    ,  (configMAX_PRIORITIES-2)  // Priority
    ,  &My_Task
    ,  0); // Core0
  }
}

void loop() {
  delay(8000);
  Serial.printf("uxTaskGetStackHighWaterMark=%i\n",uxTaskGetStackHighWaterMark(My_Task));
}

void beep(int count) { // count==0 gives a short-pip
  int max_x; if (count) max_x = count * 2; else max_x = 2;
  boolean  toggle = true;
  for (int x = 1; x <= max_x; x++) {
    digitalWrite(BEEP_PIN_active, toggle);
    toggle = !toggle;
    if (x < max_x) {if (count) delay(100); else delay(50);}
} }

/**
 * Configure and start the camera for RGB565 frames at 240x240 with a single
 * frame buffer in PSRAM.
 *
 * @return true when esp_camera_init() reports ESP_OK, false otherwise (the
 *         error code is printed on the serial port).
 */
bool initialiseCamera() {
  // Zero the whole struct first so that every field this function does not
  // assign explicitly reaches the driver with a defined value.
  camera_config_t config = {};
  config.ledc_channel = LEDC_CHANNEL_0;
  config.ledc_timer = LEDC_TIMER_0;
  config.pin_d0 = Y2_GPIO_NUM;
  config.pin_d1 = Y3_GPIO_NUM;
  config.pin_d2 = Y4_GPIO_NUM;
  config.pin_d3 = Y5_GPIO_NUM;
  config.pin_d4 = Y6_GPIO_NUM;
  config.pin_d5 = Y7_GPIO_NUM;
  config.pin_d6 = Y8_GPIO_NUM;
  config.pin_d7 = Y9_GPIO_NUM;
  config.pin_xclk = XCLK_GPIO_NUM;
  config.pin_pclk = PCLK_GPIO_NUM;
  config.pin_vsync = VSYNC_GPIO_NUM;
  config.pin_href = HREF_GPIO_NUM;
  config.pin_sscb_sda = SIOD_GPIO_NUM;
  config.pin_sscb_scl = SIOC_GPIO_NUM;
  config.pin_pwdn = PWDN_GPIO_NUM;
  config.pin_reset = RESET_GPIO_NUM;
  config.jpeg_quality = 10; // 0-63 lower number means higher quality (can cause failed image capture if set too low at higher resolutions)
  config.fb_location = CAMERA_FB_IN_PSRAM;
  config.grab_mode = CAMERA_GRAB_LATEST;
  config.fb_count = 1; // if RGB
  config.xclk_freq_hz = 10000000;   // seems better than 20MHz
  config.pixel_format = PIXFORMAT_RGB565;
  config.frame_size = FRAMESIZE_240X240;
  esp_err_t cam_err = esp_camera_init(&config);  // initialise the camera
  if (cam_err!=ESP_OK) Serial.printf("Err: Camera init failed with error 0x%x\n", cam_err);
  else Serial.println("Camera using RGB at 240x240");
  return (cam_err==ESP_OK);
}

void look_for_faces(void * pvParameters) {
  (void) pvParameters;
  Serial.printf("Task is running on core=%i\n",xPortGetCoreID());
  camera_fb_t *fb = NULL;
  #if TWO_STAGE
    HumanFaceDetectMSR01 s1(0.1F, 0.5F, 10, 0.2F);
    HumanFaceDetectMNP01 s2(0.5F, 0.3F, 5);
  #else
    #error
  #endif
  for (;;) {
    fb = esp_camera_fb_get();
    if (!fb) Serial.println("Camera capture failed");
    else {
      if ((fb->width) > 400) Serial.println("size too big");
      else {
        if (fb->format == PIXFORMAT_RGB565) {
          #if TWO_STAGE
            std::list<dl::detect::result_t> &candidates = s1.infer((uint16_t *)fb->buf, {(int)fb->height, (int)fb->width, 3});
            std::list<dl::detect::result_t> &results = s2.infer((uint16_t *)fb->buf, {(int)fb->height, (int)fb->width, 3}, candidates);
          #endif
          if (results.size() > 0) {beep(0); Serial.println("Got Face");}
        }
      }
    }
    if (fb) {esp_camera_fb_return(fb); fb = NULL;}
    // vTaskDelay(1 / portTICK_PERIOD_MS); // 1ms // portTICK_PERIOD_MS=1
  }
}

Debug Message

n/a

Other Steps to Reproduce

No response

I have checked existing issues, online documentation and the Troubleshooting Guide

me-no-dev commented 5 months ago

Please use the latest core (either github with get.py or dev release 3.0.0-rc3). Camera example works fine and you should also have fast detection without the web server.

me-no-dev commented 5 months ago

Here is an example sketch that runs on the latest core and only does face detection (takes 70-120ms depending on format and image)

#include "esp_timer.h"
#include "esp_camera.h"
#include "img_converters.h"
#include <vector>
#include "human_face_detect_msr01.hpp"
#include "human_face_detect_mnp01.hpp"

// Detector selection used by face_detect():
//   1: two-stage detection, more accurate but slower (with keypoints).
//   0: one-stage detection, less accurate but faster (without keypoints).
#define TWO_STAGE 1 /*<! 1: detect by two-stage which is more accurate but slower(with keypoints). */
                    /*<! 0: detect by one-stage which is less accurate but faster(without keypoints). */

// Camera wiring handed to esp_camera_init() in setup().
// NOTE(review): the thread does not say which board this pin map is for -
// adjust it to your own hardware.
// NOTE(review): -1 presumably means "line not wired" to the camera driver - confirm.
#define PWDN_GPIO_NUM  -1
#define RESET_GPIO_NUM -1
#define XCLK_GPIO_NUM  15
#define SIOD_GPIO_NUM  4
#define SIOC_GPIO_NUM  5

// Y2..Y9 are mapped to the driver's data pins d0..d7 in setup().
#define Y2_GPIO_NUM 11
#define Y3_GPIO_NUM 9
#define Y4_GPIO_NUM 8
#define Y5_GPIO_NUM 10
#define Y6_GPIO_NUM 12
#define Y7_GPIO_NUM 18
#define Y8_GPIO_NUM 17
#define Y9_GPIO_NUM 16

#define VSYNC_GPIO_NUM 6
#define HREF_GPIO_NUM  7
#define PCLK_GPIO_NUM  13

bool face_detect() {
  int64_t fr_start = esp_timer_get_time();
  camera_fb_t *fb = esp_camera_fb_get();
  if (!fb) {
    Serial.println("Camera capture failed");
    return false;
  }

  size_t out_len, out_width, out_height;
  uint8_t *out_buf;
  bool detected = false;

  if (fb->format == PIXFORMAT_RGB565) {
#if TWO_STAGE
    HumanFaceDetectMSR01 s1(0.1F, 0.5F, 10, 0.2F);
    HumanFaceDetectMNP01 s2(0.5F, 0.3F, 5);
    std::list<dl::detect::result_t> &candidates = s1.infer((uint16_t *)fb->buf, {(int)fb->height, (int)fb->width, 3});
    std::list<dl::detect::result_t> &results = s2.infer((uint16_t *)fb->buf, {(int)fb->height, (int)fb->width, 3}, candidates);
#else
    HumanFaceDetectMSR01 s1(0.3F, 0.5F, 10, 0.2F);
    std::list<dl::detect::result_t> &results = s1.infer((uint16_t *)fb->buf, {(int)fb->height, (int)fb->width, 3});
#endif
    if (results.size() > 0) {
      detected = true;
    }
    esp_camera_fb_return(fb);
  } else {
    out_len = fb->width * fb->height * 3;
    out_width = fb->width;
    out_height = fb->height;
    out_buf = (uint8_t *)malloc(out_len);
    if (!out_buf) {
      Serial.println("out_buf malloc failed");
      return false;
    }
    bool s = fmt2rgb888(fb->buf, fb->len, fb->format, out_buf);
    esp_camera_fb_return(fb);
    if (!s) {
      free(out_buf);
      Serial.println("To rgb888 failed");
      return false;
    }

#if TWO_STAGE
    HumanFaceDetectMSR01 s1(0.1F, 0.5F, 10, 0.2F);
    HumanFaceDetectMNP01 s2(0.5F, 0.3F, 5);
    std::list<dl::detect::result_t> &candidates = s1.infer((uint8_t *)out_buf, {(int)out_height, (int)out_width, 3});
    std::list<dl::detect::result_t> &results = s2.infer((uint8_t *)out_buf, {(int)out_height, (int)out_width, 3}, candidates);
#else
    HumanFaceDetectMSR01 s1(0.3F, 0.5F, 10, 0.2F);
    std::list<dl::detect::result_t> &results = s1.infer((uint8_t *)out_buf, {(int)out_height, (int)out_width, 3});
#endif

    if (results.size() > 0) {
      detected = true;
    }
    free(out_buf);
  }
  int64_t fr_end = esp_timer_get_time();
  Serial.printf("FACE: %lums %s\n", (uint32_t)((fr_end - fr_start) / 1000), detected ? "DETECTED " : "");
  return true;
}

void setup() {
  Serial.begin(115200);
  Serial.setDebugOutput(true);
  Serial.println();

  camera_config_t config;
  config.ledc_channel = LEDC_CHANNEL_0;
  config.ledc_timer = LEDC_TIMER_0;
  config.pin_d0 = Y2_GPIO_NUM;
  config.pin_d1 = Y3_GPIO_NUM;
  config.pin_d2 = Y4_GPIO_NUM;
  config.pin_d3 = Y5_GPIO_NUM;
  config.pin_d4 = Y6_GPIO_NUM;
  config.pin_d5 = Y7_GPIO_NUM;
  config.pin_d6 = Y8_GPIO_NUM;
  config.pin_d7 = Y9_GPIO_NUM;
  config.pin_xclk = XCLK_GPIO_NUM;
  config.pin_pclk = PCLK_GPIO_NUM;
  config.pin_vsync = VSYNC_GPIO_NUM;
  config.pin_href = HREF_GPIO_NUM;
  config.pin_sccb_sda = SIOD_GPIO_NUM;
  config.pin_sccb_scl = SIOC_GPIO_NUM;
  config.pin_pwdn = PWDN_GPIO_NUM;
  config.pin_reset = RESET_GPIO_NUM;
  config.xclk_freq_hz = 20000000;
  config.frame_size = FRAMESIZE_UXGA;
  //config.pixel_format = PIXFORMAT_JPEG;  // for streaming
  config.pixel_format = PIXFORMAT_RGB565; // for face detection/recognition
  config.grab_mode = CAMERA_GRAB_WHEN_EMPTY;
  config.fb_location = CAMERA_FB_IN_PSRAM;
  config.jpeg_quality = 12;
  config.fb_count = 1;

  // if PSRAM IC present, init with UXGA resolution and higher JPEG quality
  //                      for larger pre-allocated frame buffer.
  if (config.pixel_format == PIXFORMAT_JPEG) {
    if (psramFound()) {
      config.jpeg_quality = 10;
      config.fb_count = 2;
      config.grab_mode = CAMERA_GRAB_LATEST;
    } else {
      // Limit the frame size when PSRAM is not available
      config.frame_size = FRAMESIZE_SVGA;
      config.fb_location = CAMERA_FB_IN_DRAM;
    }
  } else {
    // Best option for face detection/recognition
    config.frame_size = FRAMESIZE_240X240;
    config.fb_count = 2;
  }

  // camera init
  esp_err_t err = esp_camera_init(&config);
  if (err != ESP_OK) {
    Serial.printf("Camera init failed with error 0x%x\n", err);
    return;
  }

  sensor_t *s = esp_camera_sensor_get();
  // initial sensors are flipped vertically and colors are a bit saturated
  if (s->id.PID == OV3660_PID) {
    s->set_vflip(s, 1);        // flip it back
    s->set_brightness(s, 1);   // up the brightness just a bit
    s->set_saturation(s, -2);  // lower the saturation
  }
  // drop down frame size for higher initial frame rate
  if (config.pixel_format == PIXFORMAT_JPEG) {
    s->set_framesize(s, FRAMESIZE_QVGA);
  }

  s->set_vflip(s, 1);

}

// Run one capture-and-detect pass per iteration; face_detect() prints its own
// result, so its return value is deliberately discarded.
void loop() {
  (void)face_detect();
}
Rob58329 commented 5 months ago

@me-no-dev

Very many thanks for the above info. It looks like my recent “github.com/espressif/arduino-esp32” of 11May24 had got corrupted somehow, as the “Examples/ESP32/Camera/CameraWebServer - FACE_DETECT” does work fine using today’s “github.com/espressif/arduino-esp32” (23May24).

Thank you also for your above example sketch. Note that I had to comment-out the “s->set_vflip(s, 1);” line to get it to work with my OV2640 camera, as Face-Detect only works if the face is the correct-way-up, and the current OV2640 camera I am using does NOT need the image inverting. (I have other OV2640 cameras that do require this though.) It might be worth adding a comment regarding this after the “s->set_vflip(s, 1);” line?

With the github software of 23May24 my above sketch actually runs much better than it did on the 16Jan23 github software, and is now pretty much as good as the example-WiFi-Face-Detect, which surprised me as using the 16Jan23 software my above sketch was not nearly as good as the example-WiFi-Face-Detect. I’m not sure why this should be, but I’m happy now using the 23May24 github software!

I seem to get a little better Face-Detection-results using my above sketch (running the Face-Detect on Core0 with priority [configMAX_PRIORITIES-2] and no-wifi), than when using your above sketch running all on Core1. By “Face-Detection-results” I mean I can (in good natural light) pretty reliably detect a face from anywhere between 30cm and 60cm from the camera, and at an angle of about +/- 22 degrees (about 45degrees in total) horizontally from dead-centre on my ~65degrees FOV camera. (It doesn’t work nearly as well in poor light.)

(cf. With your above sketch [all on Core1] I can only get reliable detection between perhaps 30cm and 45cm, and over an angle of perhaps +/- 15degrees, BUT this is rather subjective, so I’m not 100% sure.)

PS. Can you tell me if there is a simpler way to get the centre of the first detected face than calling “draw_face_boxes” and using “x = (int)prediction->box[0];” etc with the formulas:

mid_x = x + (w/2);
mid_y = y + (h/2);

PPS. It might be worth adding your above sketch to the “Examples/ESP32/Camera” folder next to the CameraWebServer.ino example?

omridav commented 1 month ago

@me-no-dev The example you attached is very useful for me, thank you so much. I’m trying to use face recognition without the web browser GUI as well. Do you have an already made example code that I can use? Thank you in advance

me-no-dev commented 1 month ago

no, but you basically need to run the code that is in the single image http handler. That is all.

omridav commented 1 month ago

Thank you again. I'm not sure I understood you correctly, can you please mention the function name as it can be found on the original code? Thanks

me-no-dev commented 1 month ago

Here it is: https://github.com/espressif/arduino-esp32/blob/master/libraries/ESP32/examples/Camera/CameraWebServer/app_httpd.cpp#L332

omridav commented 1 month ago

Thank you so much @me-no-dev ! Thanks to you, and Brent Rubell from Adafruit, I was able to write code that does what I wanted it to do - face detection and recognition without the web GUI. I'll attach it here in case anyone is interested in it. However, I have 2 errors I couldn't resolve by myself:

  1. "E (570) esp_core_dump_flash: No core dump partition found!" - on every reboot. If I understood correctly this is not an issue, and I can ignore this error.
  2. "cam_hal: EV-VSYNC-OVF": appears once in a while, as soon as face detected by camera. I supposed it related to buffer, but as I said, I couldn't resolve it by myself. I'm using Xiao ESP32S3 Sense, with these parameters: Partition scheme: Maximum App, PSRAM: OPI PSRAM. Thank you in advance! Ino file:

/* This code is based on:

  1. CameraWebServer example
  2. Offline version of it with face detection only (without recognition) - https://github.com/espressif/arduino-esp32/issues/9671#issuecomment-2126825897
  3. Adafruit's example - https://github.com/adafruit/Adafruit_Learning_System_Guides/tree/main/MEMENTO/Memento_Face_Detect_Recognize

This code demonstrates face detection and recognition on the Xiao ESP32S3 Sense. Short blink for face detection, long blink for face recognition. To enroll a new face, short D10 to GND and release. Then point the camera at the face until a long blink or a Serial print indicates that the face has been enrolled.

Notes:

  1. You can use CameraWebServer example to enroll faces with web monitor
  2. You can use "Erase all flash before sketch upload -> enabled" to remove enrolled faces.
  3. It's recommended to enroll few different ID's from different angles for every face to make it easier to recognized.

Tools configurations for uploading: a. Partition scheme: Maximum App b. PSRAM: OPI PSRAM

Written by Omri David */

include "esp_camera.h"

include "face_recognition_112_v1_s8.hpp"

include "face_recognition_tool.hpp"

include "fb_gfx.h"

include "human_face_detect_mnp01.hpp"

include "human_face_detect_msr01.hpp"

include "ra_filter.h"

include

String sketch_name = "offline_face_detection_recognition9_organized";

//Xiao esp32s3 sense definitions:

define PWDN_GPIO_NUM -1

define RESET_GPIO_NUM -1

define XCLK_GPIO_NUM 10

define SIOD_GPIO_NUM 40

define SIOC_GPIO_NUM 39

define Y9_GPIO_NUM 48

define Y8_GPIO_NUM 11

define Y7_GPIO_NUM 12

define Y6_GPIO_NUM 14

define Y5_GPIO_NUM 16

define Y4_GPIO_NUM 18

define Y3_GPIO_NUM 17

define Y2_GPIO_NUM 15

define VSYNC_GPIO_NUM 38

define HREF_GPIO_NUM 47

define PCLK_GPIO_NUM 13

//for enrolling new faces uint8_t ENROLL_BUTTON = D10;

// The number of faces to save // NOTE - these faces are saved to the ESP32's flash memory and survive between // reboots

define FACE_ID_SAVE_NUMBER 7

// Threshold (0.0 - 1.0) to determine whether the face detected is a positive // match NOTE - This value is adjustable, you may "tune" it for either a more // confident match

define FR_CONFIDENCE_THRESHOLD 0.6

// True if you want to save faces to flash memory and load them on boot, False // otherwise

define SAVE_FACES_TO_FLASH true

/ FR and FD /

// pointer to the camera's framebuffer camera_fb_t *fb = NULL;

// Recognizer model // S8 model - faster but less accurate FaceRecognition112V1S8 recognizer; // Use two-stage fd and weights HumanFaceDetectMSR01 s1(0.1F, 0.5F, 10, 0.2F); HumanFaceDetectMNP01 s2(0.5F, 0.3F, 5); bool is_enrolling = false; // 0: not enrolling, 1: enrolling

/**

bool initCamera() { camera_config_t config; config.ledc_channel = LEDC_CHANNEL_0; config.ledc_timer = LEDC_TIMER_0; config.pin_d0 = Y2_GPIO_NUM; config.pin_d1 = Y3_GPIO_NUM; config.pin_d2 = Y4_GPIO_NUM; config.pin_d3 = Y5_GPIO_NUM; config.pin_d4 = Y6_GPIO_NUM; config.pin_d5 = Y7_GPIO_NUM; config.pin_d6 = Y8_GPIO_NUM; config.pin_d7 = Y9_GPIO_NUM; config.pin_xclk = XCLK_GPIO_NUM; config.pin_pclk = PCLK_GPIO_NUM; config.pin_vsync = VSYNC_GPIO_NUM; config.pin_href = HREF_GPIO_NUM; config.pin_sccb_sda = SIOD_GPIO_NUM; config.pin_sccb_scl = SIOC_GPIO_NUM; config.pin_pwdn = PWDN_GPIO_NUM; config.pin_reset = RESET_GPIO_NUM; config.xclk_freq_hz = 20000000; config.grab_mode = CAMERA_GRAB_WHEN_EMPTY; config.fb_location = CAMERA_FB_IN_PSRAM; config.frame_size = FRAMESIZE_QVGA; //FRAMESIZE_240X240; config.pixel_format = PIXFORMAT_RGB565; config.fb_count = 2;

// Initialize the camera esp_err_t err = esp_camera_init(&config); if (err != ESP_OK) { Serial.printf("ERROR: Camera init failed with code 0x%x", err); return false; }

// Configure the camera's sensor sensor_t *s = esp_camera_sensor_get(); // s->set_brightness(s, -1); //causing more: cam_hal: EV-VSYNC-OVF // s->set_contrast(s, 1); //causing more: cam_hal: EV-VSYNC-OVF s->set_vflip(s, 1); s->set_hmirror(s, 0);

return true; }

void setup() { Serial.begin(115200); Serial.setDebugOutput(true); Serial.println("sketch name: " + sketch_name);

pinMode(LED_BUILTIN, OUTPUT); digitalWrite(LED_BUILTIN, HIGH); pinMode(ENROLL_BUTTON, INPUT_PULLUP);

if (!initCamera()) { Serial.println("Camera init failed!"); }

// Initialize face recognition filter and partition ra_filter_init(&ra_filter, 20); recognizer.set_partition(ESP_PARTITION_TYPE_DATA, ESP_PARTITION_SUBTYPE_ANY, "fr");

ifdef SAVE_FACES_TO_FLASH

// Optionally load face ids from flash partition recognizer.set_ids_from_flash();

endif

Serial.println("start detecting and recognizing faces"); }

void loop() { // If the enroll button is pressed, enroll a new face if (digitalRead(ENROLL_BUTTON) == LOW) { is_enrolling = true; Serial.println("Enrolling face.."); digitalWrite(LED_BUILTIN, LOW); // turn the LED on (HIGH is the voltage level) delay(10); // wait for a second digitalWrite(LED_BUILTIN, HIGH); // turn the LED off by making the voltage LOW delay(10); }

// capture from the camera into the frame buffer fb = esp_camera_fb_get(); if (!fb) { Serial.printf("ERROR: Camera capture failed\n"); } else { // Face detection: std::list &candidates = s1.infer((uint16_t )fb->buf, { (int)fb->height, (int)fb->width, 3 }); std::list &results = s2.infer( (uint16_t )fb->buf, { (int)fb->height, (int)fb->width, 3 }, candidates); if (results.size() > 0) { Serial.println("Detected face!"); int face_id = 0;

  size_t out_len, out_width, out_height;
  uint8_t *out_buf;
  bool s;

  out_len = fb->width * fb->height * 3;
  out_width = fb->width;
  out_height = fb->height;
  out_buf = (uint8_t *)malloc(out_len);
  if (!out_buf) {
    log_e("out_buf malloc failed");
  }

  //convert to rgb888 for better perfomances
  s = fmt2rgb888(fb->buf, fb->len, fb->format, out_buf);
  free(out_buf);
  if (!s) {
    free(out_buf);
    log_e("conversion to rgb888 failed");
  }
  esp_camera_fb_return(fb);

  fb_data_t rfb;
  rfb.width = out_width;
  rfb.height = out_height;
  rfb.data = out_buf;
  rfb.bytes_per_pixel = 3;
  rfb.format = FB_BGR888;

  // Face recognition is SLOW! So, only attempt it if we are enrolling a
  // new face, or have previously enrolled a face
  if (recognizer.get_enrolled_id_num() > 0 || is_enrolling) {
    face_id = run_face_recognition(&rfb, &results);
  } else {
    face_id = 0;
    Serial.println("no enrolled faces -> not running face recognition");
  }
}

} // Release the framebuffer esp_camera_fb_return(fb); }


ra_filter.h (H file):

// SPDX-FileCopyrightText: 2018 me-no-dev for Espressif Systems // // SPDX-License-Identifier: LGPL-2.1-or-later // // Modified by Brent Rubell for Adafruit Industries

// RA Filtering: running average over the most recent `size` samples.
typedef struct {
  size_t size;   // number of values used for filtering
  size_t index;  // current value index
  size_t count;  // value count
  int sum;       // sum of the samples currently stored in values[]
  int *values;   // array to be filled with values
} ra_filter_t;

static ra_filter_t ra_filter;

/*
 * Reset `filter` and allocate zeroed storage for `sample_size` samples.
 *
 * Returns `filter` on success, or NULL when the allocation fails (the filter
 * is then left zeroed, and ra_filter_run() passes values straight through).
 * The caller owns filter->values and releases it with free().
 */
static ra_filter_t *ra_filter_init(ra_filter_t *filter, size_t sample_size) {
  memset(filter, 0, sizeof(ra_filter_t));

  /* The cast is required because this header is also compiled as C++. */
  filter->values = (int *)malloc(sample_size * sizeof(int));
  if (!filter->values) {
    return NULL;
  }
  memset(filter->values, 0, sample_size * sizeof(int));

  filter->size = sample_size;
  return filter;
}

/*
 * Push `value` into the window and return the average of the samples seen so
 * far (at most `size` of them).
 *
 * A filter without storage or with a zero-length window returns `value`
 * unchanged instead of indexing an empty array and dividing by zero.
 */
static int ra_filter_run(ra_filter_t *filter, int value) {
  if (!filter->values || filter->size == 0) {
    return value;
  }
  /* Replace the oldest sample and keep the running sum in step. */
  filter->sum -= filter->values[filter->index];
  filter->values[filter->index] = value;
  filter->sum += filter->values[filter->index];
  filter->index++;
  filter->index = filter->index % filter->size;
  if (filter->count < filter->size) {
    filter->count++;
  }
  /* Divide as signed ints: int / size_t would convert a negative sum to a
     huge unsigned value and return a wrong average. */
  return filter->sum / (int)filter->count;
}



Partitions.csv file:
![image](https://github.com/user-attachments/assets/7fb15a1f-9de8-4849-aec6-83c0a233cdb8)