Human: AI-powered 3D Face Detection & Rotation Tracking, Face Description & Recognition, Body Pose Tracking, 3D Hand & Finger Tracking, Iris Analysis, Age & Gender & Emotion Prediction, Gaze Tracking, Gesture Recognition
Originally posted by **StillTravelling** May 20, 2024
I'm not sure if this is a bug or not, but in order for face detection to work consistently I'm having to pad out an HD image. The incoming video is 25 fps. Using human 3.2.2 on node.
Here's my code:
```
// Node GPU build of @vladmandic/human, loaded by absolute path.
const Human = require('C:\\TestNode\\node_modules\\@vladmandic\\human\\dist\\human.node-gpu.js').default;
// Detection-only configuration: only the face bounding-box detector is enabled;
// mesh, iris, description, emotion, body, hand, etc. are all switched off.
const humanConfig = {
modelBasePath: 'file://hmodels', // load model files from a local folder
debug: false,
async: true,
filter: { enabled: false }, // no input pre-processing filters
cacheSensitivity : 0.9, // high value: reuse cached results unless the frame changes substantially
//skipAllowed: true,
//skipFrames: 200,
face: {
enabled: true,
// single face, no rotation correction, fairly strict confidence threshold
detector: { enabled: true, maxDetected: 1, rotation: false, minConfidence: 0.8 },
attention: { enabled: false },
mesh: { enabled: false },
iris: { enabled: false },
description: { enabled: false },
emotion: { enabled: false },
antispoof: { enabled: false },
liveness: { enabled: false },
},
gesture: { enabled: false },
hand: { enabled: false },
body: { enabled: false },
object: { enabled: false },
segmentation: { enabled: false}
};
// Shared Human instance used by all helpers below.
const human = new Human(humanConfig);
...
/**
 * Copies a tensor's raw data into a Node Buffer (e.g. so sharp can consume it).
 * @param {object} tensor - any object with an async `data()` method returning a TypedArray
 * @returns {Promise<Buffer>} buffer containing the tensor's bytes
 */
async function tensorToBuffer(tensor) {
  return Buffer.from(await tensor.data());
}
/**
 * Bilinear-resizes an image tensor to width x height.
 * Note: tf.image.resizeBilinear takes dimensions as [height, width].
 */
function resizeImage(imageTensor, width, height) {
  const targetSize = [height, width];
  return human.tf.image.resizeBilinear(imageTensor, targetSize);
}
/**
 * Crops a [height, width, channels] image tensor to the given rectangle,
 * keeping all channels (-1 on the channel axis).
 */
function extractImage(imageTensor, left, top, width, height) {
  const begin = [top, left, 0];
  const size = [height, width, -1];
  return imageTensor.slice(begin, size);
}
/**
 * Zero-pads an image tensor symmetrically up to targetWidth x targetHeight.
 * Odd leftover pixels go to the bottom/right edge (floor on top/left).
 */
function padImage(imageTensor, targetWidth, targetHeight) {
  const [height, width] = imageTensor.shape;
  const padTop = Math.floor((targetHeight - height) / 2);
  const padLeft = Math.floor((targetWidth - width) / 2);
  const paddings = [
    [padTop, targetHeight - height - padTop],   // rows
    [padLeft, targetWidth - width - padLeft],   // cols
    [0, 0],                                     // channels untouched
  ];
  return human.tf.pad(imageTensor, paddings);
}
let lastbox;          // last detected face box, kept across frames
let facedetect = false; // whether the most recent frame contained a face

/**
 * Decodes a JPEG frame and extracts a region of interest according to the
 * current aspect-ratio mode, returning a sharp image for later resizing.
 *
 * @param {Buffer} frameToSend - JPEG-encoded frame
 * @param {number} arW - source frame width (e.g. 1920)
 * @param {number} arH - source frame height (e.g. 1080)
 * @param {number} fW - unused here; kept for caller compatibility
 * @param {number} fH - unused here; kept for caller compatibility
 * @returns {Promise<object|undefined>} sharp instance, or undefined when busy
 */
async function processImage2(frameToSend, arW, arH, fW, fH){
  let extractedImage;
  const tensor = await human.tf.node.decodeJpeg(frameToSend, 3); // decode jpeg buffer to raw RGB tensor
  let current_f = img_dim2;
  let current_ar = use_aspect_ratio; // snapshot here because use_aspect_ratio might change mid-processing when received as msg
  if (current_ar == 'cover') {
    extractedImage = tensor;
  }
  else if (['32', '64', '128', '256', '512'].includes(current_ar)) { // select the center N pixels of the image
    let nv = parseInt(current_ar, 10); // FIX: explicit radix
    extractedImage = await human.tf.tidy(() => extractImage(tensor, (arW - nv) / 2, (arH - nv) / 2, nv, nv));
  }
  else if (['32t', '64t', '128t', '256t', '512t'].includes(current_ar)) { // center N pixels, height offset upwards
    let nv = parseInt(current_ar.replace('t', ''), 10);
    let nvt = nv * 2;
    extractedImage = await human.tf.tidy(() => extractImage(tensor, (arW - nv) / 2, (arH - nvt) / 2, nv, nv));
  }
  else if (current_ar.includes('face')) {
    if (busy) {
      human.tf.dispose(tensor);
      console.log("Busy");
      return;
    }
    busy = true;
    try {
      let nW = arW / current_f; // e.g. arW=1920, current_f=4 -> nW=480
      let nH = arH / current_f; // e.g. arH=1080, current_f=4 -> nH=270
      const tensor_r = await human.tf.tidy(() => resizeImage(tensor, nW, nH)); // downscale to improve performance
      const tensor_b = await human.tf.tidy(() => padImage(tensor_r, nW, nW)); // pad to a square to improve face detection
      human.tf.dispose(tensor_r);
      let res;
      if (current_ar == 'faceInterpolated') {
        const res1 = await human.detect(tensor_b);
        res = await human.next(res1); // interpolated/smoothed results
      }
      else {
        res = await human.detect(tensor_b);
      }
      if (res?.face?.[0]) {
        facedetect = true;
        const thebox = res.face[0].box; // FIX: declared locally (was an implicit global)
        let left = thebox[0];
        let top = thebox[1];
        let width2 = thebox[2];
        let height2 = thebox[3];
        lastbox = thebox;
        // Clamp coordinates and dimensions to the padded (nW x nW) square
        if (left < 0) left = 0;
        if (top < 0) top = 0;
        if (left + thebox[2] > nW) width2 = nW - left;
        if (top + thebox[3] > nW) height2 = nW - top;
        extractedImage = await human.tf.tidy(() => extractImage(tensor_b, left, top, width2, height2));
      }
      else {
        facedetect = false;
        extractedImage = tensor;
      }
      // FIX: dispose tensor_b on BOTH paths; it previously leaked whenever no
      // face was found, degrading performance frame over frame (25fps -> ~17fps).
      human.tf.dispose(tensor_b);
    }
    finally {
      busy = false; // FIX: clear the busy flag even if detect() throws, avoiding a permanent lockout
    }
  }
  else { // contain
    extractedImage = tensor;
  }
  const exBuffer = await tensorToBuffer(extractedImage); // raw bytes so sharp can consume them
  // Export via sharp to resize later, as tf resizeBilinear quality is poor.
  // FIX: declared with const (was an implicit global).
  const sharpImage = sharp(exBuffer, { raw: { width: extractedImage.shape[1], height: extractedImage.shape[0], channels: 3 } });
  // FIX: only dispose extractedImage when it is a distinct tensor, avoiding a
  // double-dispose of `tensor` on the 'cover' / 'contain' / no-face paths.
  if (extractedImage !== tensor) human.tf.dispose(extractedImage);
  human.tf.dispose(tensor);
  return sharpImage;
}
```
Is this a bug? I can't find anywhere in the documentation that says the input image should be cropped or padded into a square.
Finally, performance is great when a face is found, but when a face isn't found, performance drops from 25fps to about 17fps.
Discussed in https://github.com/vladmandic/human/discussions/468