victordibia / handtrack.js

A library for prototyping realtime hand detection (bounding box), directly in the browser.
https://victordibia.com/handtrack.js/
MIT License
2.83k stars 250 forks source link

post-processing optimization suggestion (with code) #62

Open vladmandic opened 2 years ago

vladmandic commented 2 years ago

currently the handdetector.js module downloads all results and processes per-class scores in js before going back to tfjs,
which causes large data transfers from the tfjs backend
(either from the gpu (when used with the webgl tfjs backend) or from within the webassembly context (when used with the wasm tfjs backend)),
and then recreates the required tensor and uploads it back to the tfjs backend

that is an unnecessary double round-trip for the data, plus a cpu-intensive processing loop in js

a much more efficient approach is to handle as much processing as possible using tfjs and only download final results
(not to mention much shorter code)

/**
 * Runs the detection model on `input` and returns the best detections per class,
 * keeping score filtering and non-max suppression on the tfjs backend and
 * downloading only the selected boxes and scores.
 *
 * Relies on names defined outside this function: `tf`, `models`,
 * `modelOutputNodes`, `maxDetected`, `iouThreshold`, `minConfidence`, `classes`.
 *
 * @param input - Tensor passed to `models.executeAsync`.
 * @param outputSize - `[width, height]` multipliers used to scale the normalized
 *   box back to the target resolution; the default `[1, 1]` leaves it unscaled.
 * @returns Promise resolving to an array of `{ score, box, boxRaw, class }`,
 *   where `boxRaw` is `[x, y, width, height]` as produced by the model and
 *   `box` is the same box scaled by `outputSize` and truncated to integers.
 */
async function detectHands(input: Tensor, outputSize = [1, 1]) {
  // tf.tidy() cannot be used here: it only accepts a synchronous callback, and
  // this function has to await model execution, NMS and data downloads.
  // Every tensor created below is therefore tracked and disposed explicitly.
  const owned = [];
  const hands = [];

  // Downloads a tensor's values and releases it, even if the download fails.
  const downloadAndDispose = async (tensor) => {
    try {
      return await tensor.data();
    } finally {
      tensor.dispose();
    }
  };

  try {
    const [rawScores, rawBoxes] = await models.executeAsync(input, modelOutputNodes);
    owned.push(rawScores, rawBoxes);
    // drop the singleton dimensions at axes 0 and 2
    // NOTE(review): assumes rawBoxes is [1, N, 1, 4] and rawScores is [1, N, numClasses] - confirm against the model
    const boxes = tf.squeeze(rawBoxes, [0, 2]);
    owned.push(boxes);
    const scores = tf.squeeze(rawScores, [0]);
    owned.push(scores);
    // split all-scores into individual per-class scores
    const classScores = tf.unstack(scores, 1);
    owned.push(...classScores);

    // process data once per class
    // could also be used to process only some classes, e.g. skip face if desired
    for (let i = 0; i < classScores.length; i++) {
      // indices of the best results for this class
      const nmsT = await tf.image.nonMaxSuppressionAsync(boxes, classScores[i], maxDetected, iouThreshold, minConfidence);
      const nms = await downloadAndDispose(nmsT);
      for (const res of nms) { // now process only those best results
        // download just the box we need
        const yxBox = await downloadAndDispose(tf.slice(boxes, res, 1));
        // convert [y1,x1,y2,x2] to [x,y,width,height]
        const boxRaw = [yxBox[1], yxBox[0], yxBox[3] - yxBox[1], yxBox[2] - yxBox[0]];
        // scale back to original resolution
        const box = [
          Math.trunc(boxRaw[0] * outputSize[0]),
          Math.trunc(boxRaw[1] * outputSize[1]),
          Math.trunc(boxRaw[2] * outputSize[0]),
          Math.trunc(boxRaw[3] * outputSize[1]),
        ];
        // download just the score we need
        const score = await downloadAndDispose(tf.slice(classScores[i], res, 1));
        hands.push({ score: score[0], box, boxRaw, class: classes[i] });
      }
    }
  } finally {
    // release all intermediate tensors whether or not processing succeeded
    tf.dispose(owned);
  }
  return hands;
}

i'd post this as a PR, but it's a total change of the library, so posting here instead...

.hth.