tensorflow / tfjs

A WebGL accelerated JavaScript library for training and deploying ML models.
https://js.tensorflow.org
Apache License 2.0
18.48k stars 1.93k forks source link

WASM backend doesn't use SIMD #3896

Closed ghli2 closed 4 years ago

ghli2 commented 4 years ago

Versions: tfjs 2.3.0, tfjs-backend-wasm 2.3.0 Browser: Electron 10

Describe the current behavior tfjs requests tfjs-backend-wasm.wasm instead of tfjs-backend-wasm-threaded-simd.wasm. Wasm then runs very slowly.

Describe the expected behavior An optimized wasm file should have been pulled and used. I tried the demo https://tfjs-wasm-simd-demo.netlify.app/ and I do have simd and multithreading capabilities.

Standalone code to reproduce the issue index.html

<!DOCTYPE html>
<html>
<head>
  <!-- jQuery is loaded over HTTPS so it is not blocked as mixed content when this page is served over HTTPS -->
  <script src="https://ajax.googleapis.com/ajax/libs/jquery/1.7.1/jquery.min.js" type="text/javascript"></script>
  <title>Video</title>
  <!-- Import @tensorflow/tfjs or @tensorflow/tfjs-core -->
  <script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs"></script>
  <!-- Adds the WAsm backend to the global backend registry -->
  <script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs-backend-wasm/dist/tf-backend-wasm.js"></script>
  <script>
    // Select the WASM backend; index.js waits on tf.ready() before loading the model.
    tf.setBackend('wasm');
    // Tuning values read by index.js when segmenting and drawing each frame.
    const params = {
      segmentationThreshold: 0.5,
      scoreThreshold: 0.4,
      internalResolution: 'full',
      backgroundBlurAmount: 18,
      edgeBlurAmount: 13,
      opacity: 0.7,
      maskBlurAmount: 3,
    }
  </script>
  <script src="https://cdn.jsdelivr.net/npm/@tensorflow-models/body-pix"></script>
</head>
<body style="padding:5px; overflow:hidden">
  <!-- Hidden helper canvas that index.js sizes through its id. It lives in <body> because <canvas> is not allowed inside <head>. -->
  <canvas id="utilCanvas" style="visibility: hidden"></canvas>
  <div id="videoDiv" style="position: absolute; top: 20px; left: 20px; width: 50%; height: 100%;">
    <!-- The video and the canvas occupy the same rectangle; index.js toggles which one is visible. -->
    <video id="localVideo" autoplay="true"
      style="width: 640px; height: 480px; position:absolute; left:1%; overflow:scroll; z-index:1; top:10%;"></video>
    <canvas id="canvas" autoplay
      style="width: 640px; height: 480px; position:absolute; left:1%; overflow:scroll; z-index:1; top:10%;"></canvas>
    <button id='blurBtn'>blur</button>
    <button id='resetBtn'>reset</button>
  </div>
  <script src="index.js"></script>
</body>
</html>

index.js

// Module-level state shared by the functions in this script.
// <video> element showing the webcam stream; assigned in the document-ready handler.
var localVideo = null;
// <canvas> element the BodyPix output is drawn onto; assigned in the document-ready handler.
var canvas = null;
// Loaded BodyPix model; remains undefined until bodyPix.load() resolves.
var blurHandle;
// Presumably tracks whether the background effect is switched on; starts off.
var enableBgMod = false;

/**
 * Shows either the processed canvas or the raw video and hides the other one.
 *
 * @param {string} switchToTrack - "canvas" reveals the canvas; any other value
 *     reveals the video element.
 */
function switchToTrack(switchToTrack) {
    const showCanvas = switchToTrack === "canvas";
    const [toHide, toShow] = showCanvas ? [localVideo, canvas] : [canvas, localVideo];
    // Hide the outgoing element first, then reveal the other one.
    toHide.style.visibility = "hidden";
    toShow.style.visibility = "visible";
}

/**
 * Click handler for the "blur" button: starts the background-effect loop.
 *
 * Does nothing when a loop is already running (so repeated clicks do not stack
 * several requestAnimationFrame loops) or when `blurHandle` has not been
 * assigned yet because the BodyPix model is still loading.
 */
function blurBg() {
    if (enableBgMod) {
        return;
    }
    if (!blurHandle) {
        console.log("BodyPix model is not loaded yet.");
        return;
    }
    segmentBodyInRealTime();
}

/**
 * Click handler for the "reset" button: stops the background-effect loop and
 * shows the raw video again.
 */
function showBg() {
    // Clearing the flag makes the running loop exit instead of switching back
    // to the canvas on its next frame.
    enableBgMod = false;
    switchToTrack('video');
}

/**
 * Runs person segmentation on the webcam video once per animation frame and
 * draws the result onto the canvas until `enableBgMod` is cleared.
 *
 * The first processed frame draws a mask over the video; every later frame
 * draws the bokeh (background blur) effect.
 */
function segmentBodyInRealTime() {
    let mask = true;
    const flipHorizontal = false;
    enableBgMod = true;

    async function bodySegmentationFrame() {
        if (!enableBgMod) {
            return;
        }
        try {
            const personSegmentation = await blurHandle.segmentPerson(localVideo, {
                internalResolution: params.internalResolution,
                segmentationThreshold: params.segmentationThreshold,
                scoreThreshold: params.scoreThreshold,
            });
            // The effect may have been switched off while the model was running.
            if (!enableBgMod) {
                return;
            }
            if (mask) {
                const backgroundDarkeningMask = bodyPix.toMask(personSegmentation, { r: 0, g: 0, b: 0, a: 0 }, { r: 255, g: 255, b: 255, a: 255 });
                bodyPix.drawMask(canvas, localVideo, backgroundDarkeningMask, params.opacity, params.maskBlurAmount);
                mask = false;
            } else {
                bodyPix.drawBokehEffect(canvas, localVideo, personSegmentation, params.backgroundBlurAmount, params.edgeBlurAmount, flipHorizontal);
            }
            switchToTrack('canvas');
            window.requestAnimationFrame(bodySegmentationFrame);
        } catch (e) {
            // Stop the loop and report the failure instead of leaving an
            // unhandled rejection; clearing the flag lets "blur" be retried.
            enableBgMod = false;
            console.error(e);
        }
    }

    window.requestAnimationFrame(bodySegmentationFrame);
}

// Start the webcam and load the BodyPix model, but only where getUserMedia is
// available. navigator.mediaDevices is itself undefined in insecure contexts,
// so it is checked first instead of being dereferenced unconditionally.
if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
    // NOTE(review): `frameRate` is a per-track constraint. Placed next to
    // `video` (rather than inside it) it is ignored by getUserMedia; move it
    // under `video: { ... }` if the 1 fps cap is actually wanted.
    navigator.mediaDevices.getUserMedia({ video: true, frameRate: { max: 1 } })
        .then(function (stream) {
            localVideo.srcObject = stream;
        })
        .catch(function (e) {
            console.log(e);
            console.log("Something went wrong!");
        });
    // Wait for the selected TensorFlow.js backend before loading the model.
    tf.ready().then(() => bodyPix.load({
        architecture: 'MobileNetV1',
        outputStride: 16,
        multiplier: 1,
        quantBytes: 4,
    })).then(function (out) {
        blurHandle = out;
    }).catch(function (e) {
        // Report backend/model failures instead of leaving an unhandled rejection.
        console.log(e);
        console.log("Failed to load the BodyPix model.");
    });
}

// Once the DOM is ready: wire the two buttons, cache the media elements and
// give the video and both canvases the same 640x480 frame size.
$(document).ready(function onDocumentReady() {
    const FRAME_WIDTH = 640;
    const FRAME_HEIGHT = 480;

    $('button#blurBtn').click(() => {
        blurBg();
    });
    $('button#resetBtn').click(() => {
        showBg();
    });

    localVideo = document.getElementById("localVideo");
    canvas = document.getElementById('canvas');

    // `utilCanvas` is not declared in this script; presumably it resolves to
    // the element with that id in index.html.
    const sizedElements = [canvas, utilCanvas, localVideo];
    sizedElements.forEach((element) => {
        element.width = FRAME_WIDTH;
    });
    sizedElements.forEach((element) => {
        element.height = FRAME_HEIGHT;
    });
});

(On a related side note, would the wasm optimizations even be faster than webgl?)

annxingyuan commented 4 years ago

Hi @ghli2 - the netlify demo is subscribed to a Chrome origin trial for WASM SIMD. Unless you've also opted your demo domain in to an origin trial, then you have to manually enable SIMD by going to chrome://flags ("WebAssembly SIMD support", "WebAssembly Threads support") - hopefully that will fix it!

ghli2 commented 4 years ago

I enabled SIMD in the browser and now it does use the correct ...-simd-threads.wasm binary. However, I noticed that, at least for BodyPix, the ResNet50 model/architecture does not seem to work with wasm (it doesn't classify people at all) and shows the warning "XNN status for xnn_create_max_pooling2d_nhwc_f32 is not successful." Is ResNet50 not compatible with wasm?

I also noticed that on the blazeface demo, using wasm, my gpu spins up to 20% while there is effectively no GPU utilization with BodyPix, does WASM use GPU acceleration as well?

On a related note, I am exploring the use of these WASM enhancements in the interest of speed. I've read that MobileNetV2 is faster than MobileNetV1. BodyPix currently uses MobileNetV1; seeing how both architectures are 'MobileNet', is there a way, short of retraining a model, to switch to MobileNetV2?

google-ml-butler[bot] commented 4 years ago

This issue has been automatically marked as stale because it has not had recent activity. It will be closed in 7 days if no further activity occurs. Thank you.

google-ml-butler[bot] commented 4 years ago

Closing as stale. Please @mention us if this needs more attention.