gpujs / gpu.js

GPU Accelerated JavaScript
https://gpu.rocks
MIT License
15.08k stars 650 forks source link

nodejs sample test used gpu, but the kernel.build takes much longer than browser, sample code attached #691

Closed lancety closed 3 years ago

lancety commented 3 years ago

[updates 02 May]

The code below runs well in the browser, but in Node.js the "tBuild" step takes much longer; please ignore that the kernel logic does not match the dimensions of the arrays passed in (as long as that does not affect kernel build time).

data 0 init gpu 263 init cpu 0 tFn 3 tBuild 156 run gpu.js array run 2 run 1 run 1 run gpu.js typedarray run 1 run 0 run 1 run cpu.js run 22 run 22 run 18 run js native run 30 run 35 run 23

`

var GPU = globalThis.GPU || require("gpu.js").GPU; var input = GPU.input || require("gpu.js").input;

function run() {

const testSize = 146;
const loop = 1;

/**
 * Produces the benchmark input: a flat list of testSize * testSize numbers
 * laid out row after row, where every entry holds the index of its row.
 *
 * @returns {number[]} e.g. [0, 0, 1, 1] when testSize is 2.
 */
const generateMatrices = () =>
    Array.from({ length: testSize }, (_, row) => Array(testSize).fill(row)).flat();

// Phase 1: build the input data and report how long that took ("data").
const tStart = Date.now();

// `arr` is the flat number list returned by generateMatrices();
// `typed` is an Int16Array copy of the same values.
const arr = generateMatrices();
const typed = Int16Array.from(arr);
const tData = Date.now();
console.log("data", tData - tStart)

// Phase 2: time the construction of a GPU instance in "gpu" mode ("init gpu").
const gpu = new GPU({
    mode: "gpu",
});
let tInit = Date.now();
console.log("init gpu", tInit - tData);

// Phase 3: time the construction of a second instance in "cpu" mode ("init cpu").
const cpu = new GPU({
    mode: "cpu"
});
console.log("init cpu", Date.now() - tInit)
// Reset the reference point so the next measurement ("tFn") starts from here.
tInit = Date.now()

// Kernel created on the "gpu"-mode instance. The output is declared as
// [testSize, testSize]; each thread returns the sum, for i in
// [0, constants.size), of a[this.thread.y][i] * b[i][this.thread.x].
// NOTE(review): the kernel indexes `a` and `b` as 2-D, while run() later
// passes flat arrays; the issue text says this mismatch should be ignored
// for the purpose of the timing comparison.
const multiplyMatrix = gpu.createKernel(function (a, b) {
    let sum = 0;
    for (let i = 0; i < this.constants.size; i++) {
        sum += a[this.thread.y][i] * b[i][this.thread.x];
    }
    return sum;
}, {
    output: [testSize, testSize],
    constants: {
        size: testSize
    },
})
// Same kernel source and settings, created on the "cpu"-mode instance so the
// two modes can be timed against each other.
const multiplyMatrixCpu = cpu.createKernel(function (a, b) {
    let sum = 0;
    for (let i = 0; i < this.constants.size; i++) {
        sum += a[this.thread.y][i] * b[i][this.thread.x];
    }
    return sum;
}, {
    output: [testSize, testSize],
    constants: {
        size: testSize
    },
})

/**
 * Plain-JavaScript reference implementation of the matrix product that the
 * gpu.js kernels compute, for testSize x testSize operands.
 *
 * @param {ArrayLike<ArrayLike<number>>} a - left operand, read as a[row][i].
 * @param {ArrayLike<ArrayLike<number>>} b - right operand, read as b[i][col].
 * @returns {number[][]} c, where c[y][x] is the sum over i of a[y][i] * b[i][x].
 */
const multiplyNative = function(a, b) {
    const c = [];
    for (let y = 0; y < testSize; y++) {
        const row = [];
        for (let x = 0; x < testSize; x++) {
            // Accumulate from 0, exactly like `sum` in the kernels. Seeding the
            // cell with the row index added `y` to every result in row y.
            let sum = 0;
            for (let i = 0; i < testSize; i++) {
                sum += a[y][i] * b[i][x];
            }
            row.push(sum);
        }
        c[y] = row;
    }
    return c;
}

// "tFn": milliseconds spent defining the two kernels and multiplyNative,
// measured from the point where tInit was last reset.
const tFn = Date.now();
console.log('tFn', tFn - tInit);

// Build the gpu-mode kernel explicitly with the plain-array arguments so that
// its cost is reported on its own ("tBuild") instead of being folded into the
// first timed run. NOTE(review): presumably build() compiles the kernel for
// these argument types; confirm against the gpu.js documentation.
multiplyMatrix.build(arr, arr)
// Alternative kept by the author: build with `input(...)`-wrapped typed arrays.
// multiplyMatrix.build(input(typed, [arr.length]), input(typed, [arr.length]))
const tBuild = Date.now();
console.log('tBuild', tBuild - tFn)

let out;

// Prints `label`, then calls `invoke` `loop` times; each call's wall-clock
// duration in milliseconds is logged as "run <ms>" and its result is kept
// in `out`.
const timeRuns = (label, invoke) => {
    console.log(label)
    for (let pass = 0; pass < loop; pass++) {
        const startedAt = Date.now();
        out = invoke()
        console.log("run", Date.now() - startedAt)
    }
}

// gpu-mode kernel fed with the plain number array.
timeRuns("run gpu.js array", () => multiplyMatrix(arr, arr))

// gpu-mode kernel fed with the Int16Array wrapped by `input(...)`; the
// wrappers are created inside the timed call, once per iteration.
timeRuns("run gpu.js typedarray", () => multiplyMatrix(input(typed, [arr.length]), input(typed, [arr.length])))

// cpu-mode kernel fed with the Int16Array directly.
timeRuns("run cpu.js", () => multiplyMatrixCpu(typed, typed))

// Hand-written JavaScript loops fed with the Int16Array directly.
timeRuns("run js native", () => multiplyNative(typed, typed))

}

run(); `

lancety commented 3 years ago

seems I worked out enough info which answered my question, will close this

robertleeplummerjr commented 3 years ago

What were your findings?

lancety commented 3 years ago

My findings are the sentences with "bold" style

In summary

  1. The first build in the browser takes the same time as the kernel build in Node.js. But I guess that, because of the browser cache or some backend magic, refreshing the page does not rebuild the kernel, so it is very quick when the page is refreshed and the same code is rerun.
  2. gpu.js is powerful on heavy calculation tasks. I tried running the code below and noticed that if the "size" is greater than 254, the build time is very low, much lower than native JS; but for output sizes below 254 the kernel build time becomes increasingly higher, and at some "size" value building the kernel takes longer than running the native code.
  3. the odd/even output size does not affect build time - different to what I thought when writing this question
  4. Using "input" when passing variables into the kernel takes less time than passing arrays directly, but the difference is not as large as I thought.
  5. The build time is affected by whether "this.thread.x/y" is used directly inside the loop; if it is, the loop conversion converts the loop logic loop-size times, which takes time.

I am working on my first game project and looking for a solution to improve the contour and elevation calculation on a huge game map, which calculates values of 5k+ sides triangles. gpu.js will definitely be my first option; these learnings are useful.