gpujs / gpu.js

GPU Accelerated JavaScript
https://gpu.rocks
MIT License
15.14k stars 658 forks source link

Strange output on Array3D(3) toArray() #622

Open callumhay opened 4 years ago

callumhay commented 4 years ago

What is wrong?

When using a kernel to draw a CPU Array3D(3) into a texture and then converting it back using toArray(), the resulting array is very strange and wrong: some values are correct, while others are either duplicated or entirely incorrect.

Where does it happen?

In GPU.js, running on my Windows 7 PC, headlessgl in node.js v12.18.1.

How do we replicate the issue?

Create a 3D array and assign each value set to its index in the array... e.g.,

// Edge length of the cubic voxel grid.
const gridSize = 8;
// buffer[x][y][z] is a Float32Array holding that cell's own [x, y, z] indices,
// so a value that ends up in the wrong cell is immediately recognisable.
const buffer = Array.from({ length: gridSize }, (_, x) =>
  Array.from({ length: gridSize }, (_, y) =>
    Array.from({ length: gridSize }, (_, z) => new Float32Array([x, y, z]))
  )
);

Build the following kernel:

// Settings for the copy kernel: a gridSize^3 output in which every thread
// returns a 3-component value.
// NOTE(review): per the GPU.js docs, `pipeline: true` should make the kernel
// return a texture object instead of a plain array — confirm for the version in use.
const pipelineFuncSettings = {
  output: [gridSize, gridSize, gridSize],
  pipeline: true,
  returnType: 'Array(3)',
};
// Copies each 3-component element of the argument to the output unchanged.
// The kernel body must stay within the JavaScript subset GPU.js can transpile.
// NOTE(review): the first index used on framebufTex is this.thread.x, so the
// result is presumably the x/z transpose of the input rather than an
// index-for-index copy — confirm against the GPU.js thread indexing convention.
const copyFramebufferFuncImmutable = gpu.createKernel(function(framebufTex) {
  const voxelColour = framebufTex[this.thread.x][this.thread.y][this.thread.z];
  return [voxelColour[0], voxelColour[1], voxelColour[2]];
}, {...pipelineFuncSettings, immutable: true, argumentTypes: {framebufTex: 'Array3D(3)'}});

Draw the CPU buffer into a GPU texture and retrieve it as an array:

// Run the copy kernel on the CPU-side nested array (CPU -> GPU).
const bufferTex = copyFramebufferFuncImmutable(buffer);
// Read the kernel result back into a CPU-side array (GPU -> CPU) so it can be
// compared with `buffer`.
const texArray  = bufferTex.toArray();

Then look to see if what we did was properly copied from the CPU -> GPU -> CPU:

/**
 * Logs a gridSize x gridSize x gridSize grid of 3-component values, one line
 * per (x, y) pair, each line listing the values for z = 0..gridSize-1 as
 * "(a,b,c)" with every component rounded to zero decimal places.
 *
 * Reads the module-level `gridSize`.
 *
 * @param {ArrayLike<ArrayLike<ArrayLike<ArrayLike<number>>>>} arr - Grid indexed as arr[x][y][z][component].
 * @returns {void}
 */
function debugPrint(arr) {
  // Declared locally: the snippet previously pushed into an undeclared
  // `strArr`, which throws a ReferenceError in strict/module code and would
  // otherwise accumulate lines across calls through an implicit global.
  const strArr = [];
  for (let x = 0; x < gridSize; x++) {
    for (let y = 0; y < gridSize; y++) {
      const temp = [];
      for (let z = 0; z < gridSize; z++) {
        const currColour = arr[x][y][z];
        temp.push(`(${currColour[0].toFixed(0)},${currColour[1].toFixed(0)},${currColour[2].toFixed(0)})`);
      }
      strArr.push(temp.join(", "));
    }
  }
  console.log(strArr.join("\n"));
}

... and get this comparative abomination:

CPU (Original/Control) i.e., debugPrint(buffer);:

(0,0,0), (0,0,1), (0,0,2), (0,0,3), (0,0,4), (0,0,5), (0,0,6), (0,0,7)
(0,1,0), (0,1,1), (0,1,2), (0,1,3), (0,1,4), (0,1,5), (0,1,6), (0,1,7)
(0,2,0), (0,2,1), (0,2,2), (0,2,3), (0,2,4), (0,2,5), (0,2,6), (0,2,7)
(0,3,0), (0,3,1), (0,3,2), (0,3,3), (0,3,4), (0,3,5), (0,3,6), (0,3,7)
(0,4,0), (0,4,1), (0,4,2), (0,4,3), (0,4,4), (0,4,5), (0,4,6), (0,4,7)
(0,5,0), (0,5,1), (0,5,2), (0,5,3), (0,5,4), (0,5,5), (0,5,6), (0,5,7)
(0,6,0), (0,6,1), (0,6,2), (0,6,3), (0,6,4), (0,6,5), (0,6,6), (0,6,7)
(0,7,0), (0,7,1), (0,7,2), (0,7,3), (0,7,4), (0,7,5), (0,7,6), (0,7,7)
(1,0,0), (1,0,1), (1,0,2), (1,0,3), (1,0,4), (1,0,5), (1,0,6), (1,0,7)
(1,1,0), (1,1,1), (1,1,2), (1,1,3), (1,1,4), (1,1,5), (1,1,6), (1,1,7)
(1,2,0), (1,2,1), (1,2,2), (1,2,3), (1,2,4), (1,2,5), (1,2,6), (1,2,7)
(1,3,0), (1,3,1), (1,3,2), (1,3,3), (1,3,4), (1,3,5), (1,3,6), (1,3,7)
(1,4,0), (1,4,1), (1,4,2), (1,4,3), (1,4,4), (1,4,5), (1,4,6), (1,4,7)
(1,5,0), (1,5,1), (1,5,2), (1,5,3), (1,5,4), (1,5,5), (1,5,6), (1,5,7)
(1,6,0), (1,6,1), (1,6,2), (1,6,3), (1,6,4), (1,6,5), (1,6,6), (1,6,7)
(1,7,0), (1,7,1), (1,7,2), (1,7,3), (1,7,4), (1,7,5), (1,7,6), (1,7,7)
(2,0,0), (2,0,1), (2,0,2), (2,0,3), (2,0,4), (2,0,5), (2,0,6), (2,0,7)
(2,1,0), (2,1,1), (2,1,2), (2,1,3), (2,1,4), (2,1,5), (2,1,6), (2,1,7)
(2,2,0), (2,2,1), (2,2,2), (2,2,3), (2,2,4), (2,2,5), (2,2,6), (2,2,7)
(2,3,0), (2,3,1), (2,3,2), (2,3,3), (2,3,4), (2,3,5), (2,3,6), (2,3,7)
(2,4,0), (2,4,1), (2,4,2), (2,4,3), (2,4,4), (2,4,5), (2,4,6), (2,4,7)
(2,5,0), (2,5,1), (2,5,2), (2,5,3), (2,5,4), (2,5,5), (2,5,6), (2,5,7)
(2,6,0), (2,6,1), (2,6,2), (2,6,3), (2,6,4), (2,6,5), (2,6,6), (2,6,7)
(2,7,0), (2,7,1), (2,7,2), (2,7,3), (2,7,4), (2,7,5), (2,7,6), (2,7,7)
(3,0,0), (3,0,1), (3,0,2), (3,0,3), (3,0,4), (3,0,5), (3,0,6), (3,0,7)
(3,1,0), (3,1,1), (3,1,2), (3,1,3), (3,1,4), (3,1,5), (3,1,6), (3,1,7)
(3,2,0), (3,2,1), (3,2,2), (3,2,3), (3,2,4), (3,2,5), (3,2,6), (3,2,7)
(3,3,0), (3,3,1), (3,3,2), (3,3,3), (3,3,4), (3,3,5), (3,3,6), (3,3,7)
(3,4,0), (3,4,1), (3,4,2), (3,4,3), (3,4,4), (3,4,5), (3,4,6), (3,4,7)
(3,5,0), (3,5,1), (3,5,2), (3,5,3), (3,5,4), (3,5,5), (3,5,6), (3,5,7)
(3,6,0), (3,6,1), (3,6,2), (3,6,3), (3,6,4), (3,6,5), (3,6,6), (3,6,7)
(3,7,0), (3,7,1), (3,7,2), (3,7,3), (3,7,4), (3,7,5), (3,7,6), (3,7,7)
(4,0,0), (4,0,1), (4,0,2), (4,0,3), (4,0,4), (4,0,5), (4,0,6), (4,0,7)
(4,1,0), (4,1,1), (4,1,2), (4,1,3), (4,1,4), (4,1,5), (4,1,6), (4,1,7)
(4,2,0), (4,2,1), (4,2,2), (4,2,3), (4,2,4), (4,2,5), (4,2,6), (4,2,7)
(4,3,0), (4,3,1), (4,3,2), (4,3,3), (4,3,4), (4,3,5), (4,3,6), (4,3,7)
(4,4,0), (4,4,1), (4,4,2), (4,4,3), (4,4,4), (4,4,5), (4,4,6), (4,4,7)
(4,5,0), (4,5,1), (4,5,2), (4,5,3), (4,5,4), (4,5,5), (4,5,6), (4,5,7)
(4,6,0), (4,6,1), (4,6,2), (4,6,3), (4,6,4), (4,6,5), (4,6,6), (4,6,7)
(4,7,0), (4,7,1), (4,7,2), (4,7,3), (4,7,4), (4,7,5), (4,7,6), (4,7,7)
(5,0,0), (5,0,1), (5,0,2), (5,0,3), (5,0,4), (5,0,5), (5,0,6), (5,0,7)
(5,1,0), (5,1,1), (5,1,2), (5,1,3), (5,1,4), (5,1,5), (5,1,6), (5,1,7)
(5,2,0), (5,2,1), (5,2,2), (5,2,3), (5,2,4), (5,2,5), (5,2,6), (5,2,7)
(5,3,0), (5,3,1), (5,3,2), (5,3,3), (5,3,4), (5,3,5), (5,3,6), (5,3,7)
(5,4,0), (5,4,1), (5,4,2), (5,4,3), (5,4,4), (5,4,5), (5,4,6), (5,4,7)
(5,5,0), (5,5,1), (5,5,2), (5,5,3), (5,5,4), (5,5,5), (5,5,6), (5,5,7)
(5,6,0), (5,6,1), (5,6,2), (5,6,3), (5,6,4), (5,6,5), (5,6,6), (5,6,7)
(5,7,0), (5,7,1), (5,7,2), (5,7,3), (5,7,4), (5,7,5), (5,7,6), (5,7,7)
(6,0,0), (6,0,1), (6,0,2), (6,0,3), (6,0,4), (6,0,5), (6,0,6), (6,0,7)
(6,1,0), (6,1,1), (6,1,2), (6,1,3), (6,1,4), (6,1,5), (6,1,6), (6,1,7)
(6,2,0), (6,2,1), (6,2,2), (6,2,3), (6,2,4), (6,2,5), (6,2,6), (6,2,7)
(6,3,0), (6,3,1), (6,3,2), (6,3,3), (6,3,4), (6,3,5), (6,3,6), (6,3,7)
(6,4,0), (6,4,1), (6,4,2), (6,4,3), (6,4,4), (6,4,5), (6,4,6), (6,4,7)
(6,5,0), (6,5,1), (6,5,2), (6,5,3), (6,5,4), (6,5,5), (6,5,6), (6,5,7)
(6,6,0), (6,6,1), (6,6,2), (6,6,3), (6,6,4), (6,6,5), (6,6,6), (6,6,7)
(6,7,0), (6,7,1), (6,7,2), (6,7,3), (6,7,4), (6,7,5), (6,7,6), (6,7,7)
(7,0,0), (7,0,1), (7,0,2), (7,0,3), (7,0,4), (7,0,5), (7,0,6), (7,0,7)
(7,1,0), (7,1,1), (7,1,2), (7,1,3), (7,1,4), (7,1,5), (7,1,6), (7,1,7)
(7,2,0), (7,2,1), (7,2,2), (7,2,3), (7,2,4), (7,2,5), (7,2,6), (7,2,7)
(7,3,0), (7,3,1), (7,3,2), (7,3,3), (7,3,4), (7,3,5), (7,3,6), (7,3,7)
(7,4,0), (7,4,1), (7,4,2), (7,4,3), (7,4,4), (7,4,5), (7,4,6), (7,4,7)
(7,5,0), (7,5,1), (7,5,2), (7,5,3), (7,5,4), (7,5,5), (7,5,6), (7,5,7)
(7,6,0), (7,6,1), (7,6,2), (7,6,3), (7,6,4), (7,6,5), (7,6,6), (7,6,7)
(7,7,0), (7,7,1), (7,7,2), (7,7,3), (7,7,4), (7,7,5), (7,7,6), (7,7,7)

GPU toArray() result i.e., debugPrint(texArray);:

(0,0,0), (1,0,0), (2,0,0), (3,0,0), (4,0,0), (5,0,0), (6,0,0), (7,0,0)
(0,1,0), (1,1,0), (2,1,0), (3,1,0), (4,1,0), (5,1,0), (6,1,0), (7,1,0)
(0,2,0), (1,2,0), (2,2,0), (3,2,0), (4,2,0), (5,2,0), (6,2,0), (7,2,0)
(0,3,0), (1,3,0), (2,3,0), (3,3,0), (4,3,0), (5,3,0), (6,3,0), (7,3,0)
(0,4,0), (1,4,0), (2,4,0), (3,4,0), (4,4,0), (5,4,0), (6,4,0), (7,4,0)
(0,5,0), (1,5,0), (2,5,0), (3,5,0), (4,5,0), (5,5,0), (6,5,0), (7,5,0)
(0,6,0), (1,6,0), (2,6,0), (3,6,0), (4,6,0), (5,6,0), (6,6,0), (7,6,0)
(0,7,0), (1,7,0), (2,7,0), (3,7,0), (4,7,0), (5,7,0), (6,7,0), (7,7,0)
(0,0,0), (1,0,4), (2,4,5), (3,2,4), (4,3,4), (5,4,4), (6,5,4), (7,6,4)
(0,0,1), (1,5,5), (2,1,5), (3,2,5), (4,3,5), (5,4,5), (6,5,5), (7,6,5)
(0,2,1), (1,0,6), (2,1,6), (3,2,6), (4,3,6), (5,4,6), (6,5,6), (7,6,5)
(0,0,3), (1,0,7), (2,1,7), (3,2,7), (4,3,7), (5,4,7), (6,7,5), (7,6,7)
(0,0,0), (1,1,0), (2,2,0), (3,3,0), (4,4,0), (5,0,5), (6,6,0), (7,7,0)
(0,0,1), (1,1,1), (2,2,1), (3,3,1), (4,1,5), (5,5,1), (6,6,1), (7,7,1)
(0,0,2), (1,1,2), (2,2,2), (3,2,5), (4,4,2), (5,5,2), (6,6,2), (7,7,2)
(0,0,3), (1,1,3), (2,3,5), (3,3,3), (4,4,3), (5,5,3), (6,6,3), (7,7,3)
(0,0,0), (1,0,4), (2,0,4), (3,0,4), (4,0,4), (5,0,4), (6,0,4), (7,0,4)
(0,1,1), (1,1,5), (2,1,5), (3,1,5), (4,1,5), (5,1,5), (6,1,5), (7,1,5)
(0,2,2), (1,2,6), (2,2,6), (3,2,6), (4,2,6), (5,2,6), (6,2,6), (7,2,6)
(0,3,3), (1,3,7), (2,3,7), (3,3,7), (4,3,7), (5,3,7), (6,3,7), (7,3,7)
(0,4,0), (1,4,0), (2,4,0), (3,4,0), (4,4,0), (5,4,0), (6,4,0), (7,4,0)
(0,5,1), (1,5,1), (2,5,1), (3,5,1), (4,5,1), (5,5,1), (6,5,1), (7,5,1)
(0,6,2), (1,6,2), (2,6,2), (3,6,2), (4,6,2), (5,6,2), (6,6,2), (7,6,2)
(0,7,3), (1,7,3), (2,7,3), (3,7,3), (4,7,3), (5,7,3), (6,7,3), (7,7,3)
(0,0,3), (1,0,3), (2,0,3), (3,0,3), (4,0,3), (5,0,3), (6,0,3), (7,0,3)
(0,1,3), (1,1,3), (2,1,3), (3,1,3), (4,1,3), (5,1,3), (6,1,3), (7,1,3)
(0,2,3), (1,2,3), (2,2,3), (3,2,3), (4,2,3), (5,2,3), (6,2,3), (7,2,3)
(0,3,3), (1,3,3), (2,3,3), (3,3,3), (4,3,3), (5,3,3), (6,3,3), (7,3,3)
(0,4,3), (1,4,3), (2,4,3), (3,4,3), (4,4,3), (5,4,3), (6,4,3), (7,4,3)
(0,5,3), (1,5,3), (2,5,3), (3,5,3), (4,5,3), (5,5,3), (6,5,3), (7,5,3)
(0,6,3), (1,6,3), (2,6,3), (3,6,3), (4,6,3), (5,6,3), (6,6,3), (7,6,3)
(0,7,3), (1,7,3), (2,7,3), (3,7,3), (4,7,3), (5,7,3), (6,7,3), (7,7,3)
(0,0,4), (1,0,4), (2,0,4), (3,0,4), (4,0,4), (5,0,4), (6,0,4), (7,0,4)
(0,1,4), (1,1,4), (2,1,4), (3,1,4), (4,1,4), (5,1,4), (6,1,4), (7,1,4)
(0,2,4), (1,2,4), (2,2,4), (3,2,4), (4,2,4), (5,2,4), (6,2,4), (7,2,4)
(0,3,4), (1,3,4), (2,3,4), (3,3,4), (4,3,4), (5,3,4), (6,3,4), (7,3,4)
(0,4,4), (1,4,4), (2,4,4), (3,4,4), (4,4,4), (5,4,4), (6,4,4), (7,4,4)
(0,5,4), (1,5,4), (2,5,4), (3,5,4), (4,5,4), (5,5,4), (6,5,4), (7,5,4)
(0,6,4), (1,6,4), (2,6,4), (3,6,4), (4,6,4), (5,6,4), (6,6,4), (7,6,4)
(0,7,4), (1,7,4), (2,7,4), (3,7,4), (4,7,4), (5,7,4), (6,7,4), (7,7,4)
(0,0,0), (1,0,5), (2,1,5), (3,2,5), (4,3,5), (5,5,1), (6,5,5), (7,6,5)
(0,0,1), (1,0,6), (2,1,6), (3,2,6), (4,6,1), (5,4,6), (6,5,6), (7,6,6)
(0,0,2), (1,0,7), (2,1,7), (3,7,1), (4,3,7), (5,4,7), (6,5,7), (7,6,7)
(0,0,0), (1,1,0), (2,0,1), (3,3,0), (4,4,0), (5,5,0), (6,6,0), (7,7,0)
(0,0,1), (1,1,1), (2,2,1), (3,3,1), (4,4,1), (5,5,1), (6,6,1), (7,7,1)
(0,2,1), (1,1,2), (2,2,2), (3,3,2), (4,4,2), (5,5,2), (6,6,2), (7,2,1)
(0,0,3), (1,1,3), (2,2,3), (3,3,3), (4,4,3), (5,5,3), (6,3,1), (7,7,3)
(0,0,4), (1,1,4), (2,2,4), (3,3,4), (4,4,4), (5,4,1), (6,6,4), (7,7,4)
(0,0,0), (1,0,5), (2,0,5), (3,0,5), (4,0,5), (5,0,5), (6,0,5), (7,0,5)
(0,1,1), (1,1,6), (2,1,6), (3,1,6), (4,1,6), (5,1,6), (6,1,6), (7,1,6)
(0,2,2), (1,2,7), (2,2,7), (3,2,7), (4,2,7), (5,2,7), (6,2,7), (7,2,7)
(0,3,0), (1,3,0), (2,3,0), (3,3,0), (4,3,0), (5,3,0), (6,3,0), (7,3,0)
(0,4,1), (1,4,1), (2,4,1), (3,4,1), (4,4,1), (5,4,1), (6,4,1), (7,4,1)
(0,5,2), (1,5,2), (2,5,2), (3,5,2), (4,5,2), (5,5,2), (6,5,2), (7,5,2)
(0,6,3), (1,6,3), (2,6,3), (3,6,3), (4,6,3), (5,6,3), (6,6,3), (7,6,3)
(0,7,4), (1,7,4), (2,7,4), (3,7,4), (4,7,4), (5,7,4), (6,7,4), (7,7,4)
(0,0,7), (1,0,7), (2,0,7), (3,0,7), (4,0,7), (5,0,7), (6,0,7), (7,0,7)
(0,1,7), (1,1,7), (2,1,7), (3,1,7), (4,1,7), (5,1,7), (6,1,7), (7,1,7)
(0,2,7), (1,2,7), (2,2,7), (3,2,7), (4,2,7), (5,2,7), (6,2,7), (7,2,7)
(0,3,7), (1,3,7), (2,3,7), (3,3,7), (4,3,7), (5,3,7), (6,3,7), (7,3,7)
(0,4,7), (1,4,7), (2,4,7), (3,4,7), (4,4,7), (5,4,7), (6,4,7), (7,4,7)
(0,5,7), (1,5,7), (2,5,7), (3,5,7), (4,5,7), (5,5,7), (6,5,7), (7,5,7)
(0,6,7), (1,6,7), (2,6,7), (3,6,7), (4,6,7), (5,6,7), (6,6,7), (7,6,7)
(0,7,7), (1,7,7), (2,7,7), (3,7,7), (4,7,7), (5,7,7), (6,7,7), (7,7,7)

How important is this (1-5)?

Strikes me as a pretty integral issue since there appears to be a non-identical transformation between CPU and GPU buffers.

Expected behavior (i.e. solution)

Those two arrays should be the same (or at least differ by an obvious matrix transform/mapping); instead, the toArray() buffer is filled with duplicate data and errors that don't map at all to the original CPU buffer.

callumhay commented 4 years ago

Tried to debug this... got to the point where I found the fragment shader code that may be the issue... perhaps the index mapping on the GPU side doesn't match the one used on the CPU side with glReadPixels (see erect3DArray3 in utils.js, and renderRawOutput in float.js)?

I've focused on the relevant frag shader code for my own clarity... still very difficult to follow all the mappings, I don't have enough familiarity with the code base to figure this out without spending a lot more time on it:

// Reads one 3-component element from a texture in which elements are packed
// back to back, 3 floats each, into 4-float texels. An element can therefore
// straddle two consecutive texels.
//   tex      - texture holding the packed data
//   texSize  - texture width (x) and height (y) in texels
//   texDim   - logical dimensions of the stored 3D array
//   z, y, x  - element coordinates (note the z, y, x parameter order)
vec3 getMemoryOptimizedVec3(sampler2D tex, ivec2 texSize, ivec3 texDim, int z, int y, int x) {
  // Flat float offset of the element's first component (x varies fastest).
  int fieldIndex = 3 * (x + texDim.x * (y + texDim.y * z));
  // Texel that contains that float, and the float's position (0..3) inside it.
  int vectorIndex = fieldIndex / 4;
  int vectorOffset = fieldIndex - vectorIndex * 4;
  // Row and column of that texel.
  int readY = vectorIndex / texSize.x;
  int readX = vectorIndex - readY * texSize.x;
  // The + 0.5 places the sample at the texel centre.
  vec4 tex1 = texture2D(tex, (vec2(readX, readY) + 0.5) / vec2(texSize));

  if (vectorOffset == 0) {
    return tex1.xyz;
  } else if (vectorOffset == 1) {
    return tex1.yzw;
  } else {
    // Offsets 2 and 3: the element continues in the next texel, wrapping to
    // the start of the next row when the current row is exhausted.
    readX++;
    if (readX >= texSize.x) {
      readX = 0;
      readY++;
    }
    // NOTE(review): unlike the tex1 lookup, this coordinate has no + 0.5, so
    // it lands on a texel corner instead of a texel centre; which texel is
    // returned there is presumably implementation-dependent, which would
    // explain wrong values only for elements that straddle texels — confirm.
    vec4 tex2 = texture2D(tex, vec2(readX, readY) / vec2(texSize));
    if (vectorOffset == 2) {
      return vec3(tex1.z, tex1.w, tex2.x);
    } else {
      return vec3(tex1.w, tex2.x, tex2.y);
    }
  }
}

// Converts a flat index into (x, y, z) coordinates for a volume of size
// texDim, with x varying fastest and z slowest.
// integerMod is defined outside this excerpt.
ivec3 indexTo3D(int idx, ivec3 texDim) {
  // Number of whole x*y slices that fit below idx.
  int z = int(idx / (texDim.x * texDim.y));
  // Keep only the remainder inside that slice.
  idx -= z * int(texDim.x * texDim.y);
  int y = int(idx / texDim.x);
  int x = int(integerMod(idx, texDim.x));
  return ivec3(x, y, z);
}

// Not referenced anywhere in this excerpt.
const float constants_VOXELu_uERR_UNITS_SQR = 0.24999750001874987;
uniform sampler2D user_framebufTex;
// Input texture size: 8*8*8 elements * 3 floats = 1536 floats = 384 four-float
// texels, which fit in 21 * 19 = 399 texels.
ivec2 user_framebufTexSize = ivec2(21, 19);
ivec3 user_framebufTexDim= ivec3(8, 8, 8);
// Output texture size (24 * 22 = 528 texels) and logical output dimensions
// (8*8*8 = 512 elements).
ivec2 uTexSize = ivec2(24, 22);
ivec3 uOutputDim = ivec3(8, 8, 8);

varying vec2 vTexCoord;

// Fragment entry point: derives this fragment's thread id from its texture
// coordinate, fetches the matching input element and writes it out.
// `index` and `threadId` are assigned but not declared in this excerpt;
// presumably they are declared elsewhere in the full shader.
void main(void) {
  // Flat index of this fragment within the output texture (row-major).
  index = int(vTexCoord.s * float(uTexSize.x)) + int(vTexCoord.t * float(uTexSize.y)) * uTexSize.x;
  threadId = indexTo3D(index, uOutputDim);

  // threadId.x is passed in the `z` parameter slot and threadId.z in the `x`
  // slot of getMemoryOptimizedVec3(..., int z, int y, int x).
  vec3 user_voxelColour=getMemoryOptimizedVec3(user_framebufTex,user_framebufTexSize, user_framebufTexDim, threadId.x, threadId.y, threadId.z);
  vec3 kernelResult = vec3(user_voxelColour[0], user_voxelColour[1], user_voxelColour[2]);
  // Only the first three components of the output are written here.
  gl_FragData[0][0] = kernelResult[0];
  gl_FragData[0][1] = kernelResult[1];
  gl_FragData[0][2] = kernelResult[2];
}
callumhay commented 4 years ago

I think I figured out the issue: in getMemoryOptimizedVec3, the half-texel offset (+ 0.5) that centres the sample on the texel is missing from the tex2 lookup, i.e., instead of this: vec4 tex2 = texture2D(tex, vec2(readX, readY) / vec2(texSize));

it should be this: vec4 tex2 = texture2D(tex, (vec2(readX, readY) + 0.5) / vec2(texSize));

(similar to the tex1 lookup).

This appears to fix the issue on my machine. Please let me know if this makes sense!