treeform / shady

Nim to GPU shader language compiler and supporting utilities.
MIT License
152 stars 12 forks source link

Example of Compute Shader + SSBO instead of TBO #10

Open arkanoid87 opened 1 year ago

arkanoid87 commented 1 year ago

Not really an issue, more a proposal. I could PR this if I manage to wrap my head around this.

While Compute Shaders are capable of dealing with any kind of buffer, including Texture Buffers, Shader Storage Buffer Objects are generic data containers, not strictly tied to the concept of an image, that can be used as I/O for Compute Shaders.

from this stackoverflow answer:

Using buffer textures also implies having to deal with gvec4 return values, both with texelFetch() and imageLoad() / imageStore(). This becomes very tedious as soon as you want to work with structures (or arrays thereof) and you don't want to think of some stupid packing scheme using multiple instances of vec4 or using multiple buffer textures to achieve something similar. With a buffer accessed as shader storage, you can simple index into the data store and pull one or more instances of some struct {} directly from the buffer.

I think Shady is the most streamlined way to bring a custom GPU algorithm into a Nim project right now. An example of using SSBOs with Shady would be really interesting, as a way to leave the domain of images and enter that of data processing.

arkanoid87 commented 1 year ago

I see there's no layout generation for SSBO object right now

would it work if I use toGLSL extra param to add:

layout(std430, binding = 3) buffer layoutName
{
    int data_SSBO[];
};

at the top of shader source code?

arkanoid87 commented 1 year ago

I've successfully executed a trivial SSBO following a shady template, but had to skip the whole code generation part due to a couple of possibly missing features.

import std/[sugar]
import opengl, shady, shady/compute, pixie

# NOTE(review): presumably sets up an offscreen GL context (from
# shady/compute) so the gl* calls below have something to talk to — confirm.
initOffscreenWindow()

# Host-side data. The shader copies inputData[i] into outputData[i].
var inputData = @[1.uint32,2,3,4,5]
var outputData = @[0.uint32,0,0,0,0]

# The code-generation branch is disabled on purpose: with `when true` the
# toGLSL call aborts with "[GLSL] Invalid x[y]." on the seq indexing below,
# so the hand-written shader file is loaded instead.
when false:
    proc computeShader() =
        # One invocation per element, indexed by the x component of the
        # global invocation id.
        var pos = gl_GlobalInvocationID
        var i = pos.x
        outputData[i] = inputData[i]

    # The SSBO layout blocks cannot be generated yet, so they are injected
    # verbatim through `extra`.
    # NOTE(review): the blocks declare `int` arrays while the host seqs hold
    # uint32 — confirm the mismatch is intended.
    let computeShaderSrc = toGLSL(
        computeShader,
        "430",
        extra = """
layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;

layout(std430, binding = 3) buffer inputLayout
{
    int inputData[];
};

layout(std430, binding = 4) buffer outputLayout
{
    int outputData[];
};
"""
    )
else:
    # Read the hand-written GLSL source from disk.
    let computeShaderSrc = readFile("computeShader.comp")

dump computeShaderSrc

# Compile the shader source; the returned id is what glUseProgram activates.
var shaderId = compileComputeShader((
    "computeShader",
    computeShaderSrc
))
dump shaderId
glUseProgram(shaderId)

# Input SSBO: create a buffer, upload inputData and attach it to indexed
# binding point 3 (the `binding = 3` of inputLayout in the shader).
var input_ssbo: GLuint
glGenBuffers(1, input_ssbo.addr)
glBindBuffer(GL_SHADER_STORAGE_BUFFER, input_ssbo)
glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(uint32) * inputData.len, inputData[0].addr, GL_STREAM_DRAW)
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, input_ssbo)
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0)

# Output SSBO: same steps with outputData, attached to binding point 4
# (the `binding = 4` of outputLayout in the shader).
var output_ssbo: GLuint
glGenBuffers(1, output_ssbo.addr)
glBindBuffer(GL_SHADER_STORAGE_BUFFER, output_ssbo)
glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(uint32) * outputData.len, outputData[0].addr, GL_STREAM_DRAW)
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, output_ssbo)
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0)

# Launch inputData.len work groups along x. The shader declares a local size
# of 1x1x1, so this is one invocation per input element.
glDispatchCompute(
    inputData.len.GLuint,
    1.GLuint,
    1.GLuint
)
# Barrier issued between the dispatch and the readback below.
# NOTE(review): for a CPU readback through a mapped buffer,
# GL_BUFFER_UPDATE_BARRIER_BIT may be the bit that applies — confirm against
# the glMemoryBarrier reference.
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT)

# Map the output SSBO read-only and copy its contents back into outputData.
# NOTE(review): the mapped pointer is not checked for nil before copyMem.
let p = cast[ptr UncheckedArray[uint32]](glMapNamedBuffer(output_ssbo, GL_READ_ONLY))
copyMem(
    outputData[0].addr,
    p,
    outputData.len * sizeof(uint32)
)
discard glUnmapNamedBuffer(output_ssbo)

dump outputData

computeShader.comp

#version 430

layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;

layout(std430, binding = 3) buffer inputLayout
{
    int inputData[];
};

layout(std430, binding = 4) buffer outputLayout
{
    int outputData[];
};

void main() {
    uvec3 pos = gl_GlobalInvocationID;
    uint i = pos.x;
    outputData[i] = inputData[i];
}

output in when false branch:

computeShaderSrc = #version 430

layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;

layout(std430, binding = 3) buffer inputLayout
{
    int inputData[];
};

layout(std430, binding = 4) buffer outputLayout
{
    int outputData[];
};

void main() {
    uvec3 pos = gl_GlobalInvocationID;
    uint i = pos.x;
    outputData[i] = inputData[i];
}
shaderId = 2
outputData = @[1, 2, 3, 4, 5]

output when true branch:

stack trace: (most recent call last)
/home/arkanoid/.nimble/pkgs/shady-0.1.3/shady.nim(833, 21) toGLSL
/home/arkanoid/.nimble/pkgs/shady-0.1.3/shady.nim(796, 17) toGLSLInner
/home/arkanoid/.nimble/pkgs/shady-0.1.3/shady.nim(780, 19) gatherFunction
/home/arkanoid/.nimble/pkgs/shady-0.1.3/shady.nim(780, 19) gatherFunction
/home/arkanoid/.nimble/pkgs/shady-0.1.3/shady.nim(780, 19) gatherFunction
/home/arkanoid/.nimble/pkgs/shady-0.1.3/shady.nim(754, 21) gatherFunction
/home/arkanoid/.nimble/pkgs/shady-0.1.3/shady.nim(9, 8) err
/home/arkanoid/nim/test_shader/src/test_shader.nim(17, 34) template/generic instantiation of `toGLSL` from here
/home/arkanoid/nim/test_shader/src/test_shader.nim(15, 9) Error: [GLSL] Invalid x[y].

Long story short: I've found no way to generate GLSL code for:

layout(std430, binding = 3) buffer inputLayout
{
    int inputData[];
};

layout(std430, binding = 4) buffer outputLayout
{
    int outputData[];
};

and it seems that codegen does not support array indexing x[y]

I might be wrong, is this the case?

treeform commented 1 year ago

Great progress. The code generating just needs to be made to work with SSBOs but you have a clear way to get that to work.

arkanoid87 commented 1 year ago

The lack of layout(std430, binding = 3) buffer inputLayout ... can be worked around using the extra param. I'm trying to figure out why it's complaining about x[y].

arkanoid87 commented 1 year ago

Adding an empty typeInst[0].repr == "seq" case in gatherFunction makes compileComputeShader produce correct output (actually it outputs nothing, but that's exactly what is needed if the layout sections are passed in extra):

...
            if typeInst.kind == nnkBracketExpr:
              # might be a uniform
              if typeInst[0].repr in ["Uniform", "UniformWriteOnly", "Attribute"]:
                defStr.add typeRename(typeInst[0].repr)
                defStr.add " "
                defStr.add typeRename(typeInst[1].repr)
              elif typeInst[0].repr == "array":
                defStr.add typeRename(typeInst[2].repr)
                defStr.add "["
                defStr.add typeRename(typeInst[1][2].repr)
                defStr.add "]"
              elif typeInst[0].repr == "seq":
                continue
              else:
                err "Invalid x[y].", n
...
arkanoid87 commented 1 year ago

just found out that passing SSBO object to function in GLSL is forbidden

The ".length()" method is not supported, nor is passing the array as a function argument.

issue 2 in: https://registry.khronos.org/OpenGL/extensions/ARB/ARB_shader_storage_buffer_object.txt

arkanoid87 commented 1 year ago

I've successfully implemented my whole algorithm into a large compute shader with shady.

JUST GREAT!

I just wanted to share this

treeform commented 1 year ago

That's great! Do you have enough code to open a PR to add SSBO to shady?

arkanoid87 commented 1 year ago

actually no, but all I had to do is

Create a custom buffer type, just to make things obvious

# Plain alias of seq[T]: it adds no behavior of its own. The dedicated name
# marks a variable as backing storage for an SSBO.
# NOTE(review): the shady patch in this thread matches this type by its name
# ("SharedStorageObjectBuffer"), so renaming it would need a matching change
# there — confirm before touching it.
type
    SharedStorageObjectBuffer*[T] = seq[T]

Patch shady so that it doesn't do anything with it

diff --git a/src/shady.nim b/src/shady.nim
index 707a70e..d3f71b0 100644
--- a/src/shady.nim
+++ b/src/shady.nim
@@ -745,6 +755,9 @@ proc gatherFunction(
                 defStr.add typeRename(typeInst[0].repr)
                 defStr.add " "
                 defStr.add typeRename(typeInst[1].repr)
+              # might be an ssbo
+              elif typeInst[0].repr == "SharedStorageObjectBuffer":
+                continue
               elif typeInst[0].repr == "array":
                 defStr.add typeRename(typeInst[2].repr)
                 defStr.add "["

use extra param to add layout on top of shader:


# Host-side storage for the SSBO: 42 zero-initialised uint32 elements.
var myBuffer: SharedStorageObjectBuffer[uint32] = newSeq[uint32](42)

# The SSBO layout block is not generated automatically, so it is passed
# verbatim through `extra`. The buffer member is named `myBuffer` like the
# Nim variable, and `binding = 1` is the binding point the host code has to
# attach the buffer to.
let computeShaderSrc = toGLSL(computeShader, "430", extra = """
layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;

layout(std430, binding = 1) buffer myBufferLayout
{ uint myBuffer[]; };
""")

then copy the buffer to GPU memory like this (wrapped in a utility function)

proc copyBufferToGpu*(src: var SharedStorageObjectBuffer, binding: GLuint): GLuint =
    ## Uploads `src` into a newly generated shader storage buffer, attaches
    ## that buffer to the indexed binding point `binding` and returns the
    ## buffer name.
    ##
    ## `binding` must match the `binding = N` qualifier of the corresponding
    ## `buffer` block in the shader.
    ##
    ## An empty `src` creates a zero-sized data store instead of raising
    ## IndexDefect on `src[0]`.
    var location: GLuint
    glGenBuffers(1, location.addr)
    glBindBuffer(GL_SHADER_STORAGE_BUFFER, location)

    # Element 0 is only touched when it exists.
    var data: pointer = nil
    var byteLen = 0
    if src.len > 0:
        data = src[0].addr
        byteLen = src[0].sizeof * src.len

    glBufferData(GL_SHADER_STORAGE_BUFFER, byteLen, data, GL_STREAM_DRAW)
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, binding, location)
    # Unbind the generic target; the indexed binding made above is what the
    # shader's `binding = N` refers to.
    glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0)
    return location

let myBufferLocation =  copyBufferToGpu(myBuffer, 1) # 1 here is for "binding = 1" in layout section in glsl

after computation copy back buffer from gpu

proc retrieveBufferFromGpu*(dst: var SharedStorageObjectBuffer, location: GLuint) =
    ## Copies the contents of the GL buffer `location` back into `dst`.
    ##
    ## Exactly `dst.len` elements are copied.
    ## NOTE(review): assumes the GL buffer holds at least that many bytes —
    ## confirm at the call site.
    ##
    ## Does nothing when `dst` is empty. Raises IOError when the buffer cannot
    ## be mapped, instead of handing a nil pointer to copyMem.
    if dst.len == 0:
        return

    let mapped = glMapNamedBuffer(location, GL_READ_ONLY)
    if mapped.isNil:
        raise newException(IOError,
            "glMapNamedBuffer failed for buffer " & $location)

    copyMem(dst[0].addr, mapped, dst[0].sizeof * dst.len)
    discard glUnmapNamedBuffer(location)

# Read the results back into the same seq that was uploaded.
retrieveBufferFromGpu(myBuffer, myBufferLocation)

In the end, what's really missing is the automatic generation of the "layout" section for the SSBO in the GLSL shader, but that is not hard to supply by hand (via extra) in the meantime.