KhronosGroup / SPIRV-Cross

SPIRV-Cross is a practical tool and library for performing reflection on SPIR-V and disassembling SPIR-V back to high level languages.
Apache License 2.0
2.03k stars 559 forks source link

Aliasing of argument buffers on breaks padding in some cases #2318

Closed NikolayKanchevski closed 4 months ago

NikolayKanchevski commented 4 months ago

I have been following very closely the development of the Argument Buffer Aliasing pull request, which has already been merged to the main branch, and while experimenting with all kinds of test scenarios I could come up with, I found a bug.

Let's take the following code for example:

layout(binding = 0) uniform texture2D textures2D[];
layout(binding = 0) uniform texture2D otherTextures2D[];
layout(binding = 0) uniform texture3D textures3D[];

layout(binding = 1, rgba8) uniform image2D float8Images2D[];
layout(binding = 1, rg8i) uniform iimage2D int8Images2D[];
layout(binding = 1, r8ui) uniform uimage3D uint8Images3D[];

layout(binding = 2) uniform sampler samplers[];

layout(location = 0) out vec4 Out;
void main()
{
    Out = vec4(1.0f)
        * texture(sampler2D(textures2D[0], samplers[0]), vec2(0.0f, 0.0f))
        * texture(sampler2D(otherTextures2D[0], samplers[0]), vec2(0.0f, 0.0f))
        * texture(sampler3D(textures3D[0], samplers[0]), vec3(0.0f, 0.0f, 0.0f))
        * imageLoad(float8Images2D[index], ivec2(0, 0)).r
        * imageLoad(int8Images2D[0], ivec2(0, 0)).r
        * imageLoad(uint8Images3D[0], ivec3(0, 0, 0)).r
    ;
}

It compiles under default settings, however, enablingspirv_cross::CompilerMSL::Optionspad_argument_buffer_resources (which, realistically, is essential when porting a bindless design to Metal) causes the (previously) common error:

Argument buffer resource base type could not be determined. When padding argument buffer elements, all descriptor set resources must be supplied with a base type by the app.

My padding setup is the following:

constexpr uint32 RESERVED_SET                   = 0;
constexpr uint32 UNIFORM_BUFFER_BINDING         = 0;
constexpr uint32 STORAGE_BUFFER_BINDING         = 1;
constexpr uint32 SAMPLED_IMAGE_BINDING          = 2;
constexpr uint32 STORAGE_IMAGE_BINDING          = 3;
constexpr uint32 SAMPLER_BINDING                = 4;

/* === Reference: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf === */
constexpr static uint32 UNIFORM_BUFFER_CAPACITY         = 500'000;
constexpr static uint32 STORAGE_BUFFER_CAPACITY         = 500'000;
constexpr static uint32 SAMPLED_IMAGE_CAPACITY          = 500'000;
constexpr static uint32 STORAGE_IMAGE_CAPACITY          = 500'000;
constexpr static uint32 SAMPLER_CAPACITY                = 1024;

constexpr std::array<spirv_cross::MSLResourceBinding, 5> BINDINGS
{
    spirv_cross::MSLResourceBinding {
        .basetype = spirv_cross::SPIRType::Void,
        .desc_set = RESERVED_SET,
        .binding = UNIFORM_BUFFER_BINDING,
        .count = UNIFORM_BUFFER_CAPACITY
    },
    spirv_cross::MSLResourceBinding {
        .basetype = spirv_cross::SPIRType::Void,
        .desc_set = RESERVED_SET,
        .binding = STORAGE_BUFFER_BINDING,
        .count = STORAGE_BUFFER_CAPACITY
    },
    spirv_cross::MSLResourceBinding {
        .basetype = spirv_cross::SPIRType::SampledImage,
        .desc_set = RESERVED_SET,
        .binding = SAMPLED_IMAGE_BINDING,
        .count = SAMPLED_IMAGE_CAPACITY
    },
    spirv_cross::MSLResourceBinding {
        .basetype = spirv_cross::SPIRType::Image,
        .desc_set = RESERVED_SET,
        .binding = STORAGE_IMAGE_BINDING,
        .count = STORAGE_IMAGE_CAPACITY
    },
    spirv_cross::MSLResourceBinding {
        .basetype = spirv_cross::SPIRType::Sampler,
        .desc_set = RESERVED_SET,
        .binding = SAMPLER_BINDING,
        .count = SAMPLER_CAPACITY
    }
};

It seems the error only occurs when you have more than 2 sampled image aliases and/or more than 2 storage image aliases (commenting out at least 1 sampled image and at least 1 storage image allows for padded compilation). I am not sure as to why this could be happening, and, yes, though a niche use-case, it is a legal one, and should be supported.

HansKristian-Work commented 4 months ago

This fix should be merged now on master right? @billhollings

HansKristian-Work commented 4 months ago

Nvm, this is probably something else.

HansKristian-Work commented 4 months ago

misclick

HansKristian-Work commented 4 months ago

Got a change in flight now to fix some bugs, but this code is also bugged. You're not setting msl_binding/msl_texture/msl_sampler correctly in the remapping, and your bindings don't match the shader. That's partially what's causing the issue.

I applied this diff:

diff --git a/main.cpp b/main.cpp
index 9e768ef0..a8f6ffbf 100644
--- a/main.cpp
+++ b/main.cpp
@@ -1259,7 +1259,50 @@ static string compile_iteration(const CLIArguments &args, std::vector<uint32_t>
         msl_opts.replace_recursive_inputs = args.msl_replace_recursive_inputs;
         msl_opts.readwrite_texture_fences = args.msl_readwrite_texture_fences;
         msl_opts.agx_manual_cube_grad_fixup = args.msl_agx_manual_cube_grad_fixup;
+        msl_opts.pad_argument_buffer_resources = true;
+
         msl_comp->set_msl_options(msl_opts);
+
+        {
+            constexpr uint32_t RESERVED_SET                   = 0;
+            constexpr uint32_t SAMPLED_IMAGE_BINDING          = 0;
+            constexpr uint32_t STORAGE_IMAGE_BINDING          = 1;
+            constexpr uint32_t SAMPLER_BINDING                = 2;
+            constexpr uint32_t UBO_BINDING                    = 3;
+
+            constexpr static uint32_t SAMPLED_IMAGE_CAPACITY          = 5000;
+            constexpr static uint32_t STORAGE_IMAGE_CAPACITY          = 5000;
+            constexpr static uint32_t SAMPLER_CAPACITY                = 1024;
+            constexpr static uint32_t UBO_CAPACITY                    = 16;
+
+            MSLResourceBinding bindings[4] = {};
+            for (auto &b : bindings)
+            {
+                b.stage = ExecutionModelFragment;
+                b.desc_set = RESERVED_SET;
+            }
+
+            bindings[0].basetype = SPIRType::Image; // Sampled image and storage image use Image.
+            bindings[0].binding = SAMPLED_IMAGE_BINDING;
+            bindings[0].count = SAMPLED_IMAGE_CAPACITY;
+            bindings[0].msl_texture = 0;
+            bindings[1].basetype = SPIRType::Image;
+            bindings[1].binding = STORAGE_IMAGE_BINDING;
+            bindings[1].count = STORAGE_IMAGE_CAPACITY;
+            bindings[1].msl_texture = 5000;
+            bindings[2].basetype = SPIRType::Sampler;
+            bindings[2].binding = SAMPLER_BINDING;
+            bindings[2].count = SAMPLER_CAPACITY;
+            bindings[2].msl_sampler = 10000;
+            bindings[3].basetype = SPIRType::Void;
+            bindings[3].binding = UBO_BINDING;
+            bindings[3].count = UBO_CAPACITY;
+            bindings[3].msl_buffer = 10000 + SAMPLER_CAPACITY;
+
+            for (auto &b : bindings)
+                msl_comp->add_msl_resource_binding(b);
+        }
+
         for (auto &v : args.msl_discrete_descriptor_sets)
             msl_comp->add_discrete_descriptor_set(v);
         for (auto &v : args.msl_device_argument_buffers)

With this shader:

#version 450
#extension GL_EXT_nonuniform_qualifier : require
layout(binding = 0) uniform texture2D textures2D[];
layout(binding = 0) uniform texture2D otherTextures2D[];
layout(binding = 0) uniform texture3D textures3D[];

layout(binding = 1, rgba8) uniform image2D float8Images2D[];
layout(binding = 1, rg8i) uniform iimage2D int8Images2D[];
layout(binding = 1, r8ui) uniform uimage3D uint8Images3D[];

layout(binding = 2) uniform sampler samplers[];

layout(binding = 3) uniform UBO { vec4 v; } vs[];
layout(binding = 3) uniform UBO2 { float v; } vs2[];

layout(location = 0) out vec4 Out;
layout(location = 1) flat in int index;

vec4 in_func()
{
    return vec4(1.0f)
        * texture(sampler2D(textures2D[0], samplers[0]), vec2(0.0f, 0.0f))
        * texture(sampler2D(otherTextures2D[1], samplers[0]), vec2(0.0f, 0.0f))
        * texture(sampler3D(textures3D[2], samplers[0]), vec3(0.0f, 0.0f, 0.0f))
        * imageLoad(float8Images2D[index], ivec2(0, 0)).r
        * imageLoad(int8Images2D[0], ivec2(0, 0)).r
        * imageLoad(uint8Images3D[0], ivec3(0, 0, 0)).r + vs[index].v + vs2[0].v;
}

void main()
{
    Out = in_func();
}

./spirv-cross /tmp/test.spv --msl --msl-argument-buffers --msl-version 30000

#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wincompatible-pointer-types-discards-qualifiers"

#include <metal_stdlib>
#include <simd/simd.h>

using namespace metal;

struct UBO
{
    float4 v;
};

struct UBO2
{
    float v;
};

struct spvDescriptorSetBuffer0
{
    array<texture2d<float>, 5000> textures2D [[id(0)]];
    // Overlapping binding: array<texture2d<float>, 5000> otherTextures2D [[id(0)]];
    // Overlapping binding: array<texture3d<float>, 5000> textures3D [[id(0)]];
    array<texture2d<float>, 5000> float8Images2D [[id(5000)]];
    // Overlapping binding: array<texture2d<int>, 5000> int8Images2D [[id(5000)]];
    // Overlapping binding: array<texture3d<uint>, 5000> uint8Images3D [[id(5000)]];
    array<sampler, 1024> samplers [[id(10000)]];
    constant UBO* vs [[id(11024)]][16];
    // Overlapping binding: constant UBO2* vs2 [[id(11024)]][16];
};

struct main0_out
{
    float4 Out [[color(0)]];
};

struct main0_in
{
    int index [[user(locn1)]];
};

static inline __attribute__((always_inline))
float4 in_func(constant array<texture2d<float>, 5000>& textures2D, constant array<sampler, 1024>& samplers, constant array<texture2d<float>, 5000>& otherTextures2D, constant array<texture3d<float>, 5000>& textures3D, constant array<texture2d<float>, 5000>& float8Images2D, thread int& index, constant array<texture2d<int>, 5000>& int8Images2D, constant array<texture3d<uint>, 5000>& uint8Images3D, constant UBO* constant (&vs)[16], constant UBO2* constant (&vs2)[16])
{
    return (((((((float4(1.0) * textures2D[0].sample(samplers[0], float2(0.0))) * otherTextures2D[1].sample(samplers[0], float2(0.0))) * textures3D[2].sample(samplers[0], float3(0.0))) * float8Images2D[index].read(uint2(int2(0))).x) * float(int8Images2D[0].read(uint2(int2(0))).x)) * float(uint8Images3D[0].read(uint3(int3(0))).x)) + vs[index]->v) + float4(vs2[0]->v);
}

fragment main0_out main0(main0_in in [[stage_in]], constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]])
{
    main0_out out = {};
    constant auto &otherTextures2D = reinterpret_cast<constant array<texture2d<float>, 5000> &>(spvDescriptorSet0.textures2D);
    constant auto &textures3D = reinterpret_cast<constant array<texture3d<float>, 5000> &>(spvDescriptorSet0.textures2D);
    constant auto &int8Images2D = reinterpret_cast<constant array<texture2d<int>, 5000> &>(spvDescriptorSet0.float8Images2D);
    constant auto &uint8Images3D = reinterpret_cast<constant array<texture3d<uint>, 5000> &>(spvDescriptorSet0.float8Images2D);
    constant auto &vs2 = reinterpret_cast<constant UBO2* constant (&)[16]>(spvDescriptorSet0.vs);
    out.Out = in_func(spvDescriptorSet0.textures2D, spvDescriptorSet0.samplers, otherTextures2D, textures3D, spvDescriptorSet0.float8Images2D, in.index, int8Images2D, uint8Images3D, spvDescriptorSet0.vs, vs2);
    return out;
}