Open shangjiaxuan opened 2 years ago
Related to #45. vuh2 works, but my dct8x8 shader compiled from HLSL gives all-zero output data when executed, and no error code is returned. I changed the code to explicitly write array[0] = 1.0, and the result is still 0. Resorting to hand-coding Vulkan to find the error.
Code in hlsl:
// Per-dispatch image parameters, bound at constant-buffer slot b0.
// NOTE(review): a plain cbuffer is a uniform buffer when compiled to SPIR-V, whereas the
// GLSL version of this shader declares the same fields as a push_constant block — confirm
// the host supplies the data the way this declaration expects.
cbuffer image_info : register(b0)
{
// Upper bound (exclusive) used when clamping source coordinates in input_local_to_global_idx.
uint2 size;
// Signed offset added to each tile origin in main.
int2 offset;
// Multiplied by the clamped y coordinate to form the row offset into data_in.
uint x_stride;
// Per-pixel stride used by input_local_to_global_idx
// (presumably elements between horizontally adjacent pixels — TODO confirm).
uint pix_stride;
// x scales the output row width (8 * global_size.x); z multiplies flat indices,
// presumably as the channel count — TODO confirm. y is not read in the code shown here.
uint3 global_size;
};
// Thread-group dimensions as a constant; not referenced by the code shown here
// (main hard-codes [numthreads(1, 1, 1)]).
static const uint3 local_size = uint3(1u, 1u, 1u);
// Read-only source samples, bound at SRV slot t0.
// NOTE(review): typed Buffer/RWBuffer resources are translated to texel buffers when HLSL
// is compiled to SPIR-V; StructuredBuffer/RWStructuredBuffer are what become storage
// buffers. Confirm the host binds descriptor types that match these declarations.
Buffer<float> data_in : register(t0);
// Writable destination, bound at UAV slot u0.
RWBuffer<float> data_out : register(u0);
// Entry-point input for main: carries the dispatch-wide thread index.
struct compute_input {
// Filled from SV_DispatchThreadID; main treats xy as the 8x8 tile coordinate and
// passes z through as an extra index term.
uint3 global_idx : SV_DispatchThreadID;
};
// Maps a position inside the current 8x8 tile to a flat element index into data_in.
//   local_idx   - (x, y) position within the tile.
//   local_start - xy: origin of the tile in source coordinates; z: term added
//                 verbatim to the flat index (the caller passes the dispatch z index).
// Returns x_stride * y + pix_stride * x + local_start.z, where (x, y) is the source
// position clamped to [0, size - 1] so that tiles overhanging the image edge repeat
// the border samples instead of reading out of range.
uint input_local_to_global_idx(uint2 local_idx, int3 local_start)
{
    int2 global_location = local_start.xy + int2(local_idx);
    // If out of the global range, clamp to the global range here.
    global_location = clamp(global_location, int2(0, 0), int2(size) - int2(1, 1));
    // After clamping both components are non-negative (given size >= 1), so the
    // conversions to uint below cannot wrap.
    uint line_offset = x_stride * uint(global_location.y);
    // Fix: the pixel term must use the x coordinate. The previous code multiplied
    // pix_stride by y a second time, so x never contributed and every column of a
    // row resolved to the same element.
    uint pixel_offset = pix_stride * uint(global_location.x);
    return line_offset + pixel_offset + uint(local_start.z);
}
// Maps a position inside an invocation's 8x8 tile to a flat element index into data_out.
//   local_idx         - (x, y) position within the tile.
//   global_invocation - dispatch thread index; xy selects the tile, z is added to the result.
// The output is addressed as rows of 8 * global_size.x positions, each position
// spanning global_size.z consecutive elements.
uint output_global_to_local_idx(uint2 local_idx, uint3 global_invocation)
{
    uint2 position = local_idx + global_invocation.xy * uint2(8, 8);
    uint row_width = global_size.x * 8;
    uint position_index = position.y * row_width + position.x;
    return position_index * global_size.z + global_invocation.z;
}
//[numthreads(int(gl_WorkGroupSize.x), int(gl_WorkGroupSize.y), int(gl_WorkGroupSize.z))]
// Compute entry point: each invocation loads one 8x8 tile from data_in into four
// 4x4 matrices, runs dct_8x8_impl on them, then writes 64 elements to data_out.
// In this debugging variant the stores write the constant 1.0 rather than the
// transformed values.
[numthreads(1, 1, 1)]
void main(compute_input input)
{
    // Origin of this invocation's 8x8 region, shifted by the configured offset and
    // scaled by global_size.z; the dispatch z index rides along as the third component.
    // NOTE(review): the scaled origin is later clamped against `size` and multiplied by
    // pix_stride in input_local_to_global_idx — confirm the units agree.
    int2 tile_origin = int2(input.global_idx.xy * uint2(8, 8)) + offset;
    int3 local_start = int3(tile_origin * int(global_size.z), input.global_idx.z);

    float4x4 invocation_data[2][2];

    // Gather the tile. x is the horizontal and y the vertical position inside the tile;
    // x selects the first array index and the matrix row, y the second array index
    // and the matrix column.
    {
        for (uint y = 0; y < 8; ++y) {
            for (uint x = 0; x < 8; ++x) {
                uint src = input_local_to_global_idx(uint2(x, y), local_start);
                invocation_data[x / 4][y / 4][x % 4][y % 4] = data_in[src];
            }
        }
    }

    dct_8x8_impl(invocation_data);

    // Scatter the tile to the output buffer.
    {
        for (uint y = 0; y < 8; ++y) {
            for (uint x = 0; x < 8; ++x) {
                uint dst = output_global_to_local_idx(uint2(x, y), input.global_idx);
                // Debug store; the real value would be invocation_data[x / 4][y / 4][x % 4][y % 4].
                data_out[dst] = 1.0;
            }
        }
    }

    // Debug sentinel: element 0 is forced to 1 to check whether any write reaches the buffer.
    data_out[0] = 1;
}
It seems the GLSL version of the code is not working either:
#version 450
#extension GL_GOOGLE_include_directive:enable
#include "dct.glsl"
// One invocation per workgroup. With local_size_z = 1, gl_WorkGroupSize.z is 1 and
// gl_LocalInvocationID.z is always 0 wherever the code below uses them.
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
// in pixel units
// use z coordinate for multi channel
// Image parameters delivered as push constants.
layout(push_constant) uniform Image_info
{
// Upper bound (exclusive) used when clamping source coordinates.
uvec2 size;
// Signed offset added to each tile origin in main.
ivec2 offset;
// Multiplied by the clamped y coordinate to form the row offset into data_in.
uint x_stride;
// Not read by the GLSL code shown here.
uint pix_stride;
// Not read by the GLSL code shown here.
uvec3 global_size;
} image_info;
// Read-only source samples as a runtime-sized float array.
// NOTE(review): the buffers use bindings 1 and 2, not 0 and 1 — confirm the host-side
// descriptor set layout binds the input and output buffers at these exact indices.
layout(binding = 1) readonly buffer image_input
{
float data_in[];
};
// Write-only destination as a runtime-sized float array.
layout(binding = 2) writeonly buffer image_output
{
float data_out[];
};
// Maps a position inside the current 8x8 tile to a flat element index into data_in.
//   local_idx   - (x, y) position within the tile.
//   local_start - origin of the tile in source coordinates.
// Returns x_stride * y + gl_WorkGroupSize.z * x + gl_LocalInvocationID.z, where (x, y)
// is the source position clamped to [0, size - 1] so that tiles overhanging the image
// edge repeat the border samples instead of reading out of range.
uint input_local_to_global_idx(uvec2 local_idx, ivec2 local_start) {
    ivec2 global_idx = local_start + ivec2(local_idx);
    // If out of the global range, clamp to the global range here.
    global_idx = clamp(global_idx, ivec2(0, 0), ivec2(image_info.size) - ivec2(1, 1));
    // After clamping both components are non-negative (given size >= 1), so the
    // conversions to uint below cannot wrap.
    uint line_offset = image_info.x_stride * uint(global_idx.y);
    // Fix: the pixel term must use the x coordinate. The previous code used y a
    // second time, so x never contributed and every column of a row resolved to
    // the same element.
    uint pixel_offset = uint(global_idx.x) * gl_WorkGroupSize.z;
    return line_offset + pixel_offset + gl_LocalInvocationID.z;
}
// Maps a position inside this invocation's 8x8 tile to a flat element index into data_out.
//   local_idx - (x, y) position within the tile.
// The output is addressed as rows of 8 * gl_WorkGroupSize.x * gl_NumWorkGroups.x
// positions, each position spanning gl_WorkGroupSize.z consecutive elements.
uint output_global_to_local_idx(uvec2 local_idx) {
    uvec2 position = local_idx + gl_GlobalInvocationID.xy * uvec2(8, 8);
    uint row_width = gl_NumWorkGroups.x * gl_WorkGroupSize.x * 8;
    uint position_index = position.y * row_width + position.x;
    return position_index * gl_WorkGroupSize.z + gl_LocalInvocationID.z;
}
// Compute entry point: each invocation loads one 8x8 tile from data_in into four
// 4x4 matrices, runs dct_8x8_impl on them, and stores the 64 results to data_out.
void main(void) {
    // Origin of this invocation's 8x8 region, shifted by the configured offset and
    // scaled by gl_WorkGroupSize.z.
    ivec2 tile_origin = ivec2(gl_GlobalInvocationID.xy * uvec2(8, 8)) + image_info.offset;
    ivec2 local_start = tile_origin * ivec2(gl_WorkGroupSize.z);

    mat4x4 invocation_data[2][2];

    // Gather the tile. row is the vertical and col the horizontal position inside
    // the tile; row selects the first array index and the first matrix index.
    for (uint row = 0; row < 8; ++row) {
        for (uint col = 0; col < 8; ++col) {
            uint src = input_local_to_global_idx(uvec2(col, row), local_start);
            invocation_data[row / 4][col / 4][row % 4][col % 4] = data_in[src];
        }
    }

    dct_8x8_impl(invocation_data);

    // Scatter the transformed tile to the output buffer.
    for (uint row = 0; row < 8; ++row) {
        for (uint col = 0; col < 8; ++col) {
            uint dst = output_global_to_local_idx(uvec2(col, row));
            data_out[dst] = invocation_data[row / 4][col / 4][row % 4][col % 4];
        }
    }

    // Debug sentinel: element 0 is forced to 1.0 to check whether any write reaches the buffer.
    data_out[0] = 1.0;
}
Cannot compile sample, error message: