Open cdata opened 6 days ago
That's odd since it looks like the code should write the lod as well.
We even have snapshot tests for this (naga\tests\in\image.wgsl
-> naga\tests\out\glsl\image.queries.Vertex.glsl
).
@teoxoy thanks for taking a look and for referencing that test!
It may be relevant context: in the shader, I sample a texture within a non-uniform condition (which I ran into when getting it to work under WebGPU). I'm not sure if this kind of conditional logic could change the generated GLSL in a meaningful way, but it may be worth noting.
Here is the WGSL (pre-naga-oil), in case it is helpful.
#import bevy_pbr::mesh_functions::{get_world_from_local, mesh_position_local_to_clip}
struct Vertex {
@location(0) position: vec3<f32>,
@location(1) normal: vec3<f32>,
@location(2) uv: vec2<f32>,
@location(3) operation: u32,
@location(4) palette: vec3<u32>,
@location(5) atlas_index: u32,
@location(6) source_xy: vec2<u32>,
@location(7) screen_xy: vec2<i32>,
@location(8) length: vec2<u32>,
@location(9) layer_mask: u32,
};
struct VertexOutput {
@builtin(position) clip_position: vec4<f32>,
@location(0) uv: vec2<f32>,
@location(1) operation: u32,
@location(2) palette: vec3<u32>,
@location(3) atlas_index: u32,
@location(4) source_xy: vec2<u32>,
@location(5) length: vec2<f32>,
@location(6) layer_mask: u32
};
@vertex
fn vertex(vertex: Vertex) -> VertexOutput {
let w: f32 = f32(vertex.length.x);
let h: f32 = f32(vertex.length.y);
let screen_space_offset = vec3<f32>(0.0, 240., 0.0);
let scaled_size = (vertex.position + vec3<f32>(0.5, -0.5, 0.0)) * vec3<f32>(w, h, 1.);
let draw_offset = vec3<f32>(f32(vertex.screen_xy.x), -1. * f32(vertex.screen_xy.y), 0.);
let position = screen_space_offset + scaled_size + draw_offset;
var out: VertexOutput;
out.clip_position = mesh_position_local_to_clip(
get_world_from_local(0u),
vec4<f32>(position, 1.0)
);
out.uv = vertex.uv;
out.operation = vertex.operation;
out.atlas_index = vertex.atlas_index;
out.palette = vertex.palette;
out.source_xy = vertex.source_xy;
out.length = vec2(w, h);
out.layer_mask = vertex.layer_mask;
return out;
}
// TODO: Atlas binding should be a non-sampled textre_2d<u32>
@group(2) @binding(0) var atlas_texture: texture_2d<f32>;
@group(2) @binding(1) var atlas_texture_sampler: sampler;
@group(2) @binding(2) var palette_texture: texture_2d<f32>;
@group(2) @binding(3) var palette_texture_sampler: sampler;
@group(2) @binding(4) var map_texture: texture_2d_array<u32>;
const CHARACTER_PIXEL_SIZE: vec2<f32> = vec2(16.0);
const ATLAS_CHARACTER_SIZE: vec2<f32> = vec2(16.0);
const PALETTE_SIZE: u32 = 16;
const ATLAS_PIXEL_SIZE: vec2<f32> = CHARACTER_PIXEL_SIZE * ATLAS_CHARACTER_SIZE;
const PALETTE_PIXEL_SIZE: vec2<f32> = vec2(f32(PALETTE_SIZE));
const MAX_MAP_LAYERS: u32 = 4;
const SPRITE_OPERATION: u32 = 0;
const MAP_OPERATION: u32 = 1;
const PALETTE_LOCK_MASK: u32 = 65535;
const LOCK_COUNTER_MASK: u32 = 32768;
fn sample_palette(index: u32, palette: vec3<u32>) -> vec4<f32> {
let palette_offset = palette.y;
let palette_lock = palette.z;
var palette_index = index % PALETTE_SIZE;
// We only need to do offset math if the color is "free" (not locked)
let is_free = ((palette_lock >> palette_index) & 1) == 0;
if (is_free) {
if (palette_lock > 0) {
// At least one other color is locked.
// 8 bytes (64 bits) to store "free" indices in order, noting
// Indices are in the range [0,15] and therefor
// can be represented by 4 bits each
var free_indices_0: u32 = 0;
var free_indices_1: u32 = 0;
// NOTE: Tried to be clever with a pointer, but there are usage
// restrictions imposed by Naga's IR that seem to apply but that
// I don't fully understand.
// SEE: https://github.com/gfx-rs/wgpu/issues/4451
// var free_indices = &free_indices_0;
var free_bits: u32 = 0;
var lock_counter = palette_lock;
for (var i: u32 = 0; i < PALETTE_SIZE; i += 1u) {
if ((lock_counter & 1) == 0) {
// This bit is "free"
let buffer_offset: u32 = free_bits - free_bits / 8 * 8;
let value = u32(i) << (4 * buffer_offset);
// OR the index into the buffer at the appropriate offset
if (buffer_offset < free_bits) {
free_indices_1 |= value;
} else {
free_indices_0 |= value;
}
free_bits += 1u;
} else if (i < index) {
// This bit is "locked"
// Decrement the starting index to account for proceding
// "locked" indices
palette_index -= 1u;
}
// Shift the lock counter to the next bit
lock_counter >>= 1u;
}
// The total set of slots that a color may shift through is
// equal to the free bit count, so modulo that to get the
// real offset.
palette_index = (palette_index + palette_offset) % free_bits;
var free_indices = free_indices_0;
if (palette_index > 7u) {
free_indices = free_indices_1;
palette_index -= 8u;
}
palette_index = (free_indices >> ((palette_index) * 4)) & (PALETTE_SIZE - 1);
} else {
// All colors are free. Modulus will suffice to determine the
// final index.
palette_index = (palette_index + palette_offset) % PALETTE_SIZE;
}
}
let palette_uv = vec2(
fract(f32(palette_index) / f32(PALETTE_SIZE)),
0.0
);
return textureSampleLevel(palette_texture, palette_texture_sampler, palette_uv, 0.);
}
fn sample_sprite(atlas_index: u32, palette: vec3<u32>, uv: vec2<f32>) -> vec4<f32> {
let atlas_coordinates = vec2(
f32(atlas_index % u32(ATLAS_CHARACTER_SIZE.x)),
f32(atlas_index / u32(ATLAS_CHARACTER_SIZE.y) % u32(ATLAS_CHARACTER_SIZE.y))
);
let atlas_uv = vec2(uv.x, 1. - uv.y) / 16. + (atlas_coordinates / 16.);
// This gets us a color in the range [0, 15]
// TODO: Use textureLoad and get a u32 directly / save upload bandwidth
// NOTE: We cannot use a simple `textureSample` in these functions, because
// the sample happens conditionally within the fragment and WebGPU does not
// allow MIP-aware sampling within non-uniform-based conditions (sheesh).
var palette_index = u32(textureSampleLevel(atlas_texture, atlas_texture_sampler, atlas_uv, 0.).r * 256.);
return sample_palette(palette_index, palette);
}
fn sample_map_layer(layer: u32, source_xy: vec2<u32>, length: vec2<f32>, palette: vec3<u32>, uv: vec2<f32>) -> vec4<f32> {
let map_uv = vec2(uv.x, 1. - uv.y);
let map_size: vec2<u32> = textureDimensions(map_texture, 0).xy;
let map_source_xy = vec2<f32>(source_xy) / CHARACTER_PIXEL_SIZE;
let map_length = floor(length / CHARACTER_PIXEL_SIZE);
let map_coords = vec2<u32>(map_source_xy + map_length * map_uv);
let atlas_index = textureLoad(map_texture, map_coords, layer, 0).x;
let tile_length: vec2<f32> = length / CHARACTER_PIXEL_SIZE;
let tile = tile_length * map_uv + fract(map_source_xy);
let tile_uv: vec2<f32> = fract(tile);
return sample_sprite(atlas_index, palette, vec2(tile_uv.x, 1 - tile_uv.y));
}
fn sample_map(source_xy: vec2<u32>, length: vec2<f32>, layer_mask: u32, palette: vec3<u32>, uv: vec2<f32>) -> vec4<f32> {
// NOTE: Currently naga desugars this incorrectly when targeting WebGL2, so
// we hardcode the max value; this trades off some perf in circumstances where
// a map is used that has fewer than 4 layers,
// https://github.com/gfx-rs/wgpu/issues/6435
// let map_layers: u32 = min(textureNumLayers(map_texture), MAX_MAP_LAYERS);
let map_layers: u32 = MAX_MAP_LAYERS;
var mask_includes: array<bool,4> = array(true, true, true, true);
if (layer_mask > 0) {
mask_includes = array(
(layer_mask & 1) == 1,
((layer_mask >> 1) & 1) == 1,
((layer_mask >> 1) & 1) == 1,
((layer_mask >> 1) & 1) == 1
);
}
var final_color = vec4(0.0);
for (var i = 0u; i < map_layers; i += 1u) {
let include_layer = mask_includes[i];
if (!include_layer) {
continue;
}
let layer_color = sample_map_layer(i, source_xy, length, palette, uv);
if (layer_color.a > 0.0) {
final_color = layer_color;
}
}
return final_color;
}
@fragment
fn fragment(in: VertexOutput) -> @location(0) vec4<f32> {
let operation = in.operation;
let atlas_index = in.atlas_index;
let uv = in.uv;
let palette = in.palette;
let length = in.length;
let source_xy = in.source_xy;
let layer_mask = in.layer_mask;
var palette_uv = vec2<f32>(1./16. * 3., 0.0);
if (operation == SPRITE_OPERATION) {
return sample_sprite(atlas_index, palette, uv);
} else if (operation == MAP_OPERATION) {
return sample_map(source_xy, length, layer_mask, palette, uv);
}
return vec4(0.0);
}
I sample a texture within a non-uniform condition (which I ran into when getting it to work under WebGPU). I'm not sure if this kind of conditional logic could change the generated GLSL in a meaningful way.
It shouldn't.
Could you create a minimal repro and try to pass it to the naga CLI? To rule out any transforms that naga-oil might be doing.
Description I make use of
wgpu
transitively viabevy
.When I build my
bevy
app for WebGL2, my use oftextureNumLayers
is transformed to a usetextureSize
that does not seem to be compatible with GLSL 3.00.The usage in the source looks like this:
And the resulting GLSL looks like this:
Note that the resulting call to
textureSize
is only passed one argument. This produces an error that there is no matching overload fortextureSize
. Upon consulting the spec,textureSize
does indeed seem to expect a second positional argument for the mip level:However,
textureNumLayers
does not accept a similar second argument (https://www.w3.org/TR/WGSL/#texturenumlayers).Repro steps
textureNumLayers
in a WGSL shaderExpected vs observed behavior The resulting GLSL is compatible with WebGL2 in web browsers, however it is not.
Extra materials Here is the full transformed output of the offending fragment shader (captured via Chrome dev tools): https://gist.github.com/cdata/22a420fee91854366e13f01983e04dfd
Platform Chrome 129, Linux / Wayland bevy 0.14.2, wgpu 0.20.1