KhronosGroup / MoltenVK

MoltenVK is a Vulkan Portability implementation. It layers a subset of the high-performance, industry-standard Vulkan graphics and compute API over Apple's Metal graphics framework, enabling Vulkan applications to run on macOS, iOS and tvOS.
Apache License 2.0
4.73k stars 411 forks source link

GPU address fault error when using descriptor indexing + variable descriptor count #2278

Open giomasce opened 1 month ago

giomasce commented 1 month ago

As with my previous issue, this comes from the vkd3d test suite. I converted it to pure Vulkan and made it as minimal as possible. On my M2 MacBook Air, this program produces the following error:

[mvk-error] VK_ERROR_OUT_OF_DEVICE_MEMORY: MTLCommandBuffer "vkQueueSubmit MTLCommandBuffer on Queue 0-0" execution failed (code 3): Caused GPU Address Fault Error (0000000b:kIOGPUCommandBufferCallbackErrorPageFault)

The program passes both Vulkan and Metal validation, and I can't see any other mistake that might pass through the Vulkan validator.

Not using variable descriptor counts seems to fix the issue, as does passing zero as the first push constant (which is used to index into the descriptor array).

Test program source code ```cpp // Compile with: g++ -std=c++20 -o test $(pkg-config --cflags --libs vulkan spirv-tools) test.cpp #include #include #include #include "spirv-tools/libspirv.hpp" const char cs_code[] = R"( ; SPIR-V ; Version: 1.3 ; Generator: Wine VKD3D Shader Compiler; 12 ; Bound: 41 ; Schema: 0 OpCapability Shader OpCapability SampledBuffer OpCapability ImageBuffer OpCapability StorageImageReadWithoutFormat OpCapability StorageImageWriteWithoutFormat OpCapability RuntimeDescriptorArray OpCapability StorageTexelBufferArrayDynamicIndexing OpExtension "SPV_EXT_descriptor_indexing" OpMemoryModel Logical GLSL450 OpEntryPoint GLCompute %1 "main" OpExecutionMode %1 LocalSize 1 1 1 OpName %1 "main" OpName %7 "r0" OpName %12 "u1" OpName %17 "push_cb_struct" OpName %19 "push_cb" OpDecorate %12 DescriptorSet 0 OpDecorate %12 Binding 0 OpDecorate %15 ArrayStride 4 OpDecorate %17 Block OpMemberDecorate %17 0 Offset 0 %2 = OpTypeVoid %3 = OpTypeFunction %2 %4 = OpTypeFloat 32 %5 = OpTypeVector %4 4 %6 = OpTypePointer Private %5 %7 = OpVariable %6 Private %8 = OpTypeInt 32 0 %9 = OpTypeImage %8 Buffer 0 0 0 2 Unknown %10 = OpTypeRuntimeArray %9 %11 = OpTypePointer UniformConstant %10 %12 = OpVariable %11 UniformConstant %14 = OpConstant %8 2 %15 = OpTypeArray %8 %14 %16 = OpConstant %8 0 %17 = OpTypeStruct %15 %18 = OpTypePointer PushConstant %17 %19 = OpVariable %18 PushConstant %20 = OpTypePointer PushConstant %8 %24 = OpTypePointer UniformConstant %9 %27 = OpTypeVector %8 4 %28 = OpTypeInt 32 1 %29 = OpConstant %28 0 %33 = OpTypePointer Private %4 %1 = OpFunction %2 None %3 %13 = OpLabel %21 = OpInBoundsAccessChain %20 %19 %16 %16 %22 = OpLoad %8 %21 %23 = OpIAdd %8 %16 %22 %25 = OpAccessChain %24 %12 %23 %26 = OpLoad %9 %25 %30 = OpImageRead %27 %26 %29 %31 = OpCompositeExtract %8 %30 0 %32 = OpBitcast %4 %31 %34 = OpInBoundsAccessChain %33 %7 %16 OpStore %34 %32 %35 = OpIAdd %8 %16 %22 %36 = OpAccessChain %24 %12 %35 %37 = OpLoad %9 %36 %38 = OpLoad %5 %7 %39 
= OpVectorShuffle %5 %38 %38 0 0 0 0 %40 = OpBitcast %27 %39 OpImageWrite %37 %29 %40 OpReturn OpFunctionEnd )"; static inline VkInstance create_instance() { VkApplicationInfo app_info{}; app_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; app_info.pApplicationName = "Test"; app_info.applicationVersion = VK_MAKE_VERSION(1, 0, 0); app_info.pEngineName = "No Engine"; app_info.engineVersion = VK_MAKE_VERSION(1, 0, 0); app_info.apiVersion = VK_API_VERSION_1_2; VkInstanceCreateInfo instance_create_info{}; instance_create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; instance_create_info.pApplicationInfo = &app_info; VkInstance instance; VkResult result = vkCreateInstance(&instance_create_info, nullptr, &instance); assert(result >= 0); return instance; } static inline VkPhysicalDevice select_physical_device(VkInstance instance) { uint32_t physical_device_count = 1; VkPhysicalDevice physical_device; VkResult result = vkEnumeratePhysicalDevices(instance, &physical_device_count, &physical_device); assert(result >= 0); assert(physical_device_count > 0); return physical_device; } static inline uint32_t select_queue_family(VkPhysicalDevice physical_device) { uint32_t queue_family_properties_count = 1; vkGetPhysicalDeviceQueueFamilyProperties(physical_device, &queue_family_properties_count, nullptr); assert(queue_family_properties_count > 0); std::vector queue_family_properties(queue_family_properties_count); vkGetPhysicalDeviceQueueFamilyProperties(physical_device, &queue_family_properties_count, queue_family_properties.data()); for (uint32_t i = 0; i < queue_family_properties_count; ++i) { const auto flags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT; if ((queue_family_properties[i].queueFlags & flags) == flags) { return i; } } assert(false); } static inline VkDevice create_device(VkPhysicalDevice physical_device, uint32_t queue_family, std::vector extension_names) { VkDeviceQueueCreateInfo device_queue_create_info{}; device_queue_create_info.sType = 
VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; device_queue_create_info.queueFamilyIndex = queue_family; device_queue_create_info.queueCount = 1; float queue_priority = 1.0; device_queue_create_info.pQueuePriorities = &queue_priority; uint32_t property_count; VkResult result = vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &property_count, nullptr); assert(result >= 0); std::vector extension_properties(property_count); result = vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &property_count, extension_properties.data()); assert(result >= 0); for (const auto &extension_property : extension_properties) { if (strcmp(extension_property.extensionName, "VK_KHR_portability_subset") == 0) { extension_names.push_back(extension_property.extensionName); } } VkPhysicalDeviceMeshShaderFeaturesEXT physical_device_mesh_shader_features_ext{}; void *last_struct = nullptr; for (const auto &extension_name : extension_names) { if (strcmp(extension_name, "VK_EXT_mesh_shader") == 0) { physical_device_mesh_shader_features_ext.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_FEATURES_EXT; physical_device_mesh_shader_features_ext.pNext = last_struct; last_struct = &physical_device_mesh_shader_features_ext; } } VkPhysicalDeviceVulkan12Features physical_device_vulkan12_features{}; physical_device_vulkan12_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES; physical_device_vulkan12_features.pNext = last_struct; last_struct = &physical_device_vulkan12_features; VkPhysicalDeviceVulkan11Features physical_device_vulkan11_features{}; physical_device_vulkan11_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES; physical_device_vulkan11_features.pNext = last_struct; last_struct = &physical_device_vulkan11_features; VkPhysicalDeviceFeatures2 physical_device_features2{}; physical_device_features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; physical_device_features2.pNext = last_struct; 
vkGetPhysicalDeviceFeatures2(physical_device, &physical_device_features2); VkDeviceCreateInfo device_create_info{}; device_create_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; device_create_info.pNext = &physical_device_features2; device_create_info.queueCreateInfoCount = 1; device_create_info.pQueueCreateInfos = &device_queue_create_info; device_create_info.enabledExtensionCount = static_cast(extension_names.size()); device_create_info.ppEnabledExtensionNames = extension_names.data(); VkDevice device; result = vkCreateDevice(physical_device, &device_create_info, nullptr, &device); assert(result >= 0); return device; } static inline VkQueue get_queue(VkDevice device, uint32_t queue_family) { VkQueue queue; vkGetDeviceQueue(device, queue_family, 0, &queue); return queue; } static inline std::vector assemble_spirv(const char *code, spv_target_env env) { spvtools::SpirvTools core(env); std::vector spirv; bool res = core.Assemble(code, &spirv); assert(res); res = core.Validate(spirv); assert(res); return spirv; } static inline VkShaderModule create_shader_module(VkDevice device, const std::vector &spirv) { VkShaderModuleCreateInfo shader_module_create_info{}; shader_module_create_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; shader_module_create_info.codeSize = spirv.size() * sizeof(spirv[0]); shader_module_create_info.pCode = spirv.data(); VkShaderModule shader_module; VkResult result = vkCreateShaderModule(device, &shader_module_create_info, nullptr, &shader_module); assert(result >= 0); return shader_module; } static inline VkPipeline create_compute_pipeline(VkDevice device, VkShaderModule shader_module, VkPipelineLayout pipeline_layout) { VkPipelineShaderStageCreateInfo pipeline_shader_stage_create_info{}; pipeline_shader_stage_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; pipeline_shader_stage_create_info.stage = VK_SHADER_STAGE_COMPUTE_BIT; pipeline_shader_stage_create_info.module = shader_module; 
pipeline_shader_stage_create_info.pName = "main"; VkComputePipelineCreateInfo compute_pipeline_create_info{}; compute_pipeline_create_info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; compute_pipeline_create_info.stage = pipeline_shader_stage_create_info; compute_pipeline_create_info.layout = pipeline_layout; VkPipeline pipeline; VkResult result = vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, &compute_pipeline_create_info, nullptr, &pipeline); assert(result >= 0); return pipeline; } static inline VkDeviceMemory allocate_memory(VkDevice device, VkPhysicalDevice physical_device, const VkMemoryRequirements &memory_requirements, VkMemoryPropertyFlags memory_property_flags) { VkPhysicalDeviceMemoryProperties physical_device_memory_properties; vkGetPhysicalDeviceMemoryProperties(physical_device, &physical_device_memory_properties); uint32_t i; for (i = 0; i < VK_MAX_MEMORY_TYPES; ++i) { if (!(memory_requirements.memoryTypeBits & (1u << i))) { continue; } if (physical_device_memory_properties.memoryTypes[i].propertyFlags & memory_property_flags) { break; } } assert(i < VK_MAX_MEMORY_TYPES); VkMemoryAllocateInfo memory_allocate_info{}; memory_allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; memory_allocate_info.allocationSize = memory_requirements.size; memory_allocate_info.memoryTypeIndex = i; VkDeviceMemory device_memory; VkResult result = vkAllocateMemory(device, &memory_allocate_info, nullptr, &device_memory); assert(result >= 0); return device_memory; } static inline std::pair create_buffer_and_memory(VkPhysicalDevice physical_device, VkDevice device, VkDeviceSize size, VkBufferUsageFlags usage, VkMemoryPropertyFlags memory_property_flags) { VkBufferCreateInfo buffer_create_info{}; buffer_create_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; buffer_create_info.size = size; buffer_create_info.usage = usage; buffer_create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; VkBuffer buffer; VkResult result = vkCreateBuffer(device, 
&buffer_create_info, nullptr, &buffer); assert(result >= 0); VkMemoryRequirements memory_requirements; vkGetBufferMemoryRequirements(device, buffer, &memory_requirements); VkDeviceMemory device_memory = allocate_memory(device, physical_device, memory_requirements, memory_property_flags); result = vkBindBufferMemory(device, buffer, device_memory, 0); assert(result >= 0); return {buffer, device_memory}; } static inline VkBuffer create_buffer(VkPhysicalDevice physical_device, VkDevice device, VkDeviceSize size, VkBufferUsageFlags usage, VkMemoryPropertyFlags memory_property_flags) { VkBuffer buffer; std::tie(buffer, std::ignore) = create_buffer_and_memory(physical_device, device, size, usage, memory_property_flags); return buffer; } static inline VkBufferView create_buffer_view(VkDevice device, VkBuffer buffer) { VkBufferViewCreateInfo buffer_view_create_info{}; buffer_view_create_info.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO; buffer_view_create_info.buffer = buffer; buffer_view_create_info.format = VK_FORMAT_R32_UINT; buffer_view_create_info.offset = 0; buffer_view_create_info.range = VK_WHOLE_SIZE; VkBufferView buffer_view; VkResult result = vkCreateBufferView(device, &buffer_view_create_info, nullptr, &buffer_view); assert(result >= 0); return buffer_view; } static inline void update_buffer_view_descriptors(VkDevice device, VkDescriptorSet descriptor_set, uint32_t binding, uint32_t array_element, VkDescriptorType descriptor_type, uint32_t buffer_view_count, const VkBufferView *buffer_views) { VkWriteDescriptorSet write_descriptor_set{}; write_descriptor_set.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; write_descriptor_set.dstSet = descriptor_set; write_descriptor_set.dstBinding = binding; write_descriptor_set.dstArrayElement = array_element; write_descriptor_set.descriptorCount = buffer_view_count; write_descriptor_set.descriptorType = descriptor_type; write_descriptor_set.pTexelBufferView = buffer_views; vkUpdateDescriptorSets(device, 1, 
&write_descriptor_set, 0, nullptr); } static inline void begin_command_buffer(VkCommandBuffer command_buffer) { VkCommandBufferBeginInfo command_buffer_begin_info{}; command_buffer_begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; VkResult result = vkBeginCommandBuffer(command_buffer, &command_buffer_begin_info); assert(result >= 0); } static inline void submit_command_buffer(VkQueue queue, VkCommandBuffer command_buffer) { VkResult result = vkEndCommandBuffer(command_buffer); assert(result >= 0); VkSubmitInfo submit_info{}; submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; submit_info.commandBufferCount = 1; submit_info.pCommandBuffers = &command_buffer; result = vkQueueSubmit(queue, 1, &submit_info, VK_NULL_HANDLE); assert(result >= 0); result = vkQueueWaitIdle(queue); assert(result >= 0); } static inline VkCommandBuffer create_command_buffer(VkDevice device) { VkCommandPoolCreateInfo command_pool_create_info{}; command_pool_create_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; command_pool_create_info.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; command_pool_create_info.queueFamilyIndex = 0; VkCommandPool command_pool; VkResult result = vkCreateCommandPool(device, &command_pool_create_info, nullptr, &command_pool); assert(result >= 0); VkCommandBufferAllocateInfo command_buffer_allocate_info{}; command_buffer_allocate_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; command_buffer_allocate_info.commandPool = command_pool; command_buffer_allocate_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; command_buffer_allocate_info.commandBufferCount = 1; VkCommandBuffer command_buffer; result = vkAllocateCommandBuffers(device, &command_buffer_allocate_info, &command_buffer); assert(result >= 0); assert(command_buffer); return command_buffer; } int main() { VkInstance instance = create_instance(); VkPhysicalDevice physical_device = select_physical_device(instance); uint32_t queue_family = select_queue_family(physical_device); 
VkDevice device = create_device(physical_device, queue_family, {"VK_EXT_descriptor_indexing"}); VkQueue queue = get_queue(device, queue_family); const auto cs_spirv = assemble_spirv(cs_code, SPV_ENV_VULKAN_1_1); const VkDescriptorBindingFlags binding_flags = VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT | VK_DESCRIPTOR_BINDING_UPDATE_UNUSED_WHILE_PENDING_BIT | VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT | VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT; VkDescriptorSetLayoutBindingFlagsCreateInfo descriptor_set_layout_binding_flags_create_info{}; descriptor_set_layout_binding_flags_create_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO; descriptor_set_layout_binding_flags_create_info.bindingCount = 1; descriptor_set_layout_binding_flags_create_info.pBindingFlags = &binding_flags; VkDescriptorSetLayoutBinding descriptor_set_layout_binding{}; descriptor_set_layout_binding.binding = 0; descriptor_set_layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; descriptor_set_layout_binding.descriptorCount = 2; descriptor_set_layout_binding.stageFlags = VK_SHADER_STAGE_ALL; VkDescriptorSetLayoutCreateInfo descriptor_set_layout_create_info{}; descriptor_set_layout_create_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; descriptor_set_layout_create_info.pNext = &descriptor_set_layout_binding_flags_create_info; descriptor_set_layout_create_info.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT; descriptor_set_layout_create_info.bindingCount = 1; descriptor_set_layout_create_info.pBindings = &descriptor_set_layout_binding; VkDescriptorSetLayout descriptor_set_layout; VkResult result = vkCreateDescriptorSetLayout(device, &descriptor_set_layout_create_info, nullptr, &descriptor_set_layout); assert(result >= 0); VkPushConstantRange push_constant_range{}; push_constant_range.stageFlags = VK_SHADER_STAGE_ALL; push_constant_range.offset = 0; push_constant_range.size = 8; VkPipelineLayoutCreateInfo 
pipeline_layout_create_info{}; pipeline_layout_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; pipeline_layout_create_info.setLayoutCount = 1; pipeline_layout_create_info.pSetLayouts = &descriptor_set_layout; pipeline_layout_create_info.pushConstantRangeCount = 1; pipeline_layout_create_info.pPushConstantRanges = &push_constant_range; VkPipelineLayout pipeline_layout; result = vkCreatePipelineLayout(device, &pipeline_layout_create_info, nullptr, &pipeline_layout); assert(result >= 0); VkShaderModule cs_shader_module = create_shader_module(device, cs_spirv); VkPipeline pipeline = create_compute_pipeline(device, cs_shader_module, pipeline_layout); VkBuffer buffer1 = create_buffer(physical_device, device, 1024, VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); VkBufferView view1 = create_buffer_view(device, buffer1); VkBuffer buffer2 = create_buffer(physical_device, device, 1024, VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); VkBufferView view2 = create_buffer_view(device, buffer2); VkDescriptorPoolSize descriptor_pool_size{}; descriptor_pool_size.type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; descriptor_pool_size.descriptorCount = 2; VkDescriptorPoolCreateInfo descriptor_pool_create_info{}; descriptor_pool_create_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; descriptor_pool_create_info.flags = VK_DESCRIPTOR_POOL_CREATE_UPDATE_AFTER_BIND_BIT; 
descriptor_pool_create_info.maxSets = 1; descriptor_pool_create_info.poolSizeCount = 1; descriptor_pool_create_info.pPoolSizes = &descriptor_pool_size; VkDescriptorPool descriptor_pool; result = vkCreateDescriptorPool(device, &descriptor_pool_create_info, nullptr, &descriptor_pool); assert(result >= 0); const uint32_t descriptor_count = 2; VkDescriptorSetVariableDescriptorCountAllocateInfoEXT descriptor_set_variable_descriptor_count_allocate_info{}; descriptor_set_variable_descriptor_count_allocate_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_ALLOCATE_INFO_EXT; descriptor_set_variable_descriptor_count_allocate_info.descriptorSetCount = 1; descriptor_set_variable_descriptor_count_allocate_info.pDescriptorCounts = &descriptor_count; VkDescriptorSetAllocateInfo descriptor_set_allocate_info{}; descriptor_set_allocate_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; descriptor_set_allocate_info.pNext = &descriptor_set_variable_descriptor_count_allocate_info; descriptor_set_allocate_info.descriptorPool = descriptor_pool; descriptor_set_allocate_info.descriptorSetCount = 1; descriptor_set_allocate_info.pSetLayouts = &descriptor_set_layout; VkDescriptorSet descriptor_set; result = vkAllocateDescriptorSets(device, &descriptor_set_allocate_info, &descriptor_set); assert(result >= 0); update_buffer_view_descriptors(device, descriptor_set, 0, 0, VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, 1, &view1); update_buffer_view_descriptors(device, descriptor_set, 0, 1, VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, 1, &view2); VkCommandBuffer command_buffer = create_command_buffer(device); begin_command_buffer(command_buffer); vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, 0, 1, &descriptor_set, 0, nullptr); uint32_t push_constants[] = {1, 0}; vkCmdPushConstants(command_buffer, pipeline_layout, VK_SHADER_STAGE_ALL, 0, 
sizeof(push_constants), push_constants); vkCmdDispatch(command_buffer, 1, 1, 1); submit_command_buffer(queue, command_buffer); } ```
K0bin commented 1 month ago

> Not using variable descriptor counts seem to fix the issue, as does passing zero as the first push constant (which is used to address the descriptor array).

I think I ran into the same issue with Metal + SPIRV-Cross recently: https://github.com/KhronosGroup/SPIRV-Cross/issues/2337

patrick-han commented 1 month ago

I'm facing a similar issue: https://github.com/KhronosGroup/MoltenVK/issues/2271

billhollings commented 1 day ago

Unfortunately, I'm not able to replicate this when running your app on an M3 with macOS 14.6.1 and the latest MoltenVK.

The following log was generated with these env vars set:

MVK_CONFIG_DEBUG=1
MVK_CONFIG_LOG_LEVEL=4
MVK_CONFIG_TRACE_VULKAN_CALLS=1
Test program log ``` [mvk-trace] vkCreateInstance() [mvk-info] MoltenVK version 1.2.11, supporting Vulkan version 1.2.295. The following 110 Vulkan extensions are supported: VK_KHR_16bit_storage v1 VK_KHR_8bit_storage v1 VK_KHR_bind_memory2 v1 VK_KHR_buffer_device_address v1 VK_KHR_calibrated_timestamps v1 VK_KHR_copy_commands2 v1 VK_KHR_create_renderpass2 v1 VK_KHR_dedicated_allocation v3 VK_KHR_deferred_host_operations v4 VK_KHR_depth_stencil_resolve v1 VK_KHR_descriptor_update_template v1 VK_KHR_device_group v4 VK_KHR_device_group_creation v1 VK_KHR_driver_properties v1 VK_KHR_dynamic_rendering v1 VK_KHR_external_fence v1 VK_KHR_external_fence_capabilities v1 VK_KHR_external_memory v1 VK_KHR_external_memory_capabilities v1 VK_KHR_external_semaphore v1 VK_KHR_external_semaphore_capabilities v1 VK_KHR_fragment_shader_barycentric v1 VK_KHR_format_feature_flags2 v2 VK_KHR_get_memory_requirements2 v1 VK_KHR_get_physical_device_properties2 v2 VK_KHR_get_surface_capabilities2 v1 VK_KHR_imageless_framebuffer v1 VK_KHR_image_format_list v1 VK_KHR_incremental_present v2 VK_KHR_maintenance1 v2 VK_KHR_maintenance2 v1 VK_KHR_maintenance3 v1 VK_KHR_map_memory2 v1 VK_KHR_multiview v1 VK_KHR_portability_subset v1 VK_KHR_push_descriptor v2 VK_KHR_relaxed_block_layout v1 VK_KHR_sampler_mirror_clamp_to_edge v3 VK_KHR_sampler_ycbcr_conversion v14 VK_KHR_separate_depth_stencil_layouts v1 VK_KHR_shader_draw_parameters v1 VK_KHR_shader_float_controls v4 VK_KHR_shader_float16_int8 v1 VK_KHR_shader_integer_dot_product v1 VK_KHR_shader_non_semantic_info v1 VK_KHR_shader_subgroup_extended_types v1 VK_KHR_spirv_1_4 v1 VK_KHR_storage_buffer_storage_class v1 VK_KHR_surface v25 VK_KHR_swapchain v70 VK_KHR_swapchain_mutable_format v1 VK_KHR_synchronization2 v1 VK_KHR_timeline_semaphore v2 VK_KHR_uniform_buffer_standard_layout v1 VK_KHR_variable_pointers v1 VK_KHR_vertex_attribute_divisor v1 VK_EXT_4444_formats v1 VK_EXT_buffer_device_address v2 VK_EXT_calibrated_timestamps v2 
VK_EXT_debug_marker v4 VK_EXT_debug_report v10 VK_EXT_debug_utils v2 VK_EXT_descriptor_indexing v2 VK_EXT_extended_dynamic_state v1 VK_EXT_extended_dynamic_state2 v1 VK_EXT_extended_dynamic_state3 v2 VK_EXT_external_memory_host v1 VK_EXT_fragment_shader_interlock v1 VK_EXT_hdr_metadata v3 VK_EXT_headless_surface v1 VK_EXT_host_image_copy v1 VK_EXT_host_query_reset v1 VK_EXT_image_robustness v1 VK_EXT_inline_uniform_block v1 VK_EXT_layer_settings v2 VK_EXT_memory_budget v1 VK_EXT_metal_objects v2 VK_EXT_metal_surface v1 VK_EXT_pipeline_creation_cache_control v3 VK_EXT_pipeline_creation_feedback v1 VK_EXT_post_depth_coverage v1 VK_EXT_private_data v1 VK_EXT_robustness2 v1 VK_EXT_sample_locations v1 VK_EXT_scalar_block_layout v1 VK_EXT_separate_stencil_usage v1 VK_EXT_shader_atomic_float v1 VK_EXT_shader_demote_to_helper_invocation v1 VK_EXT_shader_stencil_export v1 VK_EXT_shader_subgroup_ballot v1 VK_EXT_shader_subgroup_vote v1 VK_EXT_shader_viewport_index_layer v1 VK_EXT_subgroup_size_control v2 VK_EXT_surface_maintenance1 v1 VK_EXT_swapchain_colorspace v5 VK_EXT_swapchain_maintenance1 v1 VK_EXT_texel_buffer_alignment v1 VK_EXT_texture_compression_astc_hdr v1 VK_EXT_vertex_attribute_divisor v3 VK_AMD_gpu_shader_half_float v2 VK_AMD_negative_viewport_height v1 VK_AMD_shader_image_load_store_lod v1 VK_AMD_shader_trinary_minmax v1 VK_IMG_format_pvrtc v1 VK_INTEL_shader_integer_functions2 v1 VK_GOOGLE_display_timing v1 VK_MVK_macos_surface v3 VK_MVK_moltenvk v37 VK_NV_fragment_shader_barycentric v1 VK_NV_glsl_shader v1 [mvk-info] maximumConcurrentCompilationTaskCount 2 [mvk-info] GPU device: model: Apple M3 Pro type: Integrated vendorID: 0x106b deviceID: 0xe060209 pipelineCacheUUID: B6498B25-0E06-0209-0000-000100000000 GPU memory available: 27648 MB GPU memory used: 0 MB Metal Shading Language 3.1 supports the following GPU Features: GPU Family Metal 3 GPU Family Apple 9 GPU Family Mac 2 Read-Write Texture Tier 2 [mvk-info] Created VkInstance for Vulkan version 1.2.0, 
as requested by app, with the following 0 Vulkan extensions enabled: [mvk-trace] vkEnumeratePhysicalDevices() [mvk-trace] vkGetPhysicalDeviceQueueFamilyProperties() [mvk-trace] vkGetPhysicalDeviceQueueFamilyProperties() [mvk-trace] vkEnumerateDeviceExtensionProperties() [mvk-trace] vkEnumerateDeviceExtensionProperties() [mvk-trace] vkGetPhysicalDeviceFeatures2() [mvk-trace] vkCreateDevice() [mvk-info] Vulkan semaphores using MTLEvent. [mvk-info] Descriptor sets binding resources using Metal3 argument buffers. flock failed to lock list file (/var/folders/kh/8qqd4nm13p38jr0jkt26nzgw0000gn/C//com.apple.metal/32023/libraries.list): errno = 35 [mvk-info] Created VkDevice to run on GPU Apple M3 Pro with the following 2 Vulkan extensions enabled: VK_KHR_portability_subset v1 VK_EXT_descriptor_indexing v2 [mvk-trace] vkGetDeviceQueue() [mvk-trace] vkCreateDescriptorSetLayout() [mvk-debug] Created VkDescriptorSetLayout 0x600003760240 with 1 bindings: 0: VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER with up to 2 elements at binding 0 [mvk-trace] vkCreatePipelineLayout() [mvk-debug] Created VkPipelineLayout 0x13d10ac70 with 1 descriptor set layouts: 0: 0x600003760240 [mvk-trace] vkCreateShaderModule() [mvk-trace] vkCreateComputePipelines() [mvk-info] Converting SPIR-V: ; SPIR-V ; Version: 1.3 ; Generator: Khronos SPIR-V Tools Assembler; 0 ; Bound: 41 ; Schema: 0 OpCapability Shader OpCapability SampledBuffer OpCapability ImageBuffer OpCapability StorageImageReadWithoutFormat OpCapability StorageImageWriteWithoutFormat OpCapability RuntimeDescriptorArray OpCapability StorageTexelBufferArrayDynamicIndexing OpExtension "SPV_EXT_descriptor_indexing" OpMemoryModel Logical GLSL450 OpEntryPoint GLCompute %1 "main" OpExecutionMode %1 LocalSize 1 1 1 OpName %1 "main" OpName %2 "r0" OpName %3 "u1" OpName %4 "push_cb_struct" OpName %5 "push_cb" OpDecorate %3 DescriptorSet 0 OpDecorate %3 Binding 0 OpDecorate %6 ArrayStride 4 OpDecorate %4 Block OpMemberDecorate %4 0 Offset 0 %7 = OpTypeVoid 
%8 = OpTypeFunction %7 %9 = OpTypeFloat 32 %10 = OpTypeVector %9 4 %11 = OpTypePointer Private %10 %2 = OpVariable %11 Private %12 = OpTypeInt 32 0 %13 = OpTypeImage %12 Buffer 0 0 0 2 Unknown %14 = OpTypeRuntimeArray %13 %15 = OpTypePointer UniformConstant %14 %3 = OpVariable %15 UniformConstant %16 = OpConstant %12 2 %6 = OpTypeArray %12 %16 %17 = OpConstant %12 0 %4 = OpTypeStruct %6 %18 = OpTypePointer PushConstant %4 %5 = OpVariable %18 PushConstant %19 = OpTypePointer PushConstant %12 %20 = OpTypePointer UniformConstant %13 %21 = OpTypeVector %12 4 %22 = OpTypeInt 32 1 %23 = OpConstant %22 0 %24 = OpTypePointer Private %9 %1 = OpFunction %7 None %8 %25 = OpLabel %26 = OpInBoundsAccessChain %19 %5 %17 %17 %27 = OpLoad %12 %26 %28 = OpIAdd %12 %17 %27 %29 = OpAccessChain %20 %3 %28 %30 = OpLoad %13 %29 %31 = OpImageRead %21 %30 %23 %32 = OpCompositeExtract %12 %31 0 %33 = OpBitcast %9 %32 %34 = OpInBoundsAccessChain %24 %2 %17 OpStore %34 %33 %35 = OpIAdd %12 %17 %27 %36 = OpAccessChain %20 %3 %35 %37 = OpLoad %13 %36 %38 = OpLoad %10 %2 %39 = OpVectorShuffle %10 %38 %38 0 0 0 0 %40 = OpBitcast %21 %39 OpImageWrite %37 %23 %40 OpReturn OpFunctionEnd End SPIR-V Converted MSL: #pragma clang diagnostic ignored "-Wmissing-prototypes" #include #include using namespace metal; template void spvImageFence(ImageT img) { img.fence(); } struct push_cb_struct { uint _m0[2]; }; struct spvDescriptorSetBuffer0 { array, 1> u1 [[id(0)]]; }; kernel void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant push_cb_struct& push_cb [[buffer(8)]]) { uint _28 = 0u + push_cb._m0[0u]; spvImageFence(spvDescriptorSet0.u1[_28]); float4 r0; r0.x = as_type(spvDescriptorSet0.u1[_28].read(uint(0)).x); spvDescriptorSet0.u1[0u + push_cb._m0[0u]].write(as_type(r0.xxxx), uint(0)); } End MSL Estimated original GLSL: #version 450 #extension GL_EXT_nonuniform_qualifier : require #extension GL_EXT_shader_image_load_formatted : require layout(local_size_x = 1, local_size_y 
= 1, local_size_z = 1) in; layout(push_constant, std430) uniform push_cb_struct { uint _m0[2]; } push_cb; layout(set = 0, binding = 0) uniform uimageBuffer u1[]; vec4 r0; void main() { r0.x = uintBitsToFloat(imageLoad(u1[0u + push_cb._m0[0u]], 0).x); imageStore(u1[0u + push_cb._m0[0u]], 0, floatBitsToUint(r0.xxxx)); } End GLSL [mvk-info] Compiling Metal shader with FastMath enabled. flock failed to lock list file (/var/folders/kh/8qqd4nm13p38jr0jkt26nzgw0000gn/C//com.apple.metal/16777235_419/functions.list): errno = 35 [mvk-trace] vkCreateBuffer() [mvk-trace] vkGetBufferMemoryRequirements() [mvk-trace] vkGetPhysicalDeviceMemoryProperties() [mvk-trace] vkAllocateMemory() [mvk-trace] vkBindBufferMemory() [mvk-trace] vkCreateBufferView() [mvk-trace] vkCreateBuffer() [mvk-trace] vkGetBufferMemoryRequirements() [mvk-trace] vkGetPhysicalDeviceMemoryProperties() [mvk-trace] vkAllocateMemory() [mvk-trace] vkBindBufferMemory() [mvk-trace] vkCreateBufferView() [mvk-trace] vkCreateDescriptorPool() [mvk-debug] Created VkDescriptorPool 0x13d81cc00 with 1 descriptor sets, and pooled descriptors: VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: 2 (2 remaining) [mvk-trace] vkAllocateDescriptorSets() [mvk-trace] vkUpdateDescriptorSets() [mvk-trace] vkUpdateDescriptorSets() [mvk-trace] vkCreateCommandPool() [mvk-trace] vkAllocateCommandBuffers() [mvk-trace] vkBeginCommandBuffer() [mvk-trace] vkCmdBindPipeline() [mvk-trace] vkCmdBindDescriptorSets() [mvk-trace] vkCmdPushConstants() [mvk-trace] vkCmdDispatch() [mvk-trace] vkEndCommandBuffer() [mvk-trace] vkQueueSubmit() [mvk-trace] vkQueueWaitIdle() Program ended with exit code: 0```