Open kuhar opened 1 year ago
Minimized testcase:
// Minimized test: reverse a 2x3 f32 tensor along dimension 0 and compare the
// result against the row-swapped constant.
func.func @reverse_dim0() {
// Input rows are [1, 2, 3] and [4, 5, 6].
// NOTE(review): util.unfoldable_constant presumably keeps the compiler from
// constant-folding the whole test away -- confirm against the IREE Util docs.
%input = util.unfoldable_constant dense<[[1.0, 2.0, 3.0],
[4.0, 5.0, 6.0]]> : tensor<2x3xf32>
// Destination tensor with the same shape and element type as the input.
%init = tensor.empty() : tensor<2x3xf32>
// The op under test; `dimensions` holds the single index 0.
%0 = iree_linalg_ext.reverse
dimensions(dense<0> : tensor<1xi64>)
ins(%input : tensor<2x3xf32>)
outs(%init : tensor<2x3xf32>) : tensor<2x3xf32>
// Expected value is the input with its two rows exchanged.
check.expect_almost_eq_const(
%0,
dense<[[4.0, 5.0, 6.0], [1.0, 2.0, 3.0]]> : tensor<2x3xf32>
) : tensor<2x3xf32>
return
}
SPIR-V:
; SPIR-V
; Version: 1.0
; Generator: Khronos; 22
; Bound: 70
; Schema: 0
OpCapability Shader
OpExtension "SPV_KHR_storage_buffer_storage_class"
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %_reverse_dim0_dispatch_0 "_reverse_dim0_dispatch_0" %__builtin_var_WorkgroupId__ %__builtin_var_NumWorkgroups__ %__builtin_var_LocalInvocationId__
OpExecutionMode %_reverse_dim0_dispatch_0 LocalSize 64 1 1
OpName %__builtin_var_LocalInvocationId__ "__builtin_var_LocalInvocationId__"
OpName %__builtin_var_NumWorkgroups__ "__builtin_var_NumWorkgroups__"
OpName %__builtin_var_WorkgroupId__ "__builtin_var_WorkgroupId__"
OpName %__resource_var_0_0_ "__resource_var_0_0_"
OpName %__resource_var_0_1_ "__resource_var_0_1_"
OpName %_reverse_dim0_dispatch_0 "_reverse_dim0_dispatch_0"
OpDecorate %__builtin_var_LocalInvocationId__ BuiltIn LocalInvocationId
OpDecorate %__builtin_var_NumWorkgroups__ BuiltIn NumWorkgroups
OpDecorate %__builtin_var_WorkgroupId__ BuiltIn WorkgroupId
OpDecorate %_runtimearr_float ArrayStride 4
OpMemberDecorate %_struct_8 0 Offset 0
OpDecorate %_struct_8 Block
OpDecorate %__resource_var_0_0_ Binding 0
OpDecorate %__resource_var_0_0_ DescriptorSet 0
OpDecorate %__resource_var_0_1_ Binding 1
OpDecorate %__resource_var_0_1_ DescriptorSet 0
%uint = OpTypeInt 32 0
%v3uint = OpTypeVector %uint 3
%_ptr_Input_v3uint = OpTypePointer Input %v3uint
%__builtin_var_LocalInvocationId__ = OpVariable %_ptr_Input_v3uint Input
%__builtin_var_NumWorkgroups__ = OpVariable %_ptr_Input_v3uint Input
%__builtin_var_WorkgroupId__ = OpVariable %_ptr_Input_v3uint Input
%float = OpTypeFloat 32
%_runtimearr_float = OpTypeRuntimeArray %float
%_struct_8 = OpTypeStruct %_runtimearr_float
%_ptr_StorageBuffer__struct_8 = OpTypePointer StorageBuffer %_struct_8
%__resource_var_0_0_ = OpVariable %_ptr_StorageBuffer__struct_8 StorageBuffer
%__resource_var_0_1_ = OpVariable %_ptr_StorageBuffer__struct_8 StorageBuffer
%void = OpTypeVoid
%13 = OpTypeFunction %void
%uint_4294967293 = OpConstant %uint 4294967293
%uint_64 = OpConstant %uint 64
%uint_3 = OpConstant %uint 3
%uint_2 = OpConstant %uint 2
%uint_0 = OpConstant %uint 0
%bool = OpTypeBool
%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
; Compute entry point. Three nested loops walk the index space; each innermost
; iteration loads one float from the binding-0 buffer and stores it into the
; binding-1 buffer at a mirrored index.
%_reverse_dim0_dispatch_0 = OpFunction %void None %13
%16 = OpLabel
; %23 = WorkgroupId.x, %25 = NumWorkgroups.x
%22 = OpLoad %v3uint %__builtin_var_WorkgroupId__
%23 = OpCompositeExtract %uint %22 0
%24 = OpLoad %v3uint %__builtin_var_NumWorkgroups__
%25 = OpCompositeExtract %uint %24 0
; %27 = WorkgroupId.y, %29 = NumWorkgroups.y
%26 = OpLoad %v3uint %__builtin_var_WorkgroupId__
%27 = OpCompositeExtract %uint %26 1
%28 = OpLoad %v3uint %__builtin_var_NumWorkgroups__
%29 = OpCompositeExtract %uint %28 1
; %30 = start and %31 = step of the middle loop, both scaled by 64.
%30 = OpIMul %uint %23 %uint_64
%31 = OpIMul %uint %25 %uint_64
; %33 = LocalInvocationId.x, %35 = LocalInvocationId.y
%32 = OpLoad %v3uint %__builtin_var_LocalInvocationId__
%33 = OpCompositeExtract %uint %32 0
%34 = OpLoad %v3uint %__builtin_var_LocalInvocationId__
%35 = OpCompositeExtract %uint %34 1
OpBranch %36
; Outer loop: %39 starts at WorkgroupId.y, steps by NumWorkgroups.y (%69),
; and runs while %39 < 2 under a signed comparison.
%36 = OpLabel
%39 = OpPhi %uint %69 %40 %27 %16
%42 = OpSLessThan %bool %39 %uint_2
OpLoopMerge %38 %37 None
OpBranchConditional %42 %37 %38
%37 = OpLabel
OpBranch %43
; Middle loop: %45 starts at %30, steps by %31 (%68), and runs while %45 < 3
; under a signed comparison.
%43 = OpLabel
%45 = OpPhi %uint %68 %46 %30 %37
%47 = OpSLessThan %bool %45 %uint_3
OpLoopMerge %40 %44 None
OpBranchConditional %47 %44 %40
%44 = OpLabel
OpBranch %48
; Inner loop: %50 starts at LocalInvocationId.x, steps by 64 (%67), and runs
; while %50 < 3 under a signed comparison.
%48 = OpLabel
%50 = OpPhi %uint %67 %49 %33 %44
%51 = OpSLessThan %bool %50 %uint_3
OpLoopMerge %46 %49 None
OpBranchConditional %51 %49 %46
%49 = OpLabel
; Load index %56 = %50 + %39 * 3 + %35 * 3 + %45
%52 = OpIMul %uint %39 %uint_3
%53 = OpIAdd %uint %50 %52
%54 = OpIMul %uint %35 %uint_3
%55 = OpIAdd %uint %53 %54
%56 = OpIAdd %uint %55 %45
%58 = OpAccessChain %_ptr_StorageBuffer_float %__resource_var_0_0_ %uint_0 %56
%59 = OpLoad %float %58
; Store index %65 = %50 + %35 * 4294967293 + %39 * 4294967293 + %45 + 3.
; 4294967293 is 2^32 - 3, the 32-bit two's-complement bit pattern of -3.
; When %39 is 1 the running sum sits just below 2^32 until the final + 3
; wraps it modulo 2^32 back to a small index.
; NOTE(review): whether the failing drivers mishandle this wraparound is not
; established by this listing.
%60 = OpIMul %uint %35 %uint_4294967293
%61 = OpIAdd %uint %50 %60
%62 = OpIMul %uint %39 %uint_4294967293
%63 = OpIAdd %uint %61 %62
%64 = OpIAdd %uint %63 %45
%65 = OpIAdd %uint %64 %uint_3
%66 = OpAccessChain %_ptr_StorageBuffer_float %__resource_var_0_1_ %uint_0 %65
OpStore %66 %59
; Inner-loop increment.
%67 = OpIAdd %uint %50 %uint_64
OpBranch %48
; Middle-loop increment.
%46 = OpLabel
%68 = OpIAdd %uint %45 %31
OpBranch %43
; Outer-loop increment.
%40 = OpLabel
%69 = OpIAdd %uint %39 %29
OpBranch %36
%38 = OpLabel
OpReturn
OpFunctionEnd
GLSL from spirv-cross (just for convenience):
#version 450
layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
layout(set = 0, binding = 0, std430) buffer _resource_var_0_0_
{
float _m0[];
} _resource_var_0_0_1;
layout(set = 0, binding = 1, std430) buffer _resource_var_0_1_
{
float _m0[];
} _resource_var_0_1_1;
// spirv-cross rendering of the compute entry point: three nested loops copy
// floats from the binding-0 buffer to the binding-1 buffer at a mirrored index.
void main()
{
// Start (_30) and step (_31) of the middle loop, both scaled by 64.
uint _30 = gl_WorkGroupID.x * 64u;
uint _31 = gl_NumWorkGroups.x * 64u;
// Outer loop variable: starts at the workgroup's y id, bounded by 2.
uint _39;
_39 = gl_WorkGroupID.y;
uint _45;
for (;;)
{
// Loop bounds are compared as signed integers.
if (int(_39) < int(2u))
{
_45 = _30;
uint _50;
for (;;)
{
if (int(_45) < int(3u))
{
// Inner loop: starts at the invocation's x id and advances by 64.
_50 = gl_LocalInvocationID.x;
for (; int(_50) < int(3u); )
{
// 4294967293u is 2^32 - 3 (the 32-bit pattern of -3): the destination index
// only lands on a small value after unsigned wraparound, e.g. with _39 == 1
// the sum is just below 2^32 until the trailing + 3u wraps it.
_resource_var_0_1_1._m0[(((_50 + (gl_LocalInvocationID.y * 4294967293u)) + (_39 * 4294967293u)) + _45) + 3u] = _resource_var_0_0_1._m0[((_50 + (_39 * 3u)) + (gl_LocalInvocationID.y * 3u)) + _45];
_50 += 64u;
continue;
}
// Middle-loop increment.
_45 += _31;
continue;
}
else
{
break;
}
}
// Outer-loop increment: stride by the number of workgroups in y.
_39 += gl_NumWorkGroups.y;
continue;
}
else
{
break;
}
}
}
Repro as an Amber script:
./amber reverse.amber -d -t vulkan1.1spv1.3
#!amber
DEVICE_EXTENSION VK_KHR_storage_buffer_storage_class
SHADER compute repro SPIRV-ASM
OpCapability Shader
OpExtension "SPV_KHR_storage_buffer_storage_class"
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %_reverse_dim0_dispatch_0 "_reverse_dim0_dispatch_0" %__builtin_var_WorkgroupId__ %__builtin_var_NumWorkgroups__ %__builtin_var_LocalInvocationId__
OpExecutionMode %_reverse_dim0_dispatch_0 LocalSize 64 1 1
OpName %__builtin_var_LocalInvocationId__ "__builtin_var_LocalInvocationId__"
OpName %__builtin_var_NumWorkgroups__ "__builtin_var_NumWorkgroups__"
OpName %__builtin_var_WorkgroupId__ "__builtin_var_WorkgroupId__"
OpName %__resource_var_0_0_ "__resource_var_0_0_"
OpName %__resource_var_0_1_ "__resource_var_0_1_"
OpName %_reverse_dim0_dispatch_0 "_reverse_dim0_dispatch_0"
OpDecorate %__builtin_var_LocalInvocationId__ BuiltIn LocalInvocationId
OpDecorate %__builtin_var_NumWorkgroups__ BuiltIn NumWorkgroups
OpDecorate %__builtin_var_WorkgroupId__ BuiltIn WorkgroupId
OpDecorate %_runtimearr_float ArrayStride 4
OpMemberDecorate %_struct_8 0 Offset 0
OpDecorate %_struct_8 Block
OpDecorate %__resource_var_0_0_ Binding 0
OpDecorate %__resource_var_0_0_ DescriptorSet 0
OpDecorate %__resource_var_0_1_ Binding 1
OpDecorate %__resource_var_0_1_ DescriptorSet 0
%uint = OpTypeInt 32 0
%v3uint = OpTypeVector %uint 3
%_ptr_Input_v3uint = OpTypePointer Input %v3uint
%__builtin_var_LocalInvocationId__ = OpVariable %_ptr_Input_v3uint Input
%__builtin_var_NumWorkgroups__ = OpVariable %_ptr_Input_v3uint Input
%__builtin_var_WorkgroupId__ = OpVariable %_ptr_Input_v3uint Input
%float = OpTypeFloat 32
%_runtimearr_float = OpTypeRuntimeArray %float
%_struct_8 = OpTypeStruct %_runtimearr_float
%_ptr_StorageBuffer__struct_8 = OpTypePointer StorageBuffer %_struct_8
%__resource_var_0_0_ = OpVariable %_ptr_StorageBuffer__struct_8 StorageBuffer
%__resource_var_0_1_ = OpVariable %_ptr_StorageBuffer__struct_8 StorageBuffer
%void = OpTypeVoid
%13 = OpTypeFunction %void
%uint_4294967293 = OpConstant %uint 4294967293
%uint_64 = OpConstant %uint 64
%uint_3 = OpConstant %uint 3
%uint_2 = OpConstant %uint 2
%uint_0 = OpConstant %uint 0
%bool = OpTypeBool
%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
; Compute entry point. Three nested loops walk the index space; each innermost
; iteration loads one float from the binding-0 buffer and stores it into the
; binding-1 buffer at a mirrored index.
%_reverse_dim0_dispatch_0 = OpFunction %void None %13
%16 = OpLabel
; %23 = WorkgroupId.x, %25 = NumWorkgroups.x
%22 = OpLoad %v3uint %__builtin_var_WorkgroupId__
%23 = OpCompositeExtract %uint %22 0
%24 = OpLoad %v3uint %__builtin_var_NumWorkgroups__
%25 = OpCompositeExtract %uint %24 0
; %27 = WorkgroupId.y, %29 = NumWorkgroups.y
%26 = OpLoad %v3uint %__builtin_var_WorkgroupId__
%27 = OpCompositeExtract %uint %26 1
%28 = OpLoad %v3uint %__builtin_var_NumWorkgroups__
%29 = OpCompositeExtract %uint %28 1
; %30 = start and %31 = step of the middle loop, both scaled by 64.
%30 = OpIMul %uint %23 %uint_64
%31 = OpIMul %uint %25 %uint_64
; %33 = LocalInvocationId.x, %35 = LocalInvocationId.y
%32 = OpLoad %v3uint %__builtin_var_LocalInvocationId__
%33 = OpCompositeExtract %uint %32 0
%34 = OpLoad %v3uint %__builtin_var_LocalInvocationId__
%35 = OpCompositeExtract %uint %34 1
OpBranch %36
; Outer loop: %39 starts at WorkgroupId.y, steps by NumWorkgroups.y (%69),
; and runs while %39 < 2 under a signed comparison.
%36 = OpLabel
%39 = OpPhi %uint %69 %40 %27 %16
%42 = OpSLessThan %bool %39 %uint_2
OpLoopMerge %38 %37 None
OpBranchConditional %42 %37 %38
%37 = OpLabel
OpBranch %43
; Middle loop: %45 starts at %30, steps by %31 (%68), and runs while %45 < 3
; under a signed comparison.
%43 = OpLabel
%45 = OpPhi %uint %68 %46 %30 %37
%47 = OpSLessThan %bool %45 %uint_3
OpLoopMerge %40 %44 None
OpBranchConditional %47 %44 %40
%44 = OpLabel
OpBranch %48
; Inner loop: %50 starts at LocalInvocationId.x, steps by 64 (%67), and runs
; while %50 < 3 under a signed comparison.
%48 = OpLabel
%50 = OpPhi %uint %67 %49 %33 %44
%51 = OpSLessThan %bool %50 %uint_3
OpLoopMerge %46 %49 None
OpBranchConditional %51 %49 %46
%49 = OpLabel
; Load index %56 = %50 + %39 * 3 + %35 * 3 + %45
%52 = OpIMul %uint %39 %uint_3
%53 = OpIAdd %uint %50 %52
%54 = OpIMul %uint %35 %uint_3
%55 = OpIAdd %uint %53 %54
%56 = OpIAdd %uint %55 %45
%58 = OpAccessChain %_ptr_StorageBuffer_float %__resource_var_0_0_ %uint_0 %56
%59 = OpLoad %float %58
; Store index %65 = %50 + %35 * 4294967293 + %39 * 4294967293 + %45 + 3.
; 4294967293 is 2^32 - 3, the 32-bit two's-complement bit pattern of -3.
; When %39 is 1 the running sum sits just below 2^32 until the final + 3
; wraps it modulo 2^32 back to a small index.
; NOTE(review): whether the failing drivers mishandle this wraparound is not
; established by this listing.
%60 = OpIMul %uint %35 %uint_4294967293
%61 = OpIAdd %uint %50 %60
%62 = OpIMul %uint %39 %uint_4294967293
%63 = OpIAdd %uint %61 %62
%64 = OpIAdd %uint %63 %45
%65 = OpIAdd %uint %64 %uint_3
%66 = OpAccessChain %_ptr_StorageBuffer_float %__resource_var_0_1_ %uint_0 %65
OpStore %66 %59
; Inner-loop increment.
%67 = OpIAdd %uint %50 %uint_64
OpBranch %48
; Middle-loop increment.
%46 = OpLabel
%68 = OpIAdd %uint %45 %31
OpBranch %43
; Outer-loop increment.
%40 = OpLabel
%69 = OpIAdd %uint %39 %29
OpBranch %36
%38 = OpLabel
OpReturn
OpFunctionEnd
END
# Input buffer: six floats, 1 through 6.
BUFFER buf0 DATA_TYPE float DATA
1 2 3 4 5 6
END
# Output buffer: six floats, zero-initialised so unwritten slots are visible.
BUFFER buf1 DATA_TYPE float DATA
0 0 0 0 0 0
END
# Compute pipeline: the shader reads buf0 at set 0 / binding 0 and writes buf1
# at set 0 / binding 1.
PIPELINE compute pipeline
ATTACH repro ENTRY_POINT _reverse_dim0_dispatch_0
BIND BUFFER buf0 AS storage DESCRIPTOR_SET 0 BINDING 0
BIND BUFFER buf1 AS storage DESCRIPTOR_SET 0 BINDING 1
END
# Dispatch 1 x 2 x 1 workgroups.
RUN pipeline 1 2 1
# Expect the two groups of three values to be exchanged, starting at index 0.
EXPECT buf1 IDX 0 EQ 4.0 5.0 6.0 1.0 2.0 3.0
The minimized test cases (both IREE and Amber) also fail on Moto E w/ Adreno. The original one only fails on Pixel 6. I updated all snippets to match the original testcase.
The test passes with robustBufferAccess enabled, both under IREE and Amber.
We might be able to close this now that https://github.com/iree-org/iree/pull/17866 has removed LinalgExt::ReverseOp. It is worth checking whether the new lowering path still fails on Android.
What happened?
The two reverse tests fail on Pixel 6 with newer Android builds. I first observed this on a build from ~July 2022 and confirmed it today on the newest build, 230205.002. These tests work fine on the older builds used by the Pixel 6 Pro in the lab.
Steps to reproduce your issue
TQ1A.230205.002
What component(s) does this issue relate to?
Compiler
Version information
No response
Additional context
No response