Closed nikic closed 1 month ago
Function test2 from https://github.com/llvm/llvm-project/blob/main/clang/test/CodeGen/PowerPC/builtins-ppc-build-pair-mma.c generates this pre-optimization IR:
```llvm
define dso_local void @test2(ptr noundef %vqp, ptr noundef %vpp, <16 x i8> noundef %vc1, <16 x i8> noundef %vc2, ptr noundef %resp) #0 {
entry:
  %vqp.addr = alloca ptr, align 8
  %vpp.addr = alloca ptr, align 8
  %vc1.addr = alloca <16 x i8>, align 16
  %vc2.addr = alloca <16 x i8>, align 16
  %resp.addr = alloca ptr, align 8
  %vq = alloca <512 x i1>, align 64
  %vp = alloca <256 x i1>, align 32
  %res = alloca <256 x i1>, align 32
  store ptr %vqp, ptr %vqp.addr, align 8, !tbaa !2
  store ptr %vpp, ptr %vpp.addr, align 8, !tbaa !2
  store <16 x i8> %vc1, ptr %vc1.addr, align 16, !tbaa !6
  store <16 x i8> %vc2, ptr %vc2.addr, align 16, !tbaa !6
  store ptr %resp, ptr %resp.addr, align 8, !tbaa !2
  call void @llvm.lifetime.start.p0(i64 64, ptr %vq) #3
  %0 = load ptr, ptr %vqp.addr, align 8, !tbaa !2
  %1 = load <512 x i1>, ptr %0, align 64, !tbaa !7
  store <512 x i1> %1, ptr %vq, align 64, !tbaa !7
  call void @llvm.lifetime.start.p0(i64 32, ptr %vp) #3
  %2 = load ptr, ptr %vpp.addr, align 8, !tbaa !2
  %3 = load <256 x i1>, ptr %2, align 32, !tbaa !9
  store <256 x i1> %3, ptr %vp, align 32, !tbaa !9
  call void @llvm.lifetime.start.p0(i64 32, ptr %res) #3
  %4 = load <16 x i8>, ptr %vc1.addr, align 16, !tbaa !6
  %5 = load <16 x i8>, ptr %vc2.addr, align 16, !tbaa !6
  %6 = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %5, <16 x i8> %4)
  store <256 x i1> %6, ptr %res, align 64
  %7 = load <256 x i1>, ptr %res, align 32, !tbaa !9
  %8 = load ptr, ptr %resp.addr, align 8, !tbaa !2
  store <256 x i1> %7, ptr %8, align 32, !tbaa !9
  call void @llvm.lifetime.end.p0(i64 32, ptr %res) #3
  call void @llvm.lifetime.end.p0(i64 32, ptr %vp) #3
  call void @llvm.lifetime.end.p0(i64 64, ptr %vq) #3
  ret void
}
```
Note that `store <256 x i1> %6, ptr %res, align 64` performs a 64-byte-aligned store of a 32-byte type to a 32-byte-aligned alloca. Pretty sure this store was supposed to get alignment 32.
store <256 x i1> %6, ptr %res, align 64
I think this is due to the hardcoded 64 alignment in https://github.com/llvm/llvm-project/blob/c2b92a4250b3f514685676ba8985ea73450f14d3/clang/lib/CodeGen/CGBuiltin.cpp#L18219.
@llvm/issue-subscribers-clang-codegen
Author: Nikita Popov (nikic)
@llvm/issue-subscribers-backend-powerpc
Function test2 from https://github.com/llvm/llvm-project/blob/main/clang/test/CodeGen/PowerPC/builtins-ppc-build-pair-mma.c generates this pre-optimization IR:
Note that
store <256 x i1> %6, ptr %res, align 64
performs a 64-byte-aligned store of a 32-byte type to a 32-byte-aligned alloca. Pretty sure this was supposed to get alignment 32. I think this is due to the hardcoded 64 alignment in https://github.com/llvm/llvm-project/blob/c2b92a4250b3f514685676ba8985ea73450f14d3/clang/lib/CodeGen/CGBuiltin.cpp#L18219.