Sub optimal optimization for many or for eqaul tests

llvm / llvm-project

The LLVM Project is a collection of modular and reusable compiler and toolchain technologies.

http://llvm.org

Other

29.01k stars 11.95k forks source link

Sub optimal optimization for many or for eqaul tests #61043

Open rockeet opened 1 year ago

rockeet commented 1 year ago

bool many_or(unsigned int val) {
    __builtin_assume(val < 32);
    return val == 1 || val == 4 || val == 7 || val == 9 || val == 10 || val == 13;
}

produces code:

many_or(unsigned int):                            # @many_or(unsigned int)
        cmp     edi, 14
        setb    cl
        mov     eax, 9874
        bt      eax, edi
        setb    al
        and     al, cl
        ret

The cmp with 14 is redundant, the optimal code should be:

many_or(unsigned int):                            # @many_or(unsigned int)
        mov     eax, 9874
        bt      eax, edi
        setb    al
        ret

RKSimon commented 1 year ago

define zeroext i1 @many_or(i32 noundef %val) {
entry:
  %cmp = icmp ult i32 %val, 32
  tail call void @llvm.assume(i1 %cmp)
  %0 = icmp ult i32 %val, 14
  %switch.cast = trunc i32 %val to i14
  %switch.downshift = lshr i14 -6510, %switch.cast
  %1 = and i14 %switch.downshift, 1
  %switch.masked = icmp ne i14 %1, 0
  %2 = select i1 %0, i1 %switch.masked, i1 false
  ret i1 %2
}
declare void @llvm.assume(i1 noundef)

RKSimon commented 1 year ago

define zeroext i1 @src(i32 noundef %val) {
entry:
  %cmp = icmp ult i32 %val, 32
  tail call void @llvm.assume(i1 %cmp)
  %0 = icmp ult i32 %val, 14
  %switch.cast = trunc i32 %val to i14
  %switch.downshift = lshr i14 -6510, %switch.cast
  %1 = and i14 %switch.downshift, 1
  %switch.masked = icmp ne i14 %1, 0
  %2 = select i1 %0, i1 %switch.masked, i1 false
  ret i1 %2
}

define zeroext i1 @tgt(i32 noundef %val) {
entry:
  %cmp = icmp ult i32 %val, 32
  tail call void @llvm.assume(i1 %cmp)
  %switch.downshift = lshr i32 9874, %val
  %0 = and i32 %switch.downshift, 1
  %switch.masked = icmp ne i32 %0, 0
  ret i1 %switch.masked
}

declare void @llvm.assume(i1 noundef)

Transformation seems to be correct!

rockeet commented 1 year ago

define zeroext i1 @many_or(i32 noundef %val) {
entry:
  %cmp = icmp ult i32 %val, 32
  tail call void @llvm.assume(i1 %cmp)
  %0 = icmp ult i32 %val, 14
  %switch.cast = trunc i32 %val to i14
  %switch.downshift = lshr i14 -6510, %switch.cast
  %1 = and i14 %switch.downshift, 1
  %switch.masked = icmp ne i14 %1, 0
  %2 = select i1 %0, i1 %switch.masked, i1 false
  ret i1 %2
}
declare void @llvm.assume(i1 noundef)

In my code example, if __builtin_assume(val < 32) is replaced with __builtin_assume(val < 14), the output code is optimal.

In your solution, it should also be fixed for 64 bits: if we write __builtin_assume(val < 64);, it should use uint64.