llvm / llvm-project

The LLVM Project is a collection of modular and reusable compiler and toolchain technologies.
http://llvm.org
Other
26.85k stars 11.02k forks source link

Missed loop simplifications may be due to failing to identify loop invariants #72626

Open ZY546 opened 7 months ago

ZY546 commented 7 months ago

Hello, we notice in the code below that size_ is a loop invariant (if size_==0 then it won't go into the loop).

https://godbolt.org/z/Wxo38e4jG

int x,y;
void func(int size_){
    for(int i=1;i<size_+1;i++){
        if(size_){
            size_++;
            x++;
        }
        size_--;
        y++;
    }
}

But Clang -O3 -fwrapv:

func(int):                               # @func(int)
        lea     eax, [rdi + 1]
        cmp     eax, 2
        jl      .LBB0_5
        mov     eax, dword ptr [rip + y]
        xor     ecx, ecx
        mov     edx, dword ptr [rip + x]
        jmp     .LBB0_2
.LBB0_6:                                # %if.then
        inc     edx
        mov     dword ptr [rip + x], edx
        inc     ecx
        cmp     ecx, edi
        jge     .LBB0_4
.LBB0_2:                                # %for.body
        test    edi, edi
        jne     .LBB0_6
        mov     edi, -1
        inc     ecx
        cmp     ecx, edi
        jl      .LBB0_2
.LBB0_4:                                # %for.cond.for.cond.cleanup_crit_edge
        add     eax, ecx
        mov     dword ptr [rip + y], eax
.LBB0_5:                                # %for.cond.cleanup
        ret
x:
        .long   0                               # 0x0

y:
        .long   0                               # 0x0

GCC -O3 -fwrapv:

func(int):
        lea     eax, [rdi+1]
        cmp     eax, 1
        jle     .L1
        add     DWORD PTR x[rip], edi
        add     DWORD PTR y[rip], edi
.L1:
        ret
y:
        .zero   4
x:
        .zero   4
XChy commented 7 months ago

Reduced:

declare void @dummy()

define void @func(i32 noundef %size_) {
entry:
  %add = add i32 %size_, 1
  %cmp12 = icmp sgt i32 %add, 1
  br i1 %cmp12, label %for.body, label %for.cond.cleanup

for.body:                                         ; preds = %entry, %if.end
  %i = phi i32 [ %inc, %if.end ], [ 1, %entry ]
  %size.iv = phi i32 [ %size.next, %if.end ], [ %size_, %entry ]
  %tobool.not = icmp eq i32 %size.iv, 0
  br i1 %tobool.not, label %if.end, label %if.then

if.then:                                          ; preds = %for.body
  tail call void @dummy()
  br label %if.end

if.end:                                           ; preds = %if.then, %for.body
  %size.next = phi i32 [ %size.iv, %if.then ], [ -1, %for.body ]
  %inc = add nuw nsw i32 %i, 1
  %cmp = icmp slt i32 %i, %size.next
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %if.end, %entry
  ret void
}

Proof: https://alive2.llvm.org/ce/z/rHVcBH

XChy commented 1 month ago

For this example with -fwrapv flag, the root cause should be that GVN fails to propagate complex condition 1 < size_ + 1 and infer that size_t != 0 is false. A possible fix is to take such complex condition pattern into account.