Missed optimization: useless for-loop must be eliminated

llvm / llvm-project

The LLVM Project is a collection of modular and reusable compiler and toolchain technologies.

Other

27.77k stars 11.44k forks source link


Bugzilla Link	39515
Version	trunk
OS	Linux
CC	@DougGregor,@hfinkel,@zygoloid

Bugzilla Link

39515

Version

trunk

Linux

@DougGregor,@hfinkel,@zygoloid

Extended Description

clang(trunk) with '-O3 -std=c++17' for this code:

#include <vector> #include <algorithm> int foo(std::vector<int> v) { int l = v[0]; for(const auto& x : v) { l = std::min(l, x); } for(const auto& x : v) { l = std::max(l, x); } return l; }

gcc doesn't eliminate first loop, but gcc can, because first loop has no effect in this function.

Same result for the version without std::vector and std::max:

int min(int a, int b) { return a < b ? a : b; } int max(int a, int b) { return a > b ? a : b; } int foo(int* v, int size) { int l = v[0]; for(int i=0; i < size; ++i) { l = min(l, v[i]); } for(int i=0; i < size; ++i) { l = max(l, v[i]); } return l; }

This behavior is still the same with post 17 trunk(8649328060b4e748502d1d859f9c9c1bd3c2bccc) https://godbolt.org/z/EYfqqcf4s

The code (slightly modified to give simpler assembly)


static int min(int a, int b)
{
    return a < b ? a : b;
}

static int max(int a, int b)
{
    return a > b ? a : b;
}

int foo(int* v) {
    const int size = 32;
    int l = v[0];
    #pragma clang loop vectorize(disable) unroll(disable)
    for(int i=0; i < size; ++i)
    {
        l = min(l, v[i]);
    }
    #pragma clang loop vectorize(disable) unroll(disable)
    for(int i=0; i < size; ++i)
    {
        l = max(l, v[i]);
    }

    return l;
}

Assembly

foo(int*):                               # @foo(int*)
        mov     eax, dword ptr [rdi]
        xor     ecx, ecx
.LBB0_1:                                # =>This Inner Loop Header: Depth=1
        mov     edx, eax
        mov     eax, dword ptr [rdi + 4*rcx]
        cmp     edx, eax
        cmovl   eax, edx
        inc     rcx
        cmp     rcx, 32
        jne     .LBB0_1
        xor     ecx, ecx
.LBB0_3:                                # =>This Inner Loop Header: Depth=1
        mov     edx, eax
        mov     eax, dword ptr [rdi + 4*rcx]
        cmp     edx, eax
        cmovg   eax, edx
        inc     rcx
        cmp     rcx, 32
        jne     .LBB0_3
        ret

llvm / llvm-project

Missed optimization: useless for-loop must be eliminated #38863

Extended Description