llvm / llvm-project

The LLVM Project is a collection of modular and reusable compiler and toolchain technologies.
http://llvm.org
Other
27.77k stars 11.44k forks source link

Missed optimization: useless for-loop must be eliminated #38863

Open zamazan4ik opened 5 years ago

zamazan4ik commented 5 years ago
Bugzilla Link 39515
Version trunk
OS Linux
CC @DougGregor,@hfinkel,@zygoloid

Extended Description

clang(trunk) with '-O3 -std=c++17' for this code:

#include <vector>
#include <algorithm>

int foo(std::vector<int> v) {
    int l = v[0];
    for(const auto& x : v) {
        l = std::min(l, x);
    }

    for(const auto& x : v) {
        l = std::max(l, x);
    }

    return l;
}

gcc doesn't eliminate first loop, but gcc can, because first loop has no effect in this function.

Same result for the version without std::vector and std::max:

int min(int a, int b)
{
    return a < b ? a : b;
}

int max(int a, int b)
{
    return a > b ? a : b;
}

int foo(int* v, int size) {
    int l = v[0];
    for(int i=0; i < size; ++i)
    {
        l = min(l, v[i]);
    }

    for(int i=0; i < size; ++i)
    {
        l = max(l, v[i]);
    }

    return l;
}
wheatman commented 7 months ago

This behavior is still the same with post 17 trunk(8649328060b4e748502d1d859f9c9c1bd3c2bccc) https://godbolt.org/z/EYfqqcf4s

The code (slightly modified to give simpler assembly)


static int min(int a, int b)
{
    return a < b ? a : b;
}

static int max(int a, int b)
{
    return a > b ? a : b;
}

int foo(int* v) {
    const int size = 32;
    int l = v[0];
    #pragma clang loop vectorize(disable) unroll(disable)
    for(int i=0; i < size; ++i)
    {
        l = min(l, v[i]);
    }
    #pragma clang loop vectorize(disable) unroll(disable)
    for(int i=0; i < size; ++i)
    {
        l = max(l, v[i]);
    }

    return l;
}

Assembly

foo(int*):                               # @foo(int*)
        mov     eax, dword ptr [rdi]
        xor     ecx, ecx
.LBB0_1:                                # =>This Inner Loop Header: Depth=1
        mov     edx, eax
        mov     eax, dword ptr [rdi + 4*rcx]
        cmp     edx, eax
        cmovl   eax, edx
        inc     rcx
        cmp     rcx, 32
        jne     .LBB0_1
        xor     ecx, ecx
.LBB0_3:                                # =>This Inner Loop Header: Depth=1
        mov     edx, eax
        mov     eax, dword ptr [rdi + 4*rcx]
        cmp     edx, eax
        cmovg   eax, edx
        inc     rcx
        cmp     rcx, 32
        jne     .LBB0_3
        ret