llvm / llvm-project

The LLVM Project is a collection of modular and reusable compiler and toolchain technologies.
http://llvm.org
Other
28.42k stars 11.74k forks source link

[ppc] Use cmplwi instead of rlwinm. to compare bool var #30181

Closed weiguozhi closed 6 years ago

weiguozhi commented 7 years ago
Bugzilla Link 30833
Resolution FIXED
Resolved on Mar 12, 2018 08:40
Version trunk
OS Linux
CC @hfinkel,@nemanjai

Extended Description

Compile the following code with the options -m64 -O2:

struct A { long V() const { return f4 ? f1: f2; } bool IsDone() const { return f3 >= V(); }

long f1, f2; 
int f3; 
bool f4; 

};

struct B { bool IsDone() const { return f6 >= f5.f1; }

void Next();

A f5; 
int f6; 

};

extern A qux(); extern B bar();

void B::Next() { while (f5.IsDone()) { f5 = qux(); } }

void foo() { B iter = bar(); while (!iter.IsDone()) { iter.Next(); }
}

LLVM generates:

    ...
    lwz 4, 112(31)
    lbz 5, 116(31)
    ld 7, 104(31)
    addi 29, 31, 128 
    .p2align        4

.LBB1_2: # %while.body

                                    # =>This Loop Header: Depth=1

                                    #     Child Loop BB1_3 Depth 2
    rlwinm. 8, 5, 0, 24, 31         // A
    extsw 8, 4
    isel 9, 7, 3, 2
    cmpd     8, 9
    blt      0, .LBB1_5
    .p2align        4

.LBB1_3: # %while.body.i

                                    #   Parent Loop BB1_2 Depth=1

                                    # =>  This Inner Loop Header: Depth=2
    mr 3, 29
    bl _Z3quxv
    nop
    lxvd2x 0, 0, 29
    stxvd2x 0, 0, 30
    ori 2, 2, 0
    ld 3, 144(31)
    std 3, 112(31)
    ori 2, 2, 0
    lbz 5, 116(31)
    ld 3, 96(31)
    ld 7, 104(31)
    lwa 4, 112(31)
    cmplwi   5, 0                   // B
    isel 6, 7, 3, 2
    cmpd     4, 6
    bge 0, .LBB1_3

# BB#4: # %while.cond.backedge.loopexit

                                    #   in Loop: Header=BB1_2 Depth=1
    lwz 6, 120(31)

.LBB1_5: # %while.cond.backedge

                                    #   in Loop: Header=BB1_2 Depth=1

    extsw 8, 6
    cmpd     8, 3
    blt      0, .LBB1_2
    ...

Note that instruction A is actually testing a bool variable loaded from 116(31); it can be replaced with the slightly faster cmplwi instruction, just like instruction B.

nemanjai commented 6 years ago

As of https://reviews.llvm.org/rL326736, we emit an andi. here, which is a faster instruction on P9.

Since andi. is not cracked, there is no advantage to emitting cmplwi on P9, so this PR can probably be closed now.