llvm / llvm-project

The LLVM Project is a collection of modular and reusable compiler and toolchain technologies.
http://llvm.org
Other
27.3k stars 11.2k forks source link

Redundant branches with ctlz and cttz #47467

Open Diggsey opened 3 years ago

Diggsey commented 3 years ago
Bugzilla Link 48123
Version trunk
OS Windows NT
CC @topperc,@RKSimon,@phoebewang,@rotateright

Extended Description

Rust code:

pub fn can_represent_as_f64(x: u64) -> bool {
    x.leading_zeros() + x.trailing_zeros() >= 11
}

LLVM IR:

define zeroext i1 @_ZN10playground20can_represent_as_f6417h8c9d47bab619cb5fE(i64 %x) {
start:
  %0 = tail call i64 @llvm.ctlz.i64(i64 %x, i1 false)
  %1 = trunc i64 %0 to i32
  %2 = tail call i64 @llvm.cttz.i64(i64 %x, i1 false)
  %3 = trunc i64 %2 to i32
  %_2 = add nuw nsw i32 %1, %3
  %4 = icmp ugt i32 %_2, 10
  ret i1 %4
}

Assembly:

playground::can_represent_as_f64:
    mov eax, 64
    mov ecx, 64
    test    rdi, rdi    ; Initial test for zero and branch
    je  .LBB0_2
    bsr rcx, rdi
    xor rcx, 63

.LBB0_2:
    test    rdi, rdi    ; Second test for zero and branch
    je  .LBB0_4
    bsf rax, rdi

.LBB0_4:
    add ecx, eax
    cmp ecx, 10
    seta    al
    ret

Instead of performing the comparison twice, the code should immediately branch to LBB0_4.

rotateright commented 3 years ago

Note that this should not be an issue when compiling for more recent x86:

lzcntq  %rdi, %rax
tzcntq  %rdi, %rcx
addl    %eax, %ecx
cmpl    $10, %ecx
seta    %al
retq
rotateright commented 3 years ago

We expand the intrinsics in -codegenprepare, and I'm not sure where we would solve this. machine-cse seems like the most likely candidate, but it would require tracking eflags state across basic blocks. Not sure if we do that:

    TEST64rr %5, %5, implicit-def $eflags
    JCC_1 %bb.2, 4, implicit $eflags

IR going into SDAG:

  define zeroext i1 @_ZN10playground20can_represent_as_f6417h8c9d47bab619cb5fE(i64 %x) unnamed_addr {
  start:
    %cmpz = icmp eq i64 %x, 0
    br i1 %cmpz, label %cond.end, label %cond.false

  cond.false:                                       ; preds = %start
    %0 = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true)
    br label %cond.end

  cond.end:                                         ; preds = %start, %cond.false
    %ctz = phi i64 [ 64, %start ], [ %0, %cond.false ]
    %1 = trunc i64 %ctz to i32
    %cmpz3 = icmp eq i64 %x, 0
    br i1 %cmpz3, label %cond.end2, label %cond.false1

  cond.false1:                                      ; preds = %cond.end
    %2 = tail call i64 @llvm.cttz.i64(i64 %x, i1 true)
    br label %cond.end2

  cond.end2:                                        ; preds = %cond.end, %cond.false1
    %ctz4 = phi i64 [ 64, %cond.end ], [ %2, %cond.false1 ]
    %3 = trunc i64 %ctz4 to i32
    %_2 = add nuw nsw i32 %1, %3
    %4 = icmp ugt i32 %_2, 10
    ret i1 %4
  }