llvm / llvm-project

The LLVM Project is a collection of modular and reusable compiler and toolchain technologies.
http://llvm.org
Other
27.76k stars 11.44k forks source link

Failure to combine rotate through a select #45243

Open RKSimon opened 4 years ago

RKSimon commented 4 years ago
Bugzilla Link 45898
Version trunk
OS Windows NT
CC @LebedevRI,@rotateright

Extended Description

Not sure whether this can be done in instcombine or must wait until DAG.

https://c.godbolt.org/z/Pam6xk

#include <stdint.h>
uint8_t rotl_2(uint8_t x) {
  uint8_t Xlo = (x<<2);
  uint8_t Xhi = (x>>6);
  uint8_t Xrot = Xlo|Xhi;
  return (x < 128 ? Xrot : Xlo);
}

we have a conditional use of a rotate pattern, which unfortunately gets broken up by the select

define zeroext i8 @rotl_2(i8 zeroext %0) {
  %2 = shl i8 %0, 2
  %3 = lshr i8 %0, 6
  %4 = icmp slt i8 %0, 0
  %5 = select i1 %4, i8 0, i8 %3
  %6 = or i8 %5, %2
  ret i8 %6
}

resulting in:

rotl_2:
        leal    (,%rdi,4), %ecx
        movl    %edi, %eax
        shrb    $6, %al
        xorl    %edx, %edx
        testb   %dil, %dil
        movzbl  %al, %eax
        cmovsl  %edx, %eax
        orb     %cl, %al
        retq

while gcc manages:

rotl_2:
        movl    %edi, %edx
        leal    0(,%rdi,4), %eax
        rolb    $2, %dl
        testb   %dil, %dil
        cmovns  %edx, %eax
        ret
rotateright commented 4 years ago

Also, we won't get the same x86 asm as gcc even with that transform in IR, so we may want to look at codegen first: https://c.godbolt.org/z/2d16-F

define zeroext i8 @rotl_better(i8 zeroext %x) {
  %l = shl i8 %x, 2
  %c = icmp slt i8 %x, 0
  %o = call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 2)
  %s = select i1 %c, i8 %l, i8 %o
  ret i8 %s
}

rotl_better:                     
        leal    (,%rdi,4), %eax
        movl    %edi, %ecx
        rolb    $2, %cl
        testb   %dil, %dil
        movzbl  %al, %edx
        movzbl  %cl, %eax
        cmovsl  %edx, %eax
        retq
rotateright commented 4 years ago

If this is handled in IR, we'll need to canonicalize to the funnel shift intrinsic. We already have the opposite general transform:

define zeroext i8 @rotl(i8 zeroext %x) {
  %l = shl i8 %x, 2
  %r = lshr i8 %x, 6
  %c = icmp slt i8 %x, 0
  %o = or i8 %l, %r
  %s = select i1 %c, i8 %l, i8 %o
  ret i8 %s
}

Becomes:

define zeroext i8 @rotl_disguised(i8 zeroext %x) {
  %l = shl i8 %x, 2
  %r = lshr i8 %x, 6
  %c = icmp slt i8 %x, 0
  %s = select i1 %c, i8 0, i8 %r
  %o = or i8 %s, %l
  ret i8 %o
}