Quuxplusone / LLVMBugzillaTest

0 stars 0 forks source link

[MSP430][AVR][InstCombine][DAGCombine]Poor codegen for targets with no native shifts (8/8) #43015

Open Quuxplusone opened 5 years ago

Quuxplusone commented 5 years ago
Bugzilla Link PR44045
Status NEW
Importance P normal
Reported by Joan Lluch (joan.lluch@icloud.com)
Reported on 2019-11-18 03:10:30 -0800
Last modified on 2019-11-18 04:14:04 -0800
Version trunk
Hardware All All
CC joan.lluch@icloud.com, lebedev.ri@gmail.com, llvm-bugs@lists.llvm.org, spatel+llvm@rotateright.com
Fixed by commit(s)
Attachments
Blocks
Blocked by
See also PR44044, PR44043, PR44042, PR44041, PR44040, PR44039, PR44038
A number of comparisons involving bit tests are converted into shifts by
InstCombine and DAGCombine. However, shifts are expensive on most 8- and 16-bit
targets, where selects are comparatively cheaper.

It is desirable that selects are emitted instead of shifts for these targets.
The following cases were identified in TargetLowering and DAGCombine and were
fixed by:

https://reviews.llvm.org/D69116
https://reviews.llvm.org/D69120
https://reviews.llvm.org/D69326
https://reviews.llvm.org/D70042

Cases in InstCombine remain to be fixed. On llvm-dev it has been suggested that
these cases should be fixed by reversing the current canonicalisation. I am
showing them in this and the following reports:

REPORTED CASE:

Source code:

int foldSelectICmpAndOr( int x, int y ) // (InstCombineSelect foldSelectICmpAndOr)
{
  return (x & 128) ? (y | 2) : y;
}

IR code:

define i16 @foldSelectICmpAndOr(i16 %x, i16 %y) {
entry:
  %0 = lshr i16 %x, 6
  %1 = and i16 %0, 2
  %2 = or i16 %1, %y
  ret i16 %2
}

MSP430 Target code:

foldSelectICmpAndOr:
    clrc
    rrc r12
    rra r12
    rra r12
    rra r12
    rra r12
    rra r12
    and #2, r12
    bis r13, r12
    ret

AVR Target code:

foldSelectICmpAndOr:
    lsr r25
    ror r24
    lsr r25
    ror r24
    lsr r25
    ror r24
    lsr r25
    ror r24
    lsr r25
    ror r24
    lsr r25
    ror r24
    andi    r24, 2
    andi    r25, 0
    or  r24, r22
    or  r25, r23
    ret

EXPECTED RESULT:

Source code:

int foldSelectICmpAndOr( int x, int y ) // (InstCombineSelect foldSelectICmpAndOr)
{
  return (x & 128) ? (y | 2) : y;
}

Expected IR code:

define i16 @foldSelectICmpAndOr(i16 %x, i16 %y) {
entry:
  %0 = trunc i16 %x to i8
  %tobool = icmp slt i8 %0, 0
  %or = or i16 %y, 2
  %cond = select i1 %tobool, i16 %or, i16 %y
  ret i16 %cond
}

Expected MSP430 Target code:

foldSelectICmpAndOr:
    tst.b   r12
    jge .LBB7_2
    bis #2, r13
.LBB7_2:
    mov r13, r12
    ret

Expected AVR Target code:

foldSelectICmpAndOr:
    tst r24
    brpl    LBB7_2
    ori r22, 2
LBB7_2:
    mov r24, r22
    mov r25, r23
    ret
Quuxplusone commented 5 years ago
The following example is more focused on the case this report is intended to cover:

REPORTED CASE:

Source code:

int foldSelectICmpAndOr( int x, int y ) // (InstCombineSelect foldSelectICmpAndOr)
{
  return (x & 64) ? (y | 2) : y;
}

IR code:

define i16 @foldSelectICmpAndOr(i16 %x, i16 %y) #0 {
entry:
  %and = lshr i16 %x, 5
  %0 = and i16 %and, 2
  %1 = or i16 %0, %y
  ret i16 %1
}

MSP430 Target code:

foldSelectICmpAndOr:
    clrc
    rrc r12
    rra r12
    rra r12
    rra r12
    rra r12
    and #2, r12
    bis r13, r12
    ret

AVR Target code:

foldSelectICmpAndOr:
    lsr r25
    ror r24
    lsr r25
    ror r24
    lsr r25
    ror r24
    lsr r25
    ror r24
    lsr r25
    ror r24
    andi    r24, 2
    andi    r25, 0
    or  r24, r22
    or  r25, r23
    ret

EXPECTED RESULT:

Source code:

int foldSelectICmpAndOr( int x, int y ) // (InstCombineSelect foldSelectICmpAndOr)
{
  return (x & 64) ? (y | 2) : y;
}

Expected IR code:

define i16 @foldSelectICmpAndOr(i16 %x, i16 %y) {
entry:
  %and = and i16 %x, 64
  %tobool = icmp eq i16 %and, 0
  %or = or i16 %y, 2
  %cond = select i1 %tobool, i16 %y, i16 %or
  ret i16 %cond
}

Expected MSP430 Target code:

foldSelectICmpAndOr:
    bit #64, r12
    jeq .LBB7_2
    bis #2, r13
.LBB7_2:
    mov r13, r12
    ret

Expected AVR Target code:

foldSelectICmpAndOr:
    andi    r24, 64
    andi    r25, 0
    ldi r18, 0
    ldi r19, 0
    cp  r24, r18
    cpc r25, r19
    breq    LBB7_2
    ori r22, 2
LBB7_2:
    mov r24, r22
    mov r25, r23
    ret