llvm / llvm-project

The LLVM Project is a collection of modular and reusable compiler and toolchain technologies.
http://llvm.org
Other
27.59k stars 11.34k forks source link

[InstCombine] fold select of select with common argument #82350

Open sftlbcn opened 6 months ago

sftlbcn commented 6 months ago

fold other select to logical and/or if either icmp predicate is free to invert https://alive2.llvm.org/ce/z/aSYXfg

----------------------------------------
define i32 @src_or(i32 %x, i32 %y, i32 %z, i1 %cmp1) {
#0:
  %cmp2 = icmp eq i32 %z, 0
  %sel1 = select i1 %cmp1, i32 %x, i32 %y
  %sel2 = select i1 %cmp2, i32 %sel1, i32 %x
  ret i32 %sel2
}
=>
define i32 @tgt_or(i32 %x, i32 %y, i32 %z, i1 %cmp1) {
#0:
  %cmp2_inv = icmp ne i32 %z, 0
  %sel1 = select i1 %cmp2_inv, i1 1, i1 %cmp1
  %sel2 = select i1 %sel1, i32 %x, i32 %y
  ret i32 %sel2
}
Transformation seems to be correct!

----------------------------------------
define i32 @src_and(i32 %x, i32 %y, i32 %z, i1 %cmp1) {
#0:
  %cmp2 = icmp eq i32 %z, 0
  %sel1 = select i1 %cmp2, i32 %x, i32 %y
  %sel2 = select i1 %cmp1, i32 %sel1, i32 %x
  ret i32 %sel2
}
=>
define i32 @tgt_and(i32 %x, i32 %y, i32 %z, i1 %cmp1) {
#0:
  %cmp2_inv = icmp ne i32 %z, 0
  %sel1 = select i1 %cmp1, i1 %cmp2_inv, i1 0
  %sel2 = select i1 %sel1, i32 %y, i32 %x
  ret i32 %sel2
}
Transformation seems to be correct!
sftlbcn commented 6 months ago

some examples from real code:

;abseil-cpp
define i32 @src(i32 %a, i64 %b, i32 %c) {
  %cmp1 = icmp eq i64 %b, 0
  %cmp2 = icmp slt i64 %b, 0
  %spec.select632 = select i1 %cmp2, i32 0, i32 %a
  %85 = select i1 %cmp1, i32 %a, i32 %spec.select632
  ret i32 %85
}
define i32 @tgt(i32 %a, i64 %b, i32 %c) {
  %cmp2 = icmp slt i64 %b, 0
  %1 = select i1 %cmp2, i32 0, i32 %a
  ret i32 %1
}

;hermes
define i32 @src2(i32 %a, i32 %b, i32 %c) {
  %cmp108.i = icmp slt i32 %a, 9
  %cmp113.i = icmp slt i32 %a, 17
  %spec.select.i = select i1 %cmp113.i, i32 %c, i32 %b
  %z.5.i = select i1 %cmp108.i, i32 %b, i32 %spec.select.i
  ret i32 %z.5.i
}
define i32 @tgt2(i32 %a, i32 %b, i32 %c) {
  %1 = add i32 %a, -17
  %spec.select.i = icmp ult i32 %1, -8
  %z.5.i = select i1 %spec.select.i, i32 %b, i32 %c
  ret i32 %z.5.i
}

define i64 @src3(i32 %a, i64 %b, i64 %c) {
  %cmp16.i.i = icmp eq i32 %a, 0
  %cmp20.i.i = icmp ugt i32 %a, 511
  %spec.select11.i = select i1 %cmp20.i.i, i64 %b, i64 %c
  %retval.0.i.i = select i1 %cmp16.i.i, i64 %c, i64 %spec.select11.i
  ret i64 %retval.0.i.i
}
define i64 @tgt3(i32 %a, i64 %b, i64 %c) {
  %cmp20.i.i = icmp ugt i32 %a, 511
  %retval.0.i.i = select i1 %cmp20.i.i, i64 %b, i64 %c
  ret i64 %retval.0.i.i
}
resistor commented 5 months ago

Are these transformations generally profitable? In a quick test they seem to generate the same number of instructions on X86, and more instructions on AArch64.

sftlbcn commented 5 months ago

there's an existing fold when the common arg is on same hand of the selects, should be possible to extend? https://github.com/llvm/llvm-project/blob/2c5d01c2cfa80fd734b94833bdf7b5b3f6f2ebb0/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp#L3652-L3655

dtcxzyw commented 5 months ago

some examples from real code:

;abseil-cpp
define i32 @src(i32 %a, i64 %b, i32 %c) {
  %cmp1 = icmp eq i64 %b, 0
  %cmp2 = icmp slt i64 %b, 0
  %spec.select632 = select i1 %cmp2, i32 0, i32 %a
  %85 = select i1 %cmp1, i32 %a, i32 %spec.select632
  ret i32 %85
}
define i32 @tgt(i32 %a, i64 %b, i32 %c) {
  %cmp2 = icmp slt i64 %b, 0
  %1 = select i1 %cmp2, i32 0, i32 %a
  ret i32 %1
}

;hermes
define i32 @src2(i32 %a, i32 %b, i32 %c) {
  %cmp108.i = icmp slt i32 %a, 9
  %cmp113.i = icmp slt i32 %a, 17
  %spec.select.i = select i1 %cmp113.i, i32 %c, i32 %b
  %z.5.i = select i1 %cmp108.i, i32 %b, i32 %spec.select.i
  ret i32 %z.5.i
}
define i32 @tgt2(i32 %a, i32 %b, i32 %c) {
  %1 = add i32 %a, -17
  %spec.select.i = icmp ult i32 %1, -8
  %z.5.i = select i1 %spec.select.i, i32 %b, i32 %c
  ret i32 %z.5.i
}

define i64 @src3(i32 %a, i64 %b, i64 %c) {
  %cmp16.i.i = icmp eq i32 %a, 0
  %cmp20.i.i = icmp ugt i32 %a, 511
  %spec.select11.i = select i1 %cmp20.i.i, i64 %b, i64 %c
  %retval.0.i.i = select i1 %cmp16.i.i, i64 %c, i64 %spec.select11.i
  ret i64 %retval.0.i.i
}
define i64 @tgt3(i32 %a, i64 %b, i64 %c) {
  %cmp20.i.i = icmp ugt i32 %a, 511
  %retval.0.i.i = select i1 %cmp20.i.i, i64 %b, i64 %c
  ret i64 %retval.0.i.i
}

I think these can be handled by https://github.com/llvm/llvm-project/pull/83739.

dtcxzyw commented 5 months ago

some examples from real code:

;abseil-cpp
define i32 @src(i32 %a, i64 %b, i32 %c) {
  %cmp1 = icmp eq i64 %b, 0
  %cmp2 = icmp slt i64 %b, 0
  %spec.select632 = select i1 %cmp2, i32 0, i32 %a
  %85 = select i1 %cmp1, i32 %a, i32 %spec.select632
  ret i32 %85
}
define i32 @tgt(i32 %a, i64 %b, i32 %c) {
  %cmp2 = icmp slt i64 %b, 0
  %1 = select i1 %cmp2, i32 0, i32 %a
  ret i32 %1
}

;hermes
define i32 @src2(i32 %a, i32 %b, i32 %c) {
  %cmp108.i = icmp slt i32 %a, 9
  %cmp113.i = icmp slt i32 %a, 17
  %spec.select.i = select i1 %cmp113.i, i32 %c, i32 %b
  %z.5.i = select i1 %cmp108.i, i32 %b, i32 %spec.select.i
  ret i32 %z.5.i
}
define i32 @tgt2(i32 %a, i32 %b, i32 %c) {
  %1 = add i32 %a, -17
  %spec.select.i = icmp ult i32 %1, -8
  %z.5.i = select i1 %spec.select.i, i32 %b, i32 %c
  ret i32 %z.5.i
}

define i64 @src3(i32 %a, i64 %b, i64 %c) {
  %cmp16.i.i = icmp eq i32 %a, 0
  %cmp20.i.i = icmp ugt i32 %a, 511
  %spec.select11.i = select i1 %cmp20.i.i, i64 %b, i64 %c
  %retval.0.i.i = select i1 %cmp16.i.i, i64 %c, i64 %spec.select11.i
  ret i64 %retval.0.i.i
}
define i64 @tgt3(i32 %a, i64 %b, i64 %c) {
  %cmp20.i.i = icmp ugt i32 %a, 511
  %retval.0.i.i = select i1 %cmp20.i.i, i64 %b, i64 %c
  ret i64 %retval.0.i.i
}

I think these can be handled by #83739.

Sorry for the noise. The inner select is not in the expected arm :(