llvm / llvm-project

The LLVM Project is a collection of modular and reusable compiler and toolchain technologies.
http://llvm.org
Other
29.32k stars 12.12k forks source link

[SLPVectorizer] Unsound transform: Target is more poisonous than source according to Alive2 #63049

Closed fhahn closed 1 year ago

fhahn commented 1 year ago

Running llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll through Alive2 (with opt-alive.sh) shows that SLPVectorizer performs an unsound transformation: the target is more poisonous than the source.

Reproducer IR for just running SLPVectorizer:

target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64--"

define float @test_merge_anyof_v4sf(<4 x float> %t) local_unnamed_addr #0 {
entry:
  %vecext = extractelement <4 x float> %t, i64 0
  %cmp = fcmp olt float %vecext, 0.000000e+00
  %vecext2 = extractelement <4 x float> %t, i64 1
  %cmp4 = fcmp olt float %vecext2, 0.000000e+00
  %or.cond = select i1 %cmp, i1 true, i1 %cmp4
  %vecext7 = extractelement <4 x float> %t, i64 2
  %cmp9 = fcmp olt float %vecext7, 0.000000e+00
  %or.cond1 = select i1 %or.cond, i1 true, i1 %cmp9
  %vecext12 = extractelement <4 x float> %t, i64 3
  %cmp14 = fcmp olt float %vecext12, 0.000000e+00
  %or.cond2 = select i1 %or.cond1, i1 true, i1 %cmp14
  %cmp19 = fcmp ogt float %vecext, 1.000000e+00
  %or.cond3 = select i1 %or.cond2, i1 true, i1 %cmp19
  %cmp24 = fcmp ogt float %vecext2, 1.000000e+00
  %or.cond4 = select i1 %or.cond3, i1 true, i1 %cmp24
  %cmp29 = fcmp ogt float %vecext7, 1.000000e+00
  %or.cond5 = select i1 %or.cond4, i1 true, i1 %cmp29
  %cmp34 = fcmp ogt float %vecext12, 1.000000e+00
  %or.cond6 = select i1 %or.cond5, i1 true, i1 %cmp34
  %add = fadd float %vecext, %vecext2
  %retval.0 = select i1 %or.cond6, float 0.000000e+00, float %add
  ret float %retval.0
}

Report by Alive2:

define float @test_merge_anyof_v4sf(<4 x float> %t) {
%entry:
  %vecext = extractelement <4 x float> %t, i64 0
  %cmp = fcmp olt float %vecext, 0.000000
  %vecext2 = extractelement <4 x float> %t, i64 1
  %cmp4 = fcmp olt float %vecext2, 0.000000
  %or.cond = select i1 %cmp, i1 1, i1 %cmp4
  %vecext7 = extractelement <4 x float> %t, i64 2
  %cmp9 = fcmp olt float %vecext7, 0.000000
  %or.cond1 = select i1 %or.cond, i1 1, i1 %cmp9
  %vecext12 = extractelement <4 x float> %t, i64 3
  %cmp14 = fcmp olt float %vecext12, 0.000000
  %or.cond2 = select i1 %or.cond1, i1 1, i1 %cmp14
  %cmp19 = fcmp ogt float %vecext, 1.000000
  %or.cond3 = select i1 %or.cond2, i1 1, i1 %cmp19
  %cmp24 = fcmp ogt float %vecext2, 1.000000
  %or.cond4 = select i1 %or.cond3, i1 1, i1 %cmp24
  %cmp29 = fcmp ogt float %vecext7, 1.000000
  %or.cond5 = select i1 %or.cond4, i1 1, i1 %cmp29
  %cmp34 = fcmp ogt float %vecext12, 1.000000
  %or.cond6 = select i1 %or.cond5, i1 1, i1 %cmp34
  %add = fadd float %vecext, %vecext2
  %retval.0 = select i1 %or.cond6, float 0.000000, float %add
  ret float %retval.0
}
=>
define float @test_merge_anyof_v4sf(<4 x float> %t) {
%entry:
  %0 = shufflevector <4 x float> %t, <4 x float> poison, 0, 1, 2, 3, 0, 1, 2, 3
  %1 = fcmp ogt <8 x float> %0, { 1.000000, 1.000000, 1.000000, 1.000000, 0.000000, 0.000000, 0.000000, 0.000000 }
  %2 = fcmp olt <8 x float> %0, { 1.000000, 1.000000, 1.000000, 1.000000, 0.000000, 0.000000, 0.000000, 0.000000 }
  %3 = shufflevector <8 x i1> %1, <8 x i1> %2, 0, 1, 2, 3, 12, 13, 14, 15
  %4 = freeze <8 x i1> %3
  %5 = reduce_smax <8 x i1> %4
  %6 = extractelement <4 x float> %t, i64 0
  %7 = extractelement <4 x float> %t, i64 1
  %add = fadd float %6, %7
  %retval.0 = select i1 %5, float 0.000000, float %add
  ret float %retval.0
}
Transformation doesn't verify! (unsound)
ERROR: Target is more poisonous than source

Example:
<4 x float> %t = < #x80200000 (-0.000000000000?), poison, poison, #x40040000 (2.0625) >

Source:
float %vecext = #x80200000 (-0.000000000000?)
i1 %cmp = #x1 (1)
float %vecext2 = poison
i1 %cmp4 = poison
i1 %or.cond = #x1 (1)
float %vecext7 = poison
i1 %cmp9 = poison
i1 %or.cond1 = #x1 (1)
float %vecext12 = #x40040000 (2.0625)
i1 %cmp14 = #x0 (0)
i1 %or.cond2 = #x1 (1)
i1 %cmp19 = #x0 (0)
i1 %or.cond3 = #x1 (1)
i1 %cmp24 = poison
i1 %or.cond4 = #x1 (1)
i1 %cmp29 = poison
i1 %or.cond5 = #x1 (1)
i1 %cmp34 = #x1 (1)
i1 %or.cond6 = #x1 (1)
float %add = poison
float %retval.0 = #x00000000 (+0.0)

Target:
<8 x float> %0 = < #x80200000 (-0.000000000000?), poison, poison, #x40040000 (2.0625), #x80200000 (-0.000000000000?), poison, poison, #x40040000 (2.0625) >
<8 x i1> %1 = < #x0 (0), poison, poison, #x1 (1), #x0 (0), poison, poison, #x1 (1) >
<8 x i1> %2 = < #x1 (1), poison, poison, #x0 (0), #x1 (1), poison, poison, #x0 (0) >
<8 x i1> %3 = < #x0 (0), poison, poison, #x1 (1), #x1 (1), poison, poison, #x0 (0) >
<8 x i1> %4 = < #x0 (0), #x0 (0), #x0 (0), #x1 (1), #x1 (1), #x0 (0), #x0 (0), #x0 (0) >
i1 %5 = #x0 (0)
float %6 = #x80200000 (-0.000000000000?)
float %7 = poison
float %add = poison
float %retval.0 = poison
Source value: #x00000000 (+0.0)
Target value: poison
alexey-bataev commented 1 year ago

Why does call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %4) get translated to reduce_smax <8 x i1>? It looks like it should be reduce_umax <8 x i1>; otherwise the i1 value 1 is interpreted as negative (-1 as a signed value), so 0 is chosen as the signed maximum.

nikic commented 1 year ago

I think this was part of the set of issues where @fhahn's alive build had some kind of off-by-one issue with intrinsic IDs. The online version uses reduce_or and the transform verifies: https://alive2.llvm.org/ce/z/W3BmW2