llvm / llvm-project

The LLVM Project is a collection of modular and reusable compiler and toolchain technologies.
http://llvm.org
Other
28.03k stars 11.58k forks source link

SLPVectorizer replaces `add nsw undef` with `add poison` #55653

Closed nunoplopes closed 2 years ago

nunoplopes commented 2 years ago

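SLPVectorizer incorrectly replaces `add nsw undef, %x` with `add nsw poison, %x`. However, `add nsw undef, 0` is `undef`, not `poison` (adding 0 can never overflow, so `nsw` does not produce poison), whereas any `add` with a `poison` operand is `poison`. The transformation therefore makes the result more poisonous and is not correct. Example: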

; Test: Transforms/SLPVectorizer/X86/vectorize-reorder-alt-shuffle.ll

define void @foo(ptr %c, ptr %d) {
%entry:
  %arrayidx1 = gep inbounds ptr %c, 1 x i64 4
  %0 = load i8, ptr %arrayidx1, align 1
  %conv2 = zext i8 %0 to i32
  %and = and i32 %conv2, 3
  %arrayidx4 = gep inbounds ptr %c, 1 x i64 1
  %1 = load i8, ptr %arrayidx4, align 1
  %conv5 = zext i8 %1 to i32
  %shl6 = shl nsw nuw i32 %conv5, 2
  %arrayidx12 = gep inbounds ptr %c, 1 x i64 2
  %2 = load i8, ptr %arrayidx12, align 1
  %conv13 = zext i8 %2 to i32
  %shl14 = shl nsw nuw i32 %conv13, 2
  %arrayidx17 = gep inbounds ptr %c, 1 x i64 3
  %3 = load i8, ptr %arrayidx17, align 1
  %conv18 = zext i8 %3 to i32
  %shl19 = shl nsw nuw i32 %conv18, 2
  %sub = add nsw i32 undef, %shl6
  %conv27 = sitofp i32 %sub to float, exceptions=ignore
  %div = fdiv float %conv27, undef, exceptions=ignore
  %add.ptr = gep inbounds ptr %d, 4 x i64 -1
  store float %div, ptr %add.ptr, align 4
  %sub32 = add nsw i32 undef, %and
  %conv33 = sitofp i32 %sub32 to float, exceptions=ignore
  %div36 = fdiv float %conv33, undef, exceptions=ignore
  %add.ptr37 = gep inbounds ptr %d, 4 x i64 -2
  store float %div36, ptr %add.ptr37, align 4

; HERE: add nsw undef
  %sub40 = add nsw i32 undef, %shl19

  %conv41 = sitofp i32 %sub40 to float, exceptions=ignore
  %div44 = fdiv float %conv41, undef, exceptions=ignore
  %add.ptr45 = gep inbounds ptr %d, 4 x i64 -3
  store float %div44, ptr %add.ptr45, align 4
  %sub48 = add nsw i32 undef, %shl14
  %conv49 = sitofp i32 %sub48 to float, exceptions=ignore
  %div52 = fdiv float %conv49, undef, exceptions=ignore
  %add.ptr53 = gep inbounds ptr %d, 4 x i64 -4
  store float %div52, ptr %add.ptr53, align 4
  ret void
}
=>
define void @foo(ptr %c, ptr %d) {
%entry:
  %arrayidx4 = gep inbounds ptr %c, 1 x i64 1
  %add.ptr53 = gep inbounds ptr %d, 4 x i64 -4
  %0 = bitcast ptr %arrayidx4 to ptr
  %1 = load <4 x i8>, ptr %0, align 1
  %2 = zext <4 x i8> %1 to <4 x i32>
  %3 = shl nsw nuw <4 x i32> %2, { 2, 2, 2, 3 }
  %4 = and <4 x i32> %2, { 2, 2, 2, 3 }
  %5 = shufflevector <4 x i32> %3, <4 x i32> %4, 1, 2, 7, 0

; And here we have add poison
  %6 = add nsw <4 x i32> poison, %5

  %7 = sitofp <4 x i32> %6 to <4 x float>, exceptions=ignore
  %8 = fdiv <4 x float> %7, poison, exceptions=ignore
  %9 = bitcast ptr %add.ptr53 to ptr
  store <4 x float> %8, ptr %9, align 4
  ret void
}
Transformation doesn't verify!
ERROR: Mismatch in memory

Example:
ptr %c = pointer(non-local, block_id=1, offset=2)
ptr %d = pointer(non-local, block_id=1, offset=32)

Source:
ptr %arrayidx1 = pointer(non-local, block_id=1, offset=6)
i8 %0 = #x00 (0)
i32 %conv2 = #x00000000 (0)
i32 %and = #x00000000 (0)
ptr %arrayidx4 = pointer(non-local, block_id=1, offset=3)
i8 %1 = poison
i32 %conv5 = poison
i32 %shl6 = poison
ptr %arrayidx12 = pointer(non-local, block_id=1, offset=4)
i8 %2 = poison
i32 %conv13 = poison
i32 %shl14 = poison
ptr %arrayidx17 = pointer(non-local, block_id=1, offset=5)
i8 %3 = #x00 (0)
i32 %conv18 = #x00000000 (0)
i32 %shl19 = #x00000000 (0)
i32 %sub = poison
float %conv27 = poison
float %div = poison
ptr %add.ptr = pointer(non-local, block_id=1, offset=28)
i32 %sub32 = #x00000000 (0)     [based on undef value]
float %conv33 = #x00000000 (+0.0)
float %div36 = NaN      [based on undef value]
ptr %add.ptr37 = pointer(non-local, block_id=1, offset=24)
i32 %sub40 = #x00000000 (0)     [based on undef value]
float %conv41 = #x00000000 (+0.0)
float %div44 = NaN      [based on undef value]
ptr %add.ptr45 = pointer(non-local, block_id=1, offset=20)
i32 %sub48 = poison
float %conv49 = poison
float %div52 = poison
ptr %add.ptr53 = pointer(non-local, block_id=1, offset=16)

SOURCE MEMORY STATE
===================
NON-LOCAL BLOCKS:
Block 0 >       size: 0 align: 1        alloc type: 0
Block 1 >       size: 135       align: 256      alloc type: 0
Block 2 >       size: 254       align: 65536    alloc type: 0

Target:
ptr %arrayidx4 = pointer(non-local, block_id=1, offset=3)
ptr %add.ptr53 = pointer(non-local, block_id=1, offset=16)
ptr %0 = pointer(non-local, block_id=1, offset=3)
<4 x i8> %1 = < poison, poison, #x00 (0), #x00 (0) >
<4 x i32> %2 = < poison, poison, #x00000000 (0), #x00000000 (0) >
<4 x i32> %3 = < poison, poison, #x00000000 (0), #x00000000 (0) >
<4 x i32> %4 = < poison, poison, #x00000000 (0), #x00000000 (0) >
<4 x i32> %5 = < poison, #x00000000 (0), #x00000000 (0), poison >
<4 x i32> %6 = < poison, poison, poison, poison >
<4 x float> %7 = < poison, poison, poison, poison >
<4 x float> %8 = < poison, poison, poison, poison >
ptr %9 = pointer(non-local, block_id=1, offset=16)

Mismatch in pointer(non-local, block_id=1, offset=22)
Source value: #x80
Target value: poison

cc @regehr @cilkplus @davemgreen @RKSimon

fhahn commented 2 years ago

Also cc @alexey-bataev

alexey-bataev commented 2 years ago

Fixed in 120d52b0ef8b0d41d5ce5898422d2d2f42047d0b