ziglang / zig

General-purpose programming language and toolchain for maintaining robust, optimal, and reusable software.
https://ziglang.org
MIT License
33.99k stars 2.49k forks source link

llvm: on i386 EXTRACT_VECTOR_ELT of v4i1 produces wrong code #4447

Open LemonBoy opened 4 years ago

LemonBoy commented 4 years ago
; Module header of the reproducer for the issue above.
; ModuleID = 'foo'
source_filename = "foo"
; Layout and triple pin the target to i386 with the Windows MSVC environment,
; the configuration named in the issue title.
target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
target triple = "i386-unknown-windows-msvc"

; Internal constant with the same <4 x i1> pattern (true, false, true, false)
; that @WinMainCRTStartup passes to @fun. No function visible in this listing
; references @0.
@0 = internal unnamed_addr constant <4 x i1> <i1 true, i1 false, i1 true, i1 false>, align 4

; Driver for the reproducer: takes no arguments, calls @fun once with the
; constant vector <true, false, true, false>, then returns.
; NOTE(review): the name suggests this is the image entry point — confirm
; against the link step, which is not shown here.
; Function Attrs: nobuiltin nounwind uwtable
define x86_stdcallcc void @WinMainCRTStartup() {
Entry:
  ; The lane values passed here are exactly the ones @fun checks for.
  call fastcc void @fun(<4 x i1> <i1 true, i1 false, i1 true, i1 false>)
  ret void
}

; Checks each of the four lanes of its <4 x i1> argument against the pattern
; (true, false, true, false) and calls @llvm.debugtrap for every lane that
; differs. The only caller in this listing passes exactly that pattern, so a
; trap firing means the lane was extracted incorrectly.
;
; Parameter %0: the <4 x i1> vector to check (passed with fastcc).
; Returns: nothing.
; Function Attrs: nobuiltin nounwind uwtable
define internal fastcc void @fun(<4 x i1>) unnamed_addr {
Entry:
  ; Copy the argument into a stack slot; every check below reloads the whole
  ; vector from %x before extracting a lane.
  %x = alloca <4 x i1>, align 4
  store <4 x i1> %0, <4 x i1>* %x, align 4
  ; Lane 0: expected true. %3 is set when the lane is NOT true.
  %1 = load <4 x i1>, <4 x i1>* %x
  %2 = extractelement <4 x i1> %1, i32 0
  %3 = icmp ne i1 %2, true
  br i1 %3, label %Then, label %Else

Then:                                             ; preds = %Entry
  ; Lane 0 mismatch.
  call void @llvm.debugtrap()
  br label %EndIf

Else:                                             ; preds = %Entry
  br label %EndIf

EndIf:                                            ; preds = %Else, %Then
  ; Lane 1: expected false. %6 is set when the lane is NOT false.
  ; This is the block ('fun:EndIf') shown in the SelectionDAG dumps below.
  %4 = load <4 x i1>, <4 x i1>* %x
  %5 = extractelement <4 x i1> %4, i32 1
  %6 = icmp ne i1 %5, false
  br i1 %6, label %Then1, label %Else2

Then1:                                            ; preds = %EndIf
  ; Lane 1 mismatch.
  call void @llvm.debugtrap()
  br label %EndIf3

Else2:                                            ; preds = %EndIf
  br label %EndIf3

EndIf3:                                           ; preds = %Else2, %Then1
  ; Lane 2: expected true. %9 is set when the lane is NOT true.
  %7 = load <4 x i1>, <4 x i1>* %x
  %8 = extractelement <4 x i1> %7, i32 2
  %9 = icmp ne i1 %8, true
  br i1 %9, label %Then4, label %Else5

Then4:                                            ; preds = %EndIf3
  ; Lane 2 mismatch.
  call void @llvm.debugtrap()
  br label %EndIf6

Else5:                                            ; preds = %EndIf3
  br label %EndIf6

EndIf6:                                           ; preds = %Else5, %Then4
  ; Lane 3: expected false. %12 is set when the lane is NOT false.
  %10 = load <4 x i1>, <4 x i1>* %x
  %11 = extractelement <4 x i1> %10, i32 3
  %12 = icmp ne i1 %11, false
  br i1 %12, label %Then7, label %Else8

Then7:                                            ; preds = %EndIf6
  ; Lane 3 mismatch.
  call void @llvm.debugtrap()
  br label %EndIf9

Else8:                                            ; preds = %EndIf6
  br label %EndIf9

EndIf9:                                           ; preds = %Else8, %Then7
  ret void
}

; Declaration of the llvm.debugtrap intrinsic, which @fun calls whenever an
; extracted lane does not match its expected value.
; Function Attrs: nounwind
declare void @llvm.debugtrap()

The `extract_vector_elt` (together with the `any_extend`, I guess) is legalized into a `CopyToReg` plus an `and` with the constant 1, as you can see in the DAG dumps below. I think the only problem is in how the `and` operand is computed: it should not always be 1.

Initial selection DAG: %bb.3 'fun:EndIf'
SelectionDAG has 19 nodes:
  t0: ch = EntryToken
  t2: i32 = Constant<0>
    t4: v4i1,ch = load<(dereferenceable load 1 from %ir.x, align 4)> t0, FrameIndex:i32<0>, undef:i32
  t6: i1 = extract_vector_elt t4, Constant:i32<1>
        t7: i8 = any_extend t6
      t9: ch = CopyToReg t0, Register:i8 %20, t7
        t12: i1 = setcc t6, Constant:i1<0>, setne:ch
      t14: i1 = xor t12, Constant:i1<-1>
    t16: ch = brcond t9, t14, BasicBlock:ch<Else2 0x5588bc3e3478>
  t18: ch = br t16, BasicBlock:ch<Then1 0x5588bc3e33b8>

Optimized lowered selection DAG: %bb.3 'fun:EndIf'
SelectionDAG has 16 nodes:
  t0: ch = EntryToken
    t4: v4i1,ch = load<(dereferenceable load 1 from %ir.x, align 4)> t0, FrameIndex:i32<0>, undef:i32
  t6: i1 = extract_vector_elt t4, Constant:i32<1>
        t7: i8 = any_extend t6
      t9: ch = CopyToReg t0, Register:i8 %20, t7
      t20: i1 = setcc t6, Constant:i1<0>, seteq:ch
    t21: ch = brcond t9, t20, BasicBlock:ch<Else2 0x5588bc3e3478>
  t18: ch = br t21, BasicBlock:ch<Then1 0x5588bc3e33b8>

Type-legalized selection DAG: %bb.3 'fun:EndIf'
SelectionDAG has 16 nodes:
  t0: ch = EntryToken
  t32: i8,ch = load<(dereferenceable load 1 from %ir.x, align 4), anyext from i1> t0, FrameIndex:i32<0>, undef:i32
      t9: ch = CopyToReg t0, Register:i8 %20, t32
          t28: i8 = and t32, Constant:i8<1>
        t26: i8 = setcc t28, Constant:i8<0>, seteq:ch
      t34: i8 = and t26, Constant:i8<1>
    t21: ch = brcond t9, t34, BasicBlock:ch<Else2 0x5588bc3e3478>
  t18: ch = br t21, BasicBlock:ch<Then1 0x5588bc3e33b8>
andrewrk commented 4 years ago

Upstream bug report: https://bugs.llvm.org/show_bug.cgi?id=44902

alexrp commented 3 weeks ago

This has been fixed. I don't know if a test was disabled because of it, though.