llvm / llvm-project

The LLVM Project is a collection of modular and reusable compiler and toolchain technologies.
http://llvm.org
Other
28.49k stars 11.77k forks source link

LICM miscompilation #64897

Closed cbeuw closed 10 months ago

cbeuw commented 1 year ago
; ModuleID = 'repro.8de1151540a1c9b4-cgu.0'
source_filename = "repro.8de1151540a1c9b4-cgu.0"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; repro::dump_var1
; Prints a single byte of the aggregate behind %val3: the i8 at field path
; (1, 1) of { [8 x i128], { i32, i8, i8, [2 x i8] } }. Every other parameter
; is unused by the body.
; Function Attrs: noinline nonlazybind uwtable
define internal fastcc void @_ZN5repro9dump_var117h8dff5c7c82785d52E(i64 noundef %f, i64 noundef %var0, ptr noalias nocapture noundef readonly align 4 dereferenceable(28) %val0, i64 noundef %var1, i32 noundef %val1, i64 noundef %var2, i16 noundef %val2, i64 noundef %var3, ptr noalias nocapture noundef readonly align 8 dereferenceable(136) %val3) unnamed_addr #0 {
start:
  ; %0 = address of the first i8 member of the trailing struct in %val3.
  %0 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %val3, i64 0, i32 1, i32 1
  %_11 = load i8, ptr %0, align 4, !noundef !3
  call void @print_char(i8 noundef zeroext %_11)
  ret void
}

; repro::dump_var2
; Prints two bytes, in this order:
;   1. the i8 at field path (1, 1) of the aggregate behind %val2;
;   2. bits 32..39 of the packed i64 passed as the last argument (%0).
; Every other parameter is unused by the body.
; Function Attrs: noinline nonlazybind uwtable
define internal fastcc void @_ZN5repro9dump_var217h50ad6f199af3f2eaE(i64 noundef %f, i64 noundef %var0, i64 noundef %val0, i64 noundef %var1, i32 noundef %val1, i64 noundef %var2, ptr noalias nocapture noundef readonly align 8 dereferenceable(136) %val2, i64 noundef %var3, i64 %0) unnamed_addr #0 {
start:
  ; Extract the byte that sits at bits 32..39 of the packed argument.
  %.sroa.4.0.extract.shift = lshr i64 %0, 32
  %.sroa.4.0.extract.trunc = trunc i64 %.sroa.4.0.extract.shift to i8
  ; %1 = address of the first i8 member of the trailing struct in %val2.
  %1 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %val2, i64 0, i32 1, i32 1
  %_11 = load i8, ptr %1, align 4, !noundef !3
  call void @print_char(i8 noundef zeroext %_11)
  call void @print_char(i8 noundef zeroext %.sroa.4.0.extract.trunc)
  ret void
}

; fn1: entry point called by the C helper. It runs a two-level loop
; (bb1 outer, bb2 inner) and, when bb3 reads back 1457839173, reaches bb5,
; which calls dump_var1 and dump_var2 to print three bytes.
; The issue reports 233 / 0 / 233 at llc -O0 and 255 / 0 / 255 after
; `opt -passes=licm`.
; Function Attrs: nonlazybind uwtable
define void @fn1() unnamed_addr #1 {
start:
  %_24 = alloca { [8 x i128], { i32, i8, i8, [2 x i8] } }, align 8
  %_23 = alloca { [8 x i128], { i32, i8, i8, [2 x i8] } }, align 8
  %_18 = alloca [7 x i32], align 4
  ; %0 = &_23.1 (the trailing { i32, i8, i8, [2 x i8] } struct).
  %0 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %_23, i64 0, i32 1
  ; %1..%6 = addresses of elements 1..6 of the [7 x i32] array %_18.
  %1 = getelementptr inbounds [7 x i32], ptr %_18, i64 0, i64 1
  %2 = getelementptr inbounds [7 x i32], ptr %_18, i64 0, i64 2
  %3 = getelementptr inbounds [7 x i32], ptr %_18, i64 0, i64 3
  %4 = getelementptr inbounds [7 x i32], ptr %_18, i64 0, i64 4
  %5 = getelementptr inbounds [7 x i32], ptr %_18, i64 0, i64 5
  %6 = getelementptr inbounds [7 x i32], ptr %_18, i64 0, i64 6
  br label %bb1

bb1:                                              ; preds = %bb3, %start
  %_5.0 = phi i64 [ -47, %start ], [ %.lcssa4, %bb3 ]
  %_4.0 = phi i64 [ -47, %start ], [ %_8.lcssa7, %bb3 ]
  ; 4-byte store to the i32 at the start of _23.1, through pointer %0.
  store i32 -571572757, ptr %0, align 8
  br label %bb2

bb2:                                              ; preds = %bb2, %bb1
  %_5.1 = phi i64 [ %_5.0, %bb1 ], [ %7, %bb2 ]
  %_4.1 = phi i64 [ %_4.0, %bb1 ], [ %_8, %bb2 ]
  %_8 = mul i64 %_4.1, -47
  %7 = mul i64 %_4.1, -1693353459891306496
  ; 1 repeats the inner loop, -47 continues to bb3, anything else leaves.
  switch i64 %_5.1, label %bb7.loopexit [
    i64 1, label %bb2
    i64 -47, label %bb3
  ]

bb7.loopexit:                                     ; preds = %bb2
  ; Fill all seven elements of %_18 with 97 before returning.
  store i32 97, ptr %_18, align 4
  store i32 97, ptr %1, align 4
  store i32 97, ptr %2, align 4
  store i32 97, ptr %3, align 4
  store i32 97, ptr %4, align 4
  store i32 97, ptr %5, align 4
  store i32 97, ptr %6, align 4
  br label %bb7

bb7.loopexit3:                                    ; preds = %bb3
  ; Same fill of %_18 as bb7.loopexit, reached from bb3's default case.
  store i32 97, ptr %_18, align 4
  store i32 97, ptr %1, align 4
  store i32 97, ptr %2, align 4
  store i32 97, ptr %3, align 4
  store i32 97, ptr %4, align 4
  store i32 97, ptr %5, align 4
  store i32 97, ptr %6, align 4
  br label %bb7

bb7:                                              ; preds = %bb7.loopexit3, %bb7.loopexit, %bb5
  ret void

bb3:                                              ; preds = %bb2
  %_8.lcssa7 = phi i64 [ %_8, %bb2 ]
  %.lcssa4 = phi i64 [ %7, %bb2 ]
  ; %8 is computed exactly like %0: a second pointer to &_23.1.
  %8 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %_23, i64 0, i32 1
  store i32 97, ptr %8, align 8
  ; 8-byte store over all of _23.1. 95947119685 = 1457839173 + (22 << 32),
  ; so on this little-endian target the i32 becomes 1457839173 and the byte
  ; at %11 (offset 4) becomes 22.
  store i64 95947119685, ptr %8, align 8
  %9 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %_24, i64 0, i32 1
  %10 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %_24, i64 0, i32 1, i32 1
  %11 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %_23, i64 0, i32 1, i32 1
  ; Reads the byte just written by the i64 store above; 22 xor -1 = 233,
  ; which is the value the issue expects to be printed.
  %12 = load i8, ptr %11, align 4, !noundef !3
  %13 = xor i8 %12, -1
  store i8 %13, ptr %10, align 4
  store i8 0, ptr %11, align 4
  %14 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %_23, i64 0, i32 1, i32 2
  %15 = load i8, ptr %14, align 1, !range !4, !noundef !3
  %16 = load i32, ptr %8, align 8, !noundef !3
  ; Copy the i32 and the second i8 into _24.1; _24.1.1 is overwritten with 0.
  store i32 %16, ptr %9, align 8
  store i8 0, ptr %10, align 4
  %17 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %_24, i64 0, i32 1, i32 2
  store i8 %15, ptr %17, align 1
  ; 0 restarts the outer loop, 1457839173 goes on to print, else return.
  switch i32 %16, label %bb7.loopexit3 [
    i32 0, label %bb1
    i32 1457839173, label %bb5
  ]

bb5:                                              ; preds = %bb3
  %.lcssa12 = phi i8 [ %13, %bb3 ]
  %.lcssa10 = phi i8 [ %15, %bb3 ]
  %.lcssa8 = phi i32 [ %16, %bb3 ]
  store i32 97, ptr %_18, align 4
  store i32 97, ptr %1, align 4
  store i32 97, ptr %2, align 4
  store i32 97, ptr %3, align 4
  store i32 97, ptr %4, align 4
  store i32 97, ptr %5, align 4
  store i32 97, ptr %6, align 4
  %18 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %_23, i64 0, i32 1, i32 1
  ; Write eight i128 constants into _24 at 16-byte strides (bytes 0..127).
  store i128 -89304870548512196620982851945701258489, ptr %_24, align 8
  %_17.sroa.2.0._24.sroa_idx = getelementptr inbounds i8, ptr %_24, i64 16
  store i128 -60367832464323173041853249899068734585, ptr %_17.sroa.2.0._24.sroa_idx, align 8
  %_17.sroa.3.0._24.sroa_idx = getelementptr inbounds i8, ptr %_24, i64 32
  store i128 -93906512331388985662572272619431403526, ptr %_17.sroa.3.0._24.sroa_idx, align 8
  %_17.sroa.4.0._24.sroa_idx = getelementptr inbounds i8, ptr %_24, i64 48
  store i128 -125516722603504180270166493233981031438, ptr %_17.sroa.4.0._24.sroa_idx, align 8
  %_17.sroa.5.0._24.sroa_idx = getelementptr inbounds i8, ptr %_24, i64 64
  store i128 17428368806739377979573757188831317740, ptr %_17.sroa.5.0._24.sroa_idx, align 8
  %_17.sroa.6.0._24.sroa_idx = getelementptr inbounds i8, ptr %_24, i64 80
  store i128 -124632825879573085834035531238990818991, ptr %_17.sroa.6.0._24.sroa_idx, align 8
  %_17.sroa.7.0._24.sroa_idx = getelementptr inbounds i8, ptr %_24, i64 96
  store i128 99533020080114872928824195912378985846, ptr %_17.sroa.7.0._24.sroa_idx, align 8
  %_17.sroa.8.0._24.sroa_idx = getelementptr inbounds i8, ptr %_24, i64 112
  store i128 141971359805814584223867407839838043800, ptr %_17.sroa.8.0._24.sroa_idx, align 8
  ; Copy those 128 bytes into _23, then put the xor result into _23.1.1.
  call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(128) %_23, ptr noundef nonnull align 8 dereferenceable(128) %_24, i64 128, i1 false)
  store i8 %.lcssa12, ptr %18, align 4
; call repro::dump_var1
  call fastcc void @_ZN5repro9dump_var117h8dff5c7c82785d52E(i64 noundef 1, i64 noundef 26, ptr noalias nocapture noundef nonnull readonly align 4 dereferenceable(28) %_18, i64 noundef 19, i32 noundef 97, i64 noundef 20, i16 noundef 1207, i64 noundef 27, ptr noalias nocapture noundef nonnull readonly align 8 dereferenceable(136) %_23)
  ; Pack (%.lcssa8, %.lcssa12, %.lcssa10) into one i64:
  ; bits 0..31 = i32, bits 32..39 = xor result, bits 40..47 = second i8.
  %_21.sroa.4.0.insert.ext = zext i8 %.lcssa10 to i64
  %_21.sroa.4.0.insert.shift = shl nuw nsw i64 %_21.sroa.4.0.insert.ext, 40
  %_21.sroa.3.0.insert.ext = zext i8 %.lcssa12 to i64
  %_21.sroa.3.0.insert.shift = shl nuw nsw i64 %_21.sroa.3.0.insert.ext, 32
  %_21.sroa.3.0.insert.insert = add nuw nsw i64 %_21.sroa.4.0.insert.shift, %_21.sroa.3.0.insert.shift
  %_21.sroa.0.0.insert.ext = zext i32 %.lcssa8 to i64
  %_21.sroa.0.0.insert.insert = add nuw nsw i64 %_21.sroa.3.0.insert.insert, %_21.sroa.0.0.insert.ext
; call repro::dump_var2
  call fastcc void @_ZN5repro9dump_var217h50ad6f199af3f2eaE(i64 noundef 1, i64 noundef 29, i64 noundef -1, i64 noundef 13, i32 noundef 97, i64 noundef 33, ptr noalias nocapture noundef nonnull readonly align 8 dereferenceable(136) %_24, i64 noundef 37, i64 %_21.sroa.0.0.insert.insert)
  br label %bb7
}

; Function Attrs: nonlazybind uwtable
declare void @print_char(i8 noundef zeroext) unnamed_addr #1

; Function Attrs: mustprogress nocallback nofree nounwind willreturn memory(argmem: readwrite)
declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #2

attributes #0 = { noinline nonlazybind uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #1 = { nonlazybind uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #2 = { mustprogress nocallback nofree nounwind willreturn memory(argmem: readwrite) }

!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}

!0 = !{i32 8, !"PIC Level", i32 2}
!1 = !{i32 2, !"RtLibUseGOT", i32 1}
!2 = !{!"rustc version 1.74.0-nightly (ef85656a1 2023-08-21)"}
!3 = !{}
!4 = !{i8 0, i8 2}

When compiled and linked with this C helper:

#include <stdio.h>

/* Print the numeric value of v in decimal, followed by a newline. */
void print_char(unsigned char v) {
    const int value = v; /* the same promotion the variadic call applies */
    printf("%d\n", value);
}

void fn1(void);

/* Run the routine defined in the IR module, then exit with status 0. */
int main() {
    fn1();
    return 0;
}

it should print:

$ llc -O0 repro.ll && clang helper.c repro.s && ./a.out
233
0
233

However, after running the LICM pass, it prints something else:

$ opt -passes=licm repro.ll -S | llc -o repro.s && clang helper.c repro.s && ./a.out
255
0
255
nikic commented 1 year ago

What's the original test case, i.e. the input before any optimisation rather than the IR just before LICM? I think this is an artifact of accessing uninitialized memory.

cbeuw commented 1 year ago

This is the reproduction in Rust's custom MIR:

#![feature(custom_mir, core_intrinsics)]
#![allow(unused_parens, unused_assignments, overflowing_literals)]
extern crate core;
use core::intrinsics::mir::*;

// Prints `val3.1.1`, the `u8` inside the nested tuple of `val3`.
// No other parameter is read by the body.
#[inline(never)]
fn dump_var1(
    f: usize,
    var0: usize,
    val0: [char; 7],
    var1: usize,
    val1: char,
    var2: usize,
    val2: u16,
    var3: usize,
    val3: ([u128;8], (u32, u8, bool)),
) {
    println!("{}", val3.1.1);
}

// Prints two values, one per line: first `val2.1.1` (the `u8` inside the
// nested tuple of `val2`), then `val3.1` (the `u8` of the small tuple).
// No other parameter is read by the body.
#[inline(never)]
fn dump_var2(
    f: usize,
    var0: usize,
    val0: u64,
    var1: usize,
    val1: char,
    var2: usize,
    val2: ([u128; 8], (u32, u8, bool)),
    var3: usize,
    val3: (u32, u8,bool),
) {
    println!("{}", val2.1.1);
    println!("{}", val3.1);
}

// Hand-written MIR body (custom_mir, "runtime" dialect, "initial" phase)
// that drives the reproduction. The -Copt-level=0 run shown in the issue
// prints 233, 0, 233.
#[custom_mir(dialect = "runtime", phase = "initial")]
fn fn1() {
    mir! {
    let _1: isize;
    let _5: u64;
    let _6: isize;
    let _10: isize;
    let _12: isize;
    let _14: isize;
    let _20: u16;
    let _25: isize;
    let _28: u16;
    let _29: u64;

    let _23: bool;
    let _30: bool;

    let _9: char;
    let _13: char;
    let _19: char;

    let _21: [char; 5];
    let _24: [u128; 8];
    let _26: [char; 7];
    let _36: [u8; 1];

    let _32: (u16, i128);
    let _37: (u32, u8, bool);

    let _31: Adt64;

    let _27: ([u128; 8], (u32, u8, bool));
    let _33: ([u128; 8], (u32, u8, bool));
    let _43: ();
    {
    // Entry block: seed the locals that the loop below reads.
    _1 = -47;
    _5 = 0;
    _6 = 0;
    _9 = 'a';
    _10 = _1;
    _12 = _1;
    _13 = 'a';
    _19 = _9;
    _20 = 1207_u16;
    Goto(bb4)
    }
    bb4 = {
    _21 = [_13,_9,_9,_9,_19];
    Goto(bb5)
    }
    bb5 = {
    // Inner loop: branches on the value _12 had on entry to this block.
    _14 = _12;
    _25 = _1 * _10;
    _23 = _5 != _5;
    _10 = _25;
    _24 = [250977496372426266842391755486066952967_u128,279914534456615290421521357532699476871_u128,246375854589549477800802334812336807930_u128,214765644317434283193208114197787180018_u128,17428368806739377979573757188831317740_u128,215649541041365377629339076192777392465_u128,99533020080114872928824195912378985846_u128,141971359805814584223867407839838043800_u128];
    _27.1.0 = !571572756_u32;
    _28 = _9 as u16;
    _12 = _10 << _20;
    _26 = [_19,_13,_13,_13,_13,_9,_19];
    // The second arm's literal is 2^128 - 47; presumably it wraps to -47 as
    // an isize (the IR reproductions in this issue switch on i64 -47).
    match _14 {
    1 => bb5,
    340282366920938463463374607431768211409 => bb10,
    _ => bb20
    }
    }
    bb10 = {
    _29 = !_5;
    // Field write to _27.1.0, immediately followed by a call that
    // overwrites the whole of _27.1.
    _27.1.0 = _9 as u32;
    Call(_27.1 = fn3(_23), bb11)
    }
    bb11 = {
    // fn3 returns 22 in the u8 slot, so !_27.1.1 is !22_u8 == 233.
    _33.1.1 = !_27.1.1;
    _31 = Adt64 { fld0: _33.1.1 };
    _27.1.1 = _6 as u8;
    _30 = _27.1.2;
    _32 = (_28, 17720767051099432542734011916916350651_i128);
    // _33.1.1 is replaced here by _27.1.1, which was just set from _6 (0).
    _33.1 = (_27.1.0, _27.1.1, _30);
    // _37.1 keeps the inverted byte via _31.fld0.
    _37 = (_33.1.0, _31.fld0, _33.1.2);
    _36 = [_31.fld0];
    match _27.1.0 {
    0 => bb4,
    1457839173 => bb16,
    _ => bb20
    }
    }
    bb16 = {
    _33.0 = _24;
    _27 = (_33.0, _37);
    Goto(bb17)
    }
    bb17 = {
    // Prints _27.1.1, i.e. _37.1.
    Call(_43 = dump_var1(1_usize, 26_usize, Move(_26), 19_usize, Move(_19), 20_usize, Move(_20), 27_usize, Move(_27)), bb19)
    }
    bb19 = {
    // Prints _33.1.1 and then _37.1.
    Call(_43 = dump_var2(1_usize, 29_usize, Move(_29), 13_usize, Move(_13), 33_usize, Move(_33), 37_usize, Move(_37)), bb20)
    }
    bb20 = {
    Return()
    }

    }
}
// Returns a fixed (u32, u8) pair, 1457839173 and 22, together with the
// caller-supplied bool.
fn fn3(mut _15: bool) -> (u32, u8, bool) {
    return (1457839173_u32, 22_u8, _15);
}
// Program entry point: runs the custom-MIR reproduction once.
pub fn main() {
    fn1();
}
// Single-field wrapper around a `u8`; `fn1` uses it to hold a copy of the
// inverted byte.
pub struct Adt64 {
    fld0: u8,
}
$ rustc -Zmir-opt-level=0 -Copt-level=0 repro.rs && ./repro
233
0
233
$ rustc -Zmir-opt-level=0 -Copt-level=1 repro.rs && ./repro
255
0
255
$ rustc -Zmir-opt-level=0 -Copt-level=2 repro.rs && ./repro
0
0
0

But I couldn't get a pure IR reproduction without comparing before/after LICM

cbeuw commented 10 months ago

I got an original IR reproduction which shouldn't have any uninit memory access. It is self-contained.

Right:

$ clang -O0 repro.ll && ./a.out
0
1457839173
233
0
1457839173
161

Wrong:

$ clang -O1 repro.ll && ./a.out
0
1457839173
255
0
1457839173
161

Reproduction:

; ModuleID = 'repro.6b9acda9b5bccdd7-cgu.0'
source_filename = "repro.6b9acda9b5bccdd7-cgu.0"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@vtable.0 = private unnamed_addr constant <{ ptr, [16 x i8], ptr, ptr, ptr }> <{ ptr @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17he90579972123ee41E", [16 x i8] c"\08\00\00\00\00\00\00\00\08\00\00\00\00\00\00\00", ptr @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h56f5507f08b0f565E", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h8317b2bf36a059c4E", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h8317b2bf36a059c4E" }>, align 8
@alloc_87551382a9de3243abbfdbda2f0b586b = private unnamed_addr constant <{ [4 x i8] }> <{ [4 x i8] c"%d\0A\00" }>, align 1

; std::sys_common::backtrace::__rust_begin_short_backtrace
; Forwards %f to FnOnce::call_once, then executes an empty inline-asm
; statement that is marked sideeffect and clobbers memory.
; Function Attrs: noinline nonlazybind uwtable
define internal void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h7690185bae7be86dE(ptr %f) unnamed_addr #0 {
start:
; call core::ops::function::FnOnce::call_once
  call void @_ZN4core3ops8function6FnOnce9call_once17h51fc9644382a8c58E(ptr %f)
  call void asm sideeffect "", "~{memory}"(), !srcloc !4
  ret void
}

; std::rt::lang_start::{{closure}}
; Loads the pointer stored at %_1, runs it through
; __rust_begin_short_backtrace, then returns the i8 produced by
; Termination::report zero-extended to i32.
; Function Attrs: inlinehint nonlazybind uwtable
define internal i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h8317b2bf36a059c4E"(ptr align 8 %_1) unnamed_addr #1 {
start:
  %self = alloca i8, align 1
  %_4 = load ptr, ptr %_1, align 8, !nonnull !5, !noundef !5
; call std::sys_common::backtrace::__rust_begin_short_backtrace
  call void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h7690185bae7be86dE(ptr %_4)
; call <() as std::process::Termination>::report
  %0 = call i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17h66ba62261e510d16E"()
  ; Round-trip the report value through the %self stack slot.
  store i8 %0, ptr %self, align 1
  %_6 = load i8, ptr %self, align 1, !noundef !5
  %_0 = zext i8 %_6 to i32
  ret i32 %_0
}

; core::ops::function::FnOnce::call_once{{vtable.shim}}
; Loads the pointer stored at %_1 and passes it to the i32-returning
; call_once, returning its result. The %_2 alloca is never used.
; Function Attrs: inlinehint nonlazybind uwtable
define internal i32 @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h56f5507f08b0f565E"(ptr %_1) unnamed_addr #1 {
start:
  %_2 = alloca {}, align 1
  %0 = load ptr, ptr %_1, align 8, !nonnull !5, !noundef !5
; call core::ops::function::FnOnce::call_once
  %_0 = call i32 @_ZN4core3ops8function6FnOnce9call_once17h64ee9a53e78ade0cE(ptr %0)
  ret i32 %_0
}

; core::ops::function::FnOnce::call_once
; Calls %_1 indirectly as a function taking no arguments and returning void.
; The %_2 alloca is never used.
; Function Attrs: inlinehint nonlazybind uwtable
define internal void @_ZN4core3ops8function6FnOnce9call_once17h51fc9644382a8c58E(ptr %_1) unnamed_addr #1 {
start:
  %_2 = alloca {}, align 1
  call void %_1()
  ret void
}

; core::ops::function::FnOnce::call_once
; Spills %0 into a stack slot and invokes the lang_start closure with the
; address of that slot. On normal return the closure's i32 is returned; on
; unwind the landing-pad value is saved in %1 and then resumed.
; Function Attrs: inlinehint nonlazybind uwtable
define internal i32 @_ZN4core3ops8function6FnOnce9call_once17h64ee9a53e78ade0cE(ptr %0) unnamed_addr #1 personality ptr @rust_eh_personality {
start:
  %1 = alloca { ptr, i32 }, align 8
  %_2 = alloca {}, align 1
  %_1 = alloca ptr, align 8
  store ptr %0, ptr %_1, align 8
; invoke std::rt::lang_start::{{closure}}
  %_0 = invoke i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h8317b2bf36a059c4E"(ptr align 8 %_1)
          to label %bb1 unwind label %cleanup

bb3:                                              ; preds = %cleanup
  ; Rebuild the { ptr, i32 } pair saved by %cleanup and resume unwinding.
  %2 = load ptr, ptr %1, align 8, !noundef !5
  %3 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 1
  %4 = load i32, ptr %3, align 8, !noundef !5
  %5 = insertvalue { ptr, i32 } poison, ptr %2, 0
  %6 = insertvalue { ptr, i32 } %5, i32 %4, 1
  resume { ptr, i32 } %6

cleanup:                                          ; preds = %start
  ; Store both members of the landing-pad value into the %1 slot.
  %7 = landingpad { ptr, i32 }
          cleanup
  %8 = extractvalue { ptr, i32 } %7, 0
  %9 = extractvalue { ptr, i32 } %7, 1
  %10 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 0
  store ptr %8, ptr %10, align 8
  %11 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 1
  store i32 %9, ptr %11, align 8
  br label %bb3

bb1:                                              ; preds = %start
  ret i32 %_0
}

; core::ptr::drop_in_place<std::rt::lang_start<()>::{{closure}}>
; Empty body: returns immediately without touching %_1.
; Function Attrs: inlinehint nonlazybind uwtable
define internal void @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17he90579972123ee41E"(ptr align 8 %_1) unnamed_addr #1 {
start:
  ret void
}

; <() as std::process::Termination>::report
; Always returns the constant i8 0.
; Function Attrs: inlinehint nonlazybind uwtable
define internal i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17h66ba62261e510d16E"() unnamed_addr #1 {
start:
  ret i8 0
}

; repro::dump_var
; Prints three values from the aggregate behind %val2, each through printf
; with the "%d\n" format string held in @alloc_87551382a9de3243abbfdbda2f0b586b:
;   1. the first i128 array element, truncated to i32;
;   2. the i32 at field path (1, 0);
;   3. the i8 at field path (1, 1), zero-extended to i32.
; Function Attrs: noinline nonlazybind uwtable
define internal void @_ZN5repro8dump_var17hc219273b1201587bE(ptr align 8 %val2) unnamed_addr #0 {
start:
  %0 = getelementptr inbounds [8 x i128], ptr %val2, i64 0, i64 0
  %_8 = load i128, ptr %0, align 8, !noundef !5
  %_7 = trunc i128 %_8 to i32
  %_2 = call i32 (ptr, ...) @printf(ptr @alloc_87551382a9de3243abbfdbda2f0b586b, i32 %_7)
  %1 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %val2, i32 0, i32 1
  %_18 = load i32, ptr %1, align 8, !noundef !5
  %_12 = call i32 (ptr, ...) @printf(ptr @alloc_87551382a9de3243abbfdbda2f0b586b, i32 %_18)
  %2 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %val2, i32 0, i32 1
  %3 = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr %2, i32 0, i32 1
  ; This byte is the third printed line, the one that differs between the
  ; -O0 and -O1 runs shown in the issue (233 versus 255).
  %_25 = load i8, ptr %3, align 4, !noundef !5
  %_24 = zext i8 %_25 to i32
  %_19 = call i32 (ptr, ...) @printf(ptr @alloc_87551382a9de3243abbfdbda2f0b586b, i32 %_24)
  ret void
}

; repro::fn1
; Unoptimised form of the reproduction. @main calls it with
; (-47, 97, -47, -47). It runs a two-level loop (bb1 outer, bb2 inner) and,
; when bb3 reads back 1457839173, reaches bb5 and prints two aggregates with
; dump_var. The issue reports the third printed value as 233 at -O0 and 255
; at -O1.
; Function Attrs: nonlazybind uwtable
define internal void @_ZN5repro3fn117h7928ea28c0896337E(i64 %0, i32 %_2, i64 %1, i64 %2) unnamed_addr #2 {
start:
  %3 = alloca i64, align 8
  %_16 = alloca { i32, i8, i8, [2 x i8] }, align 4
  %_15 = alloca [1 x i8], align 1
  %_14 = alloca { [8 x i128], { i32, i8, i8, [2 x i8] } }, align 8
  %_13 = alloca i8, align 1
  %_11 = alloca { [8 x i128], { i32, i8, i8, [2 x i8] } }, align 8
  %_9 = alloca [8 x i128], align 8
  %_7 = alloca [5 x i32], align 4
  %_4 = alloca i64, align 8
  %_3 = alloca i64, align 8
  %_1 = alloca i64, align 8
  ; Spill the three i64 arguments into stack slots.
  store i64 %0, ptr %_1, align 8
  store i64 %1, ptr %_3, align 8
  store i64 %2, ptr %_4, align 8
  br label %bb1

bb1:                                              ; preds = %bb3, %start
  ; Fill all five elements of %_7 with %_2.
  %4 = getelementptr inbounds [5 x i32], ptr %_7, i64 0, i64 0
  store i32 %_2, ptr %4, align 4
  %5 = getelementptr inbounds [5 x i32], ptr %_7, i64 0, i64 1
  store i32 %_2, ptr %5, align 4
  %6 = getelementptr inbounds [5 x i32], ptr %_7, i64 0, i64 2
  store i32 %_2, ptr %6, align 4
  %7 = getelementptr inbounds [5 x i32], ptr %_7, i64 0, i64 3
  store i32 %_2, ptr %7, align 4
  %8 = getelementptr inbounds [5 x i32], ptr %_7, i64 0, i64 4
  store i32 %_2, ptr %8, align 4
  br label %bb2

bb2:                                              ; preds = %bb2, %bb1
  ; _4 = _3, then _1 = _1 * _1 (both multiplicands are loads of %_1).
  %9 = load i64, ptr %_3, align 8, !noundef !5
  store i64 %9, ptr %_4, align 8
  %10 = load i64, ptr %_1, align 8, !noundef !5
  %11 = load i64, ptr %_1, align 8, !noundef !5
  %_10 = mul i64 %10, %11
  store i64 %_10, ptr %_1, align 8
  ; Zero all 128 bytes of %_9.
  %12 = getelementptr inbounds [8 x i128], ptr %_9, i64 0, i64 0
  call void @llvm.memset.p0.i64(ptr align 8 %12, i8 0, i64 128, i1 false)
  ; 4-byte store to the i32 at the start of _11.1.
  %13 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %_11, i32 0, i32 1
  store i32 -571572757, ptr %13, align 8
  %14 = load i64, ptr %_1, align 8, !noundef !5
  %15 = shl i64 %14, 55
  store i64 %15, ptr %_3, align 8
  ; Branch on the copy of _3 taken at the top of this block:
  ; 1 repeats bb2, -47 continues to bb3, anything else returns.
  %16 = load i64, ptr %_4, align 8, !noundef !5
  switch i64 %16, label %bb7 [
    i64 1, label %bb2
    i64 -47, label %bb3
  ]

bb7:                                              ; preds = %bb5, %bb3, %bb2
  ret void

bb3:                                              ; preds = %bb2
  ; Second 4-byte store to _11.1.0, through a freshly computed pointer.
  %17 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %_11, i32 0, i32 1
  store i32 %_2, ptr %17, align 8
  %18 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %_11, i32 0, i32 1
; call repro::fn3
  %19 = call i64 @_ZN5repro3fn317h2919c21872baa48eE()
  ; Copy the 8 bytes returned by fn3 over the whole of _11.1.
  store i64 %19, ptr %3, align 8
  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %18, ptr align 8 %3, i64 8, i1 false)
  %20 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %_14, i32 0, i32 1
  %21 = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr %20, i32 0, i32 1
  %22 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %_11, i32 0, i32 1
  %23 = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr %22, i32 0, i32 1
  ; Reads _11.1.1, which lies inside the 8 bytes just copied (fn3 stores 22
  ; in that field); 22 xor -1 = 233, the value expected at -O0.
  %24 = load i8, ptr %23, align 4, !noundef !5
  %25 = xor i8 %24, -1
  store i8 %25, ptr %21, align 4
  ; Save the inverted byte in %_13.
  %26 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %_14, i32 0, i32 1
  %27 = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr %26, i32 0, i32 1
  %28 = load i8, ptr %27, align 4, !noundef !5
  store i8 %28, ptr %_13, align 1
  ; Overwrite _11.1.1 with the low byte of _1.
  %29 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %_11, i32 0, i32 1
  %30 = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr %29, i32 0, i32 1
  %31 = load i64, ptr %_1, align 8, !noundef !5
  %32 = trunc i64 %31 to i8
  store i8 %32, ptr %30, align 4
  %33 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %_11, i32 0, i32 1
  %34 = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr %33, i32 0, i32 2
  %35 = load i8, ptr %34, align 1, !range !6, !noundef !5
  %_12 = trunc i8 %35 to i1
  ; Copy the three fields of _11.1 into _14.1 one by one.
  %36 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %_14, i32 0, i32 1
  %37 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %_11, i32 0, i32 1
  %38 = load i32, ptr %37, align 8, !noundef !5
  store i32 %38, ptr %36, align 8
  %39 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %_11, i32 0, i32 1
  %40 = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr %39, i32 0, i32 1
  %41 = load i8, ptr %40, align 4, !noundef !5
  %42 = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr %36, i32 0, i32 1
  store i8 %41, ptr %42, align 4
  %43 = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr %36, i32 0, i32 2
  %44 = zext i1 %_12 to i8
  store i8 %44, ptr %43, align 1
  ; Build _16 = (_14.1.0, saved inverted byte from %_13, _14.1.2).
  %45 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %_14, i32 0, i32 1
  %46 = load i32, ptr %45, align 8, !noundef !5
  store i32 %46, ptr %_16, align 4
  %47 = load i8, ptr %_13, align 1, !noundef !5
  %48 = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr %_16, i32 0, i32 1
  store i8 %47, ptr %48, align 4
  %49 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %_14, i32 0, i32 1
  %50 = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr %49, i32 0, i32 2
  %51 = load i8, ptr %50, align 1, !range !6, !noundef !5
  %52 = trunc i8 %51 to i1
  %53 = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr %_16, i32 0, i32 2
  %54 = zext i1 %52 to i8
  store i8 %54, ptr %53, align 1
  %55 = load i8, ptr %_13, align 1, !noundef !5
  %56 = getelementptr inbounds [1 x i8], ptr %_15, i64 0, i64 0
  store i8 %55, ptr %56, align 1
  ; 0 restarts the outer loop, 1457839173 goes on to print, else return.
  %57 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %_11, i32 0, i32 1
  %58 = load i32, ptr %57, align 8, !noundef !5
  switch i32 %58, label %bb7 [
    i32 0, label %bb1
    i32 1457839173, label %bb5
  ]

bb5:                                              ; preds = %bb3
  ; _14[0..128) = _9, _11[0..128) = _14[0..128), then _11.1 = _16.
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %_14, ptr align 8 %_9, i64 128, i1 false)
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %_11, ptr align 8 %_14, i64 128, i1 false)
  %59 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %_11, i32 0, i32 1
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %59, ptr align 4 %_16, i64 8, i1 false)
; call repro::dump_var
  call void @_ZN5repro8dump_var17hc219273b1201587bE(ptr align 8 %_11)
; call repro::dump_var
  call void @_ZN5repro8dump_var17hc219273b1201587bE(ptr align 8 %_14)
  br label %bb7
}

; repro::fn3
; Builds a { i32, i8, i8, [2 x i8] } on the stack with the fields
; 1457839173, 22 and 1, then returns all 8 bytes of it as one i64.
; The trailing [2 x i8] member is never stored to before that i64 load.
; Function Attrs: nonlazybind uwtable
define internal i64 @_ZN5repro3fn317h2919c21872baa48eE() unnamed_addr #2 {
start:
  %_0 = alloca { i32, i8, i8, [2 x i8] }, align 4
  store i32 1457839173, ptr %_0, align 4
  %0 = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr %_0, i32 0, i32 1
  store i8 22, ptr %0, align 4
  %1 = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr %_0, i32 0, i32 2
  store i8 1, ptr %1, align 1
  %2 = load i64, ptr %_0, align 4
  ret i64 %2
}

; repro::main
; Calls repro::fn1 with the fixed arguments (-47, 97, -47, -47).
; Function Attrs: nonlazybind uwtable
define internal void @_ZN5repro4main17he6378134329df5bdE() unnamed_addr #2 {
start:
; call repro::fn1
  call void @_ZN5repro3fn117h7928ea28c0896337E(i64 -47, i32 97, i64 -47, i64 -47)
  ret void
}

; Stub personality routine: ignores all five arguments and returns 0.
; Function Attrs: nonlazybind uwtable
define i32 @rust_eh_personality(i32, i32, i64, ptr, ptr) unnamed_addr #2 {
start:
  ret i32 0
}

; Function Attrs: nonlazybind uwtable
declare i32 @printf(ptr, ...) unnamed_addr #2

; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #3

; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #4

; Program entry point: calls repro::fn1 directly with (-47, 97, -47, -47)
; and returns 0. Neither %0 nor %1 is used.
; Function Attrs: nonlazybind
define i32 @main(i32 %0, ptr %1) unnamed_addr #5 {
  call void @_ZN5repro3fn117h7928ea28c0896337E(i64 -47, i32 97, i64 -47, i64 -47)
  ret i32 0
}

attributes #0 = { noinline nonlazybind uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #1 = { inlinehint nonlazybind uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #2 = { nonlazybind uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #3 = { nocallback nofree nounwind willreturn memory(argmem: write) }
attributes #4 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
attributes #5 = { nonlazybind "target-cpu"="x86-64" }

!llvm.module.flags = !{!0, !1, !2}
!llvm.ident = !{!3}

!0 = !{i32 8, !"PIC Level", i32 2}
!1 = !{i32 7, !"PIE Level", i32 2}
!2 = !{i32 2, !"RtLibUseGOT", i32 1}
!3 = !{!"rustc version 1.76.0-nightly (a1a37735c 2023-11-23)"}
!4 = !{i32 2020822}
!5 = !{}
!6 = !{i8 0, i8 2}
nikic commented 10 months ago

Thanks for the new example. This does look like a LICM scalar promotion miscompile to me. Before LICM we have:

  %i12.le = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %_11, i64 0, i32 1
  store i32 97, ptr %i12.le, align 8
  store i64 1195458747461, ptr %i12.le, align 4
  %i19 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %_14, i64 0, i32 1
  %i20 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %_14, i64 0, i32 1, i32 1
  %i22 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %_11, i64 0, i32 1, i32 1
  %i23 = load i8, ptr %i22, align 4, !noundef !4
  %i24 = xor i8 %i23, -1
  store i8 %i24, ptr %i20, align 4
  %i31 = trunc i64 %_10.lcssa8 to i8
  store i8 %i31, ptr %i22, align 4

After LICM we have:

  store i32 97, ptr %i12.le, align 8
  store i64 1195458747461, ptr %i12.le, align 4
  %i24 = xor i8 %i2311, -1
  %i31 = trunc i64 %_10.lcssa8 to i8

It looks like `load i8, ptr %i22` was promoted, even though there is a wider store, `store i64 1195458747461, ptr %i12.le`, that clobbers the same memory.

nikic commented 10 months ago

I believe this is a good reduction of the issue:

; Reduced reproduction. %p and %p.copy are two separately computed pointers
; to the same address (%a + 8); %p2 points at %a + 12. The i64 store through
; %p.copy writes bytes 8..15 of %a, which include the byte loaded via %p2.
define void @test(i1 %c, i8 %x) {
start:
  %a = alloca [16 x i8], align 8
  %p = getelementptr inbounds i8, ptr %a, i64 8
  %p.copy = getelementptr inbounds i8, ptr %a, i64 8
  %p2 = getelementptr inbounds i8, ptr %a, i64 12
  br label %loop

loop:
  ; 4-byte store via %p, then a 4-byte and an 8-byte store via %p.copy.
  store i32 u0x11111111, ptr %p
  store i32 u0x22, ptr %p.copy
  store i64 u0x3333333333333333, ptr %p.copy
  ; Loads byte 12, which the i64 store above has just overwritten.
  %val = load i8, ptr %p2
  call void @use(i8 %val)
  store i8 0, ptr %p2
  br i1 %c, label %loop, label %exit

exit:
  ret void
}

declare void @use(i8)

This turns into:

  %0 = phi i8 [ 0, %loop ], [ %p2.promoted, %start ]
  store i32 286331153, ptr %p, align 4
  store i32 34, ptr %p.copy, align 4
  store i64 3689348814741910323, ptr %p.copy, align 4
  call void @use(i8 %0)

To reproduce, we need all three of the stores, and we need the stores to happen through two different but identical pointers.

nikic commented 10 months ago

print-alias-sets for this example gives:

Alias Set Tracker: 3 alias sets for 3 pointer values.
  AliasSet[0x8b8b690, 2] must alias, Mod       Pointers: (ptr %p, LocationSize::precise(4)), (ptr %p.copy, LocationSize::upperBound(8))
  AliasSet[0x8b8b7c0, 1] must alias, Mod/Ref   Pointers: (ptr %p2, LocationSize::precise(1))
  AliasSet[0x8b8c020, 1] may alias, Mod/Ref   
    1 Unknown instructions:   call void @use(i8 %val)

This is not correct: the first two alias sets should have been merged. The bug is likely in AliasSetTracker (AST).

nikic commented 10 months ago

I believe the problem is that AST has special handling for MustAlias sets, where it will only check the first PointerRec, as they should all be equivalent anyway. However, while the pointers are all MustAlias, they can have different LocationSizes. So if the first one ends up having size 4 and the second one size 8, we will only check against the first one and determine that there is NoModRef.