Closed cbeuw closed 10 months ago
What's the original test case, not pre-licm? I think this is an artifact of accessing uninitialized memory.
This is the reproduction in Rust's custom MIR:
#![feature(custom_mir, core_intrinsics)]
#![allow(unused_parens, unused_assignments, overflowing_literals)]
extern crate core;
use core::intrinsics::mir::*;
/// Debug sink for the reproducer: prints only the `u8` element `val3.1.1`,
/// followed by a newline. All other parameters are accepted but never read.
// `inline(never)` asks the compiler to keep this as an out-of-line call.
#[inline(never)]
fn dump_var1(
f: usize,
var0: usize,
val0: [char; 7],
var1: usize,
val1: char,
var2: usize,
val2: u16,
var3: usize,
val3: ([u128;8], (u32, u8, bool)),
) {
// Only the middle (u8) element of the inner tuple is observed.
println!("{}", val3.1.1);
}
/// Debug sink for the reproducer: prints `val2.1.1` and then `val3.1`
/// (both `u8`), one per line. All other parameters are accepted but never read.
// `inline(never)` asks the compiler to keep this as an out-of-line call.
#[inline(never)]
fn dump_var2(
f: usize,
var0: usize,
val0: u64,
var1: usize,
val1: char,
var2: usize,
val2: ([u128; 8], (u32, u8, bool)),
var3: usize,
val3: (u32, u8,bool),
) {
// u8 element of the tuple nested inside `val2`.
println!("{}", val2.1.1);
// u8 element of the flat tuple `val3`.
println!("{}", val3.1);
}
/// Hand-written custom MIR (`runtime` dialect, `initial` phase) that drives the
/// reproduction.
///
/// Tracing the assignments by hand along the path bb5 -> bb10 -> bb11 -> bb16 ->
/// bb17 -> bb19, the three printed values are 233, 0 and 233, which is what the
/// `-Copt-level=0` run prints. Keep the statement order intact: the miscompile
/// depends on the exact sequence of reads and writes to `_27.1`.
#[custom_mir(dialect = "runtime", phase = "initial")]
fn fn1() {
mir! {
let _1: isize;
let _5: u64;
let _6: isize;
let _10: isize;
let _12: isize;
let _14: isize;
let _20: u16;
let _25: isize;
let _28: u16;
let _29: u64;
let _23: bool;
let _30: bool;
let _9: char;
let _13: char;
let _19: char;
let _21: [char; 5];
let _24: [u128; 8];
let _26: [char; 7];
let _36: [u8; 1];
let _32: (u16, i128);
let _37: (u32, u8, bool);
let _31: Adt64;
let _27: ([u128; 8], (u32, u8, bool));
let _33: ([u128; 8], (u32, u8, bool));
let _43: ();
{
// Entry block: seed the scalars. `_10` and `_12` start as copies of `_1` (-47).
_1 = -47;
_5 = 0;
_6 = 0;
_9 = 'a';
_10 = _1;
_12 = _1;
_13 = 'a';
_19 = _9;
_20 = 1207_u16;
Goto(bb4)
}
bb4 = {
// Re-entered from bb11 when `_27.1.0 == 0`.
_21 = [_13,_9,_9,_9,_19];
Goto(bb5)
}
bb5 = {
// Loop header/body: this block branches back to itself when `_14 == 1`.
// `_14` captures `_12` before `_12` is recomputed below.
_14 = _12;
_25 = _1 * _10;
// `_5 != _5` is always false; `_23` is later passed to `fn3`.
_23 = _5 != _5;
_10 = _25;
_24 = [250977496372426266842391755486066952967_u128,279914534456615290421521357532699476871_u128,246375854589549477800802334812336807930_u128,214765644317434283193208114197787180018_u128,17428368806739377979573757188831317740_u128,215649541041365377629339076192777392465_u128,99533020080114872928824195912378985846_u128,141971359805814584223867407839838043800_u128];
// First write to `_27.1.0`; bb10 overwrites it before anything reads it.
_27.1.0 = !571572756_u32;
_28 = _9 as u16;
_12 = _10 << _20;
_26 = [_19,_13,_13,_13,_13,_9,_19];
match _14 {
1 => bb5,
// 2^128 - 47, i.e. the 128-bit two's-complement pattern of -47;
// presumably how `_14 == -47` is spelled for an `isize` switch here.
340282366920938463463374607431768211409 => bb10,
_ => bb20
}
}
bb10 = {
_29 = !_5;
// Second write to `_27.1.0`, immediately followed by the call below,
// which assigns the whole `_27.1` tuple (u32, u8 and bool together).
_27.1.0 = _9 as u32;
Call(_27.1 = fn3(_23), bb11)
}
bb11 = {
// Reads the u8 that `fn3` returned (22), so this is !22 = 233.
_33.1.1 = !_27.1.1;
_31 = Adt64 { fld0: _33.1.1 };
// Overwrites the same u8 field after it was read above; `_6` is 0.
_27.1.1 = _6 as u8;
_30 = _27.1.2;
_32 = (_28, 17720767051099432542734011916916350651_i128);
// Replaces `_33.1` wholesale, so `_33.1.1` is now the 0 written two
// statements above, while `_31.fld0` still holds 233.
_33.1 = (_27.1.0, _27.1.1, _30);
_37 = (_33.1.0, _31.fld0, _33.1.2);
_36 = [_31.fld0];
match _27.1.0 {
0 => bb4,
// 1457839173 is the u32 that `fn3` returns.
1457839173 => bb16,
_ => bb20
}
}
bb16 = {
_33.0 = _24;
// `_27.1` becomes `_37`, whose u8 element is `_31.fld0`.
_27 = (_33.0, _37);
Goto(bb17)
}
bb17 = {
// Prints `_27.1.1`.
Call(_43 = dump_var1(1_usize, 26_usize, Move(_26), 19_usize, Move(_19), 20_usize, Move(_20), 27_usize, Move(_27)), bb19)
}
bb19 = {
// Prints `_33.1.1` and then `_37.1`.
Call(_43 = dump_var2(1_usize, 29_usize, Move(_29), 13_usize, Move(_13), 33_usize, Move(_33), 37_usize, Move(_37)), bb20)
}
bb20 = {
Return()
}
}
}
/// Returns the fixed pair `1457839173_u32`, `22_u8` together with the caller's
/// `_15` flag. `fn1` switches on the `u32` and bit-negates the `u8`.
fn fn3(mut _15: bool) -> (u32, u8, bool) {
return (1457839173_u32, 22_u8, _15);
}
/// Program entry point: runs the reproducer once.
pub fn main() {
fn1();
}
/// Single-field wrapper around a `u8`; `fn1` uses it to hold the negated byte.
pub struct Adt64 {
fld0: u8,
}
$ rustc -Zmir-opt-level=0 -Copt-level=0 repro.rs && ./repro
233
0
233
$ rustc -Zmir-opt-level=0 -Copt-level=1 repro.rs && ./repro
255
0
255
$ rustc -Zmir-opt-level=0 -Copt-level=2 repro.rs && ./repro
0
0
0
But I couldn't get a pure IR reproduction without comparing the IR before and after LICM.
I now have a reproduction in the original (unoptimized) IR, which shouldn't contain any uninitialized memory accesses. It is self-contained.
Right:
$ clang -O0 repro.ll && ./a.out
0
1457839173
233
0
1457839173
161
Wrong:
$ clang -O1 repro.ll && ./a.out
0
1457839173
255
0
1457839173
161
Reproduction:
; ModuleID = 'repro.6b9acda9b5bccdd7-cgu.0'
source_filename = "repro.6b9acda9b5bccdd7-cgu.0"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@vtable.0 = private unnamed_addr constant <{ ptr, [16 x i8], ptr, ptr, ptr }> <{ ptr @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17he90579972123ee41E", [16 x i8] c"\08\00\00\00\00\00\00\00\08\00\00\00\00\00\00\00", ptr @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h56f5507f08b0f565E", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h8317b2bf36a059c4E", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h8317b2bf36a059c4E" }>, align 8
@alloc_87551382a9de3243abbfdbda2f0b586b = private unnamed_addr constant <{ [4 x i8] }> <{ [4 x i8] c"%d\0A\00" }>, align 1
; std::sys_common::backtrace::__rust_begin_short_backtrace
; Function Attrs: noinline nonlazybind uwtable
; Forwards %f to FnOnce::call_once, then executes an empty inline-asm statement
; with a memory clobber. Not reachable from the @main defined in this module,
; which calls fn1 directly.
define internal void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h7690185bae7be86dE(ptr %f) unnamed_addr #0 {
start:
; call core::ops::function::FnOnce::call_once
call void @_ZN4core3ops8function6FnOnce9call_once17h51fc9644382a8c58E(ptr %f)
call void asm sideeffect "", "~{memory}"(), !srcloc !4
ret void
}
; std::rt::lang_start::{{closure}}
; Function Attrs: inlinehint nonlazybind uwtable
; Loads the function pointer stored behind %_1, runs it through
; __rust_begin_short_backtrace, then returns the i8 from Termination::report
; zero-extended to i32. Only referenced from @vtable.0 and the call_once shims.
define internal i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h8317b2bf36a059c4E"(ptr align 8 %_1) unnamed_addr #1 {
start:
%self = alloca i8, align 1
%_4 = load ptr, ptr %_1, align 8, !nonnull !5, !noundef !5
; call std::sys_common::backtrace::__rust_begin_short_backtrace
call void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h7690185bae7be86dE(ptr %_4)
; call <() as std::process::Termination>::report
%0 = call i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17h66ba62261e510d16E"()
; The exit code is spilled to a stack slot and reloaded before widening.
store i8 %0, ptr %self, align 1
%_6 = load i8, ptr %self, align 1, !noundef !5
%_0 = zext i8 %_6 to i32
ret i32 %_0
}
; core::ops::function::FnOnce::call_once{{vtable.shim}}
; Function Attrs: inlinehint nonlazybind uwtable
; Vtable entry: loads the pointer stored behind %_1 and forwards it to the
; i32-returning call_once below, returning its result unchanged.
define internal i32 @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h56f5507f08b0f565E"(ptr %_1) unnamed_addr #1 {
start:
%_2 = alloca {}, align 1
%0 = load ptr, ptr %_1, align 8, !nonnull !5, !noundef !5
; call core::ops::function::FnOnce::call_once
%_0 = call i32 @_ZN4core3ops8function6FnOnce9call_once17h64ee9a53e78ade0cE(ptr %0)
ret i32 %_0
}
; core::ops::function::FnOnce::call_once
; Function Attrs: inlinehint nonlazybind uwtable
; Performs an indirect call through %_1 with no arguments; the zero-sized
; alloca %_2 is never used.
define internal void @_ZN4core3ops8function6FnOnce9call_once17h51fc9644382a8c58E(ptr %_1) unnamed_addr #1 {
start:
%_2 = alloca {}, align 1
call void %_1()
ret void
}
; core::ops::function::FnOnce::call_once
; Function Attrs: inlinehint nonlazybind uwtable
; Stores the incoming pointer to a stack slot and invokes the lang_start closure
; with the slot's address. Normal return goes to %bb1; an unwind goes through
; %cleanup, which saves the landing-pad pair to %1, and %bb3, which reloads it
; and resumes unwinding.
define internal i32 @_ZN4core3ops8function6FnOnce9call_once17h64ee9a53e78ade0cE(ptr %0) unnamed_addr #1 personality ptr @rust_eh_personality {
start:
%1 = alloca { ptr, i32 }, align 8
%_2 = alloca {}, align 1
%_1 = alloca ptr, align 8
store ptr %0, ptr %_1, align 8
; invoke std::rt::lang_start::{{closure}}
%_0 = invoke i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h8317b2bf36a059c4E"(ptr align 8 %_1)
to label %bb1 unwind label %cleanup
bb3: ; preds = %cleanup
; Rebuild the { ptr, i32 } exception pair from the stack slot and resume.
%2 = load ptr, ptr %1, align 8, !noundef !5
%3 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 1
%4 = load i32, ptr %3, align 8, !noundef !5
%5 = insertvalue { ptr, i32 } poison, ptr %2, 0
%6 = insertvalue { ptr, i32 } %5, i32 %4, 1
resume { ptr, i32 } %6
cleanup: ; preds = %start
; Spill both landing-pad components to %1 before branching to %bb3.
%7 = landingpad { ptr, i32 }
cleanup
%8 = extractvalue { ptr, i32 } %7, 0
%9 = extractvalue { ptr, i32 } %7, 1
%10 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 0
store ptr %8, ptr %10, align 8
%11 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 1
store i32 %9, ptr %11, align 8
br label %bb3
bb1: ; preds = %start
ret i32 %_0
}
; core::ptr::drop_in_place<std::rt::lang_start<()>::{{closure}}>
; Function Attrs: inlinehint nonlazybind uwtable
; No-op drop glue: returns immediately without touching %_1.
define internal void @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17he90579972123ee41E"(ptr align 8 %_1) unnamed_addr #1 {
start:
ret void
}
; <() as std::process::Termination>::report
; Function Attrs: inlinehint nonlazybind uwtable
; Always returns the constant 0.
define internal i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17h66ba62261e510d16E"() unnamed_addr #1 {
start:
ret i8 0
}
; repro::dump_var
; Function Attrs: noinline nonlazybind uwtable
; Prints three integers from the aggregate behind %val2, each through printf
; with the "%d\n" format string in @alloc_87551382a9de3243abbfdbda2f0b586b:
;   1. element 0 of the [8 x i128] array, truncated to i32,
;   2. the i32 at the start of the nested { i32, i8, i8, [2 x i8] } struct,
;   3. the first i8 of that struct, zero-extended to i32.
define internal void @_ZN5repro8dump_var17hc219273b1201587bE(ptr align 8 %val2) unnamed_addr #0 {
start:
%0 = getelementptr inbounds [8 x i128], ptr %val2, i64 0, i64 0
%_8 = load i128, ptr %0, align 8, !noundef !5
%_7 = trunc i128 %_8 to i32
%_2 = call i32 (ptr, ...) @printf(ptr @alloc_87551382a9de3243abbfdbda2f0b586b, i32 %_7)
%1 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %val2, i32 0, i32 1
%_18 = load i32, ptr %1, align 8, !noundef !5
%_12 = call i32 (ptr, ...) @printf(ptr @alloc_87551382a9de3243abbfdbda2f0b586b, i32 %_18)
%2 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %val2, i32 0, i32 1
%3 = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr %2, i32 0, i32 1
; This i8 is the value that differs between the -O0 and -O1 runs.
%_25 = load i8, ptr %3, align 4, !noundef !5
%_24 = zext i8 %_25 to i32
%_19 = call i32 (ptr, ...) @printf(ptr @alloc_87551382a9de3243abbfdbda2f0b586b, i32 %_24)
ret void
}
; repro::fn1
; Function Attrs: nonlazybind uwtable
; Unoptimized body of the reproducer. @main calls it with
; (i64 -47, i32 97, i64 -47, i64 -47).
;
; Tracing those arguments by hand: bb2 runs once and takes the -47 case to bb3,
; which ends in bb5. The two dump_var calls then print
;   0, 1457839173, 233   (for %_11)
;   0, 1457839173, 161   (for %_14)
; which matches the `clang -O0` output. The sequence to keep intact is in bb3:
; an i32 store and an 8-byte memcpy to the same address, followed by an i8 load
; and an i8 store at offset +4 inside that 8-byte range.
define internal void @_ZN5repro3fn117h7928ea28c0896337E(i64 %0, i32 %_2, i64 %1, i64 %2) unnamed_addr #2 {
start:
%3 = alloca i64, align 8
%_16 = alloca { i32, i8, i8, [2 x i8] }, align 4
%_15 = alloca [1 x i8], align 1
%_14 = alloca { [8 x i128], { i32, i8, i8, [2 x i8] } }, align 8
%_13 = alloca i8, align 1
%_11 = alloca { [8 x i128], { i32, i8, i8, [2 x i8] } }, align 8
%_9 = alloca [8 x i128], align 8
%_7 = alloca [5 x i32], align 4
%_4 = alloca i64, align 8
%_3 = alloca i64, align 8
%_1 = alloca i64, align 8
; Spill the three i64 arguments to their stack slots.
store i64 %0, ptr %_1, align 8
store i64 %1, ptr %_3, align 8
store i64 %2, ptr %_4, align 8
br label %bb1
bb1: ; preds = %bb3, %start
; Fill all five elements of %_7 with %_2.
%4 = getelementptr inbounds [5 x i32], ptr %_7, i64 0, i64 0
store i32 %_2, ptr %4, align 4
%5 = getelementptr inbounds [5 x i32], ptr %_7, i64 0, i64 1
store i32 %_2, ptr %5, align 4
%6 = getelementptr inbounds [5 x i32], ptr %_7, i64 0, i64 2
store i32 %_2, ptr %6, align 4
%7 = getelementptr inbounds [5 x i32], ptr %_7, i64 0, i64 3
store i32 %_2, ptr %7, align 4
%8 = getelementptr inbounds [5 x i32], ptr %_7, i64 0, i64 4
store i32 %_2, ptr %8, align 4
br label %bb2
bb2: ; preds = %bb2, %bb1
; Self-loop: repeats while the value copied from %_3 into %_4 equals 1.
%9 = load i64, ptr %_3, align 8, !noundef !5
store i64 %9, ptr %_4, align 8
; %_1 = %_1 * %_1
%10 = load i64, ptr %_1, align 8, !noundef !5
%11 = load i64, ptr %_1, align 8, !noundef !5
%_10 = mul i64 %10, %11
store i64 %_10, ptr %_1, align 8
; Zero all 128 bytes of %_9.
%12 = getelementptr inbounds [8 x i128], ptr %_9, i64 0, i64 0
call void @llvm.memset.p0.i64(ptr align 8 %12, i8 0, i64 128, i1 false)
; In-loop i32 store to the start of %_11's nested struct (same address as %17
; and %18 in bb3).
%13 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %_11, i32 0, i32 1
store i32 -571572757, ptr %13, align 8
; %_3 = %_1 << 55
%14 = load i64, ptr %_1, align 8, !noundef !5
%15 = shl i64 %14, 55
store i64 %15, ptr %_3, align 8
%16 = load i64, ptr %_4, align 8, !noundef !5
switch i64 %16, label %bb7 [
i64 1, label %bb2
i64 -47, label %bb3
]
bb7: ; preds = %bb5, %bb3, %bb2
ret void
bb3: ; preds = %bb2
; Second i32 store to the start of %_11's nested struct.
%17 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %_11, i32 0, i32 1
store i32 %_2, ptr %17, align 8
%18 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %_11, i32 0, i32 1
; call repro::fn3
%19 = call i64 @_ZN5repro3fn317h2919c21872baa48eE()
; Wide write: the i64 result is copied over all 8 bytes of the nested struct,
; covering the i32 just stored and the i8 fields that follow it.
store i64 %19, ptr %3, align 8
call void @llvm.memcpy.p0.p0.i64(ptr align 4 %18, ptr align 8 %3, i64 8, i1 false)
%20 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %_14, i32 0, i32 1
%21 = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr %20, i32 0, i32 1
%22 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %_11, i32 0, i32 1
%23 = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr %22, i32 0, i32 1
; Narrow read of the i8 inside the range written by the memcpy above; it must
; observe the byte from fn3's result (22). Its complement (233) goes to %_14.
%24 = load i8, ptr %23, align 4, !noundef !5
%25 = xor i8 %24, -1
store i8 %25, ptr %21, align 4
; Keep a copy of the complemented byte in %_13.
%26 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %_14, i32 0, i32 1
%27 = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr %26, i32 0, i32 1
%28 = load i8, ptr %27, align 4, !noundef !5
store i8 %28, ptr %_13, align 1
; Narrow write to the same i8 in %_11 that was loaded above: the low byte of
; %_1 (2209 after one squaring of -47, i.e. 161).
%29 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %_11, i32 0, i32 1
%30 = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr %29, i32 0, i32 1
%31 = load i64, ptr %_1, align 8, !noundef !5
%32 = trunc i64 %31 to i8
store i8 %32, ptr %30, align 4
; Read the second i8 of %_11's nested struct as an i1 (range metadata [0, 2)).
%33 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %_11, i32 0, i32 1
%34 = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr %33, i32 0, i32 2
%35 = load i8, ptr %34, align 1, !range !6, !noundef !5
%_12 = trunc i8 %35 to i1
; Copy %_11's nested struct into %_14's, field by field.
%36 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %_14, i32 0, i32 1
%37 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %_11, i32 0, i32 1
%38 = load i32, ptr %37, align 8, !noundef !5
store i32 %38, ptr %36, align 8
%39 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %_11, i32 0, i32 1
%40 = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr %39, i32 0, i32 1
%41 = load i8, ptr %40, align 4, !noundef !5
%42 = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr %36, i32 0, i32 1
store i8 %41, ptr %42, align 4
%43 = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr %36, i32 0, i32 2
%44 = zext i1 %_12 to i8
store i8 %44, ptr %43, align 1
; Build %_16 from %_14's i32, the saved byte in %_13, and %_14's second i8.
%45 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %_14, i32 0, i32 1
%46 = load i32, ptr %45, align 8, !noundef !5
store i32 %46, ptr %_16, align 4
%47 = load i8, ptr %_13, align 1, !noundef !5
%48 = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr %_16, i32 0, i32 1
store i8 %47, ptr %48, align 4
%49 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %_14, i32 0, i32 1
%50 = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr %49, i32 0, i32 2
%51 = load i8, ptr %50, align 1, !range !6, !noundef !5
%52 = trunc i8 %51 to i1
%53 = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr %_16, i32 0, i32 2
%54 = zext i1 %52 to i8
store i8 %54, ptr %53, align 1
%55 = load i8, ptr %_13, align 1, !noundef !5
%56 = getelementptr inbounds [1 x i8], ptr %_15, i64 0, i64 0
store i8 %55, ptr %56, align 1
; Dispatch on the i32 in %_11; 1457839173 is the value fn3 stores.
%57 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %_11, i32 0, i32 1
%58 = load i32, ptr %57, align 8, !noundef !5
switch i32 %58, label %bb7 [
i32 0, label %bb1
i32 1457839173, label %bb5
]
bb5: ; preds = %bb3
; Copy the zeroed array into %_14 and on into %_11 (128 bytes each), then
; overwrite %_11's nested struct with the 8 bytes of %_16.
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %_14, ptr align 8 %_9, i64 128, i1 false)
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %_11, ptr align 8 %_14, i64 128, i1 false)
%59 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %_11, i32 0, i32 1
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %59, ptr align 4 %_16, i64 8, i1 false)
; call repro::dump_var
call void @_ZN5repro8dump_var17hc219273b1201587bE(ptr align 8 %_11)
; call repro::dump_var
call void @_ZN5repro8dump_var17hc219273b1201587bE(ptr align 8 %_14)
br label %bb7
}
; repro::fn3
; Function Attrs: nonlazybind uwtable
; Builds { i32 1457839173, i8 22, i8 1 } in a stack slot and returns the slot's
; 8 bytes as a single i64. The trailing [2 x i8] is never stored to, so the i64
; load also reads two bytes that were not written.
define internal i64 @_ZN5repro3fn317h2919c21872baa48eE() unnamed_addr #2 {
start:
%_0 = alloca { i32, i8, i8, [2 x i8] }, align 4
store i32 1457839173, ptr %_0, align 4
%0 = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr %_0, i32 0, i32 1
store i8 22, ptr %0, align 4
%1 = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr %_0, i32 0, i32 2
store i8 1, ptr %1, align 1
; With zero in the two unwritten bytes this is 1195458747461
; (1457839173 + 22 * 2^32 + 1 * 2^40).
%2 = load i64, ptr %_0, align 4
ret i64 %2
}
; repro::main
; Function Attrs: nonlazybind uwtable
; Calls fn1 with the same constants as the exported @main below.
define internal void @_ZN5repro4main17he6378134329df5bdE() unnamed_addr #2 {
start:
; call repro::fn1
call void @_ZN5repro3fn117h7928ea28c0896337E(i64 -47, i32 97, i64 -47, i64 -47)
ret void
}
; Function Attrs: nonlazybind uwtable
; Stub personality routine: ignores all five arguments and returns 0. It is
; defined here so the `personality` reference in call_once resolves.
define i32 @rust_eh_personality(i32, i32, i64, ptr, ptr) unnamed_addr #2 {
start:
ret i32 0
}
; Function Attrs: nonlazybind uwtable
declare i32 @printf(ptr, ...) unnamed_addr #2
; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #3
; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #4
; Function Attrs: nonlazybind
; Process entry point: calls fn1 directly with fixed constants, without going
; through the lang_start shims defined earlier, and returns exit code 0.
define i32 @main(i32 %0, ptr %1) unnamed_addr #5 {
call void @_ZN5repro3fn117h7928ea28c0896337E(i64 -47, i32 97, i64 -47, i64 -47)
ret i32 0
}
attributes #0 = { noinline nonlazybind uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #1 = { inlinehint nonlazybind uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #2 = { nonlazybind uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #3 = { nocallback nofree nounwind willreturn memory(argmem: write) }
attributes #4 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
attributes #5 = { nonlazybind "target-cpu"="x86-64" }
!llvm.module.flags = !{!0, !1, !2}
!llvm.ident = !{!3}
!0 = !{i32 8, !"PIC Level", i32 2}
!1 = !{i32 7, !"PIE Level", i32 2}
!2 = !{i32 2, !"RtLibUseGOT", i32 1}
!3 = !{!"rustc version 1.76.0-nightly (a1a37735c 2023-11-23)"}
!4 = !{i32 2020822}
!5 = !{}
!6 = !{i8 0, i8 2}
Thanks for the new example. This does look like a LICM scalar promotion miscompile to me. Before LICM we have:
%i12.le = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %_11, i64 0, i32 1
store i32 97, ptr %i12.le, align 8
store i64 1195458747461, ptr %i12.le, align 4
%i19 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %_14, i64 0, i32 1
%i20 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %_14, i64 0, i32 1, i32 1
%i22 = getelementptr inbounds { [8 x i128], { i32, i8, i8, [2 x i8] } }, ptr %_11, i64 0, i32 1, i32 1
%i23 = load i8, ptr %i22, align 4, !noundef !4
%i24 = xor i8 %i23, -1
store i8 %i24, ptr %i20, align 4
%i31 = trunc i64 %_10.lcssa8 to i8
store i8 %i31, ptr %i22, align 4
After LICM we have:
store i32 97, ptr %i12.le, align 8
store i64 1195458747461, ptr %i12.le, align 4
%i24 = xor i8 %i2311, -1
%i31 = trunc i64 %_10.lcssa8 to i8
It looks like `load i8, ptr %i22` was promoted, even though there is a wide store, `store i64 1195458747461, ptr %i12.le`, that clobbers the memory it reads.
I believe this is a good reduction of the issue:
; Reduced reproducer for the LICM scalar-promotion miscompile.
; %p and %p.copy are two separate but identical GEPs (byte offset 8 into %a);
; %p2 is byte offset 12, which lies inside the 8 bytes written by the i64 store
; through %p.copy.
define void @test(i1 %c, i8 %x) {
start:
%a = alloca [16 x i8], align 8
%p = getelementptr inbounds i8, ptr %a, i64 8
%p.copy = getelementptr inbounds i8, ptr %a, i64 8
%p2 = getelementptr inbounds i8, ptr %a, i64 12
br label %loop
loop:
; 4-byte store through %p.
store i32 u0x11111111, ptr %p
; 4-byte store through the identical pointer %p.copy.
store i32 u0x22, ptr %p.copy
; 8-byte store through %p.copy; it overwrites bytes 8..15, including %p2.
store i64 u0x3333333333333333, ptr %p.copy
; Must observe the byte written by the i64 store above on every iteration, so
; this load cannot be replaced by a value carried around the loop.
%val = load i8, ptr %p2
call void @use(i8 %val)
; Overwritten again by the i64 store at the start of the next iteration.
store i8 0, ptr %p2
br i1 %c, label %loop, label %exit
exit:
ret void
}
declare void @use(i8)
This turns into:
%0 = phi i8 [ 0, %loop ], [ %p2.promoted, %start ]
store i32 286331153, ptr %p, align 4
store i32 34, ptr %p.copy, align 4
store i64 3689348814741910323, ptr %p.copy, align 4
call void @use(i8 %0)
To reproduce, we need all three of the stores, and we need the stores to happen through two different but identical pointers.
`print-alias-sets` for this example gives:
Alias Set Tracker: 3 alias sets for 3 pointer values.
AliasSet[0x8b8b690, 2] must alias, Mod Pointers: (ptr %p, LocationSize::precise(4)), (ptr %p.copy, LocationSize::upperBound(8))
AliasSet[0x8b8b7c0, 1] must alias, Mod/Ref Pointers: (ptr %p2, LocationSize::precise(1))
AliasSet[0x8b8c020, 1] may alias, Mod/Ref
1 Unknown instructions: call void @use(i8 %val)
This is not correct: the first two alias sets should have been merged. The bug is likely in the AliasSetTracker (AST).
I believe the problem is that AST has special handling for MustAlias sets, where it only checks the first PointerRec, as they should all be equivalent anyway. However, while the pointers are all MustAlias, they can have different LocationSizes. So if the first one ends up having size 4 and the second one size 8, we only check against the first one and wrongly determine that there is NoModRef.
When compiled with the helper, the program should print the expected values. However, with the LICM optimisation, it prints something else.