llvm / llvm-project

The LLVM Project is a collection of modular and reusable compiler and toolchain technologies.
http://llvm.org
Other
26.82k stars 10.99k forks source link

[PowerPC] power9 f128 ABI issue: bitcasts can reverse halves #92246

Open tgross35 opened 1 month ago

tgross35 commented 1 month ago

Doing a roundtrip i128 -> f128 -> i128 should work but it appears to be broken when the calls are external, possibly depending on some optimizations. It seems like the f128/i128 is getting split into two 64-bit values, which are getting reversed at some point. The problem does not seem to happen without enabling the pwr9 target feature.

Originally identified with rust in https://github.com/rust-lang/rust/issues/125102. Source files:

// transmute_lib.rs

#![feature(f128)]
#![allow(improper_ctypes_definitions)] // ok since llvm 18

use std::mem::transmute;

/// Reinterprets the bits of `a` as an `f128` and returns the result.
///
/// Exported unmangled with the C ABI so that `transmute_call_lib.rs` can call
/// it across a crate boundary as an external function.
#[no_mangle]
pub extern "C" fn entry(a: u128) -> f128 {
    // SAFETY: `transmute` only compiles when the source and target types have
    // the same size, and the value is reinterpreted bit-for-bit (a single
    // `bitcast i128 ... to fp128` in the emitted IR).
    unsafe { transmute::<u128, f128>(a) }
}
// transmute_call_lib.rs

#![feature(f128)]
#![allow(improper_ctypes)] // ok since llvm 18

extern crate transmute_lib;

use std::mem::transmute;
use std::process::exit;

// Declaration of `entry` as defined in `transmute_lib.rs`; the signature here
// (`u128` in, `f128` out, C ABI) must match that definition.
extern "C" {
    fn entry(a: u128) -> f128;
}

/// Round-trips the value `0x1` through `u128 -> f128 -> u128` and reports the
/// outcome through the process exit code: 0 if the value is preserved, 1 if
/// the set bit ended up at bit 64, and 2 for any other result.
fn main() {
    let a = unsafe { entry(0x1) }; // bitcast in an external function
    let res = unsafe { transmute::<f128, u128>(a) }; // bitcast back

    if res == 0x1 {
        exit(0); // correct
    } else if res == 0x1 << 64 {
        // The set bit moved from the low 64-bit half to the high 64-bit half.
        exit(1); // incorrect value with endianness flipped
    } else {
        exit(2); // unexpected value
    }
}

Versions:

$ rustc -Vv
rustc 1.80.0-nightly (9c9b56879 2024-05-05)
binary: rustc
commit-hash: 9c9b568792ef20d8459c745345dd3e79b7c7fa8c
commit-date: 2024-05-05
host: x86_64-unknown-linux-gnu
release: 1.80.0-nightly
LLVM version: 18.1.

Compilation:

rustc transmute_lib.rs --target powerpc64-unknown-linux-gnu -C linker=powerpc64-linux-gnu-gcc -Ctarget-cpu=pwr9 --crate-type=lib
rustc transmute_call_lib.rs --target powerpc64-unknown-linux-gnu -C linker=powerpc64-linux-gnu-gcc -Ctarget-cpu=pwr9 -o transmute_call_lib.rs.ppc64.pwr9 --extern transmute_lib=libtransmute_lib.rlib

Execution:

$ qemu-ppc64 -L /usr/powerpc64-linux-gnu/ transmute_call_lib.rs.ppc64.pwr9; echo $?
1

The above should output 0. Full IR:

Library function

```llvm
; ModuleID = 'transmute_lib.1e179323af95c5c5-cgu.0'
source_filename = "transmute_lib.1e179323af95c5c5-cgu.0"
target datalayout = "E-m:e-Fi64-i64:64-n32:64-S128-v256:256:256-v512:512:512"
target triple = "powerpc64-unknown-linux-gnu"

; Function Attrs: uwtable
define fp128 @entry(i128 %a) unnamed_addr #0 {
start:
  %_0 = bitcast i128 %a to fp128
  ret fp128 %_0
}

attributes #0 = { uwtable "probe-stack"="inline-asm" "target-cpu"="pwr9" }

!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
!0 = !{i32 8, !"PIC Level", i32 2}
!1 = !{!"rustc version 1.80.0-nightly (9c9b56879 2024-05-05)"}
```

Executable The only relevant part is in `transmute_call_lib::main` ```llvm ; ModuleID = 'transmute_call_lib.1791869a4d73965-cgu.0' source_filename = "transmute_call_lib.1791869a4d73965-cgu.0" target datalayout = "E-m:e-Fi64-i64:64-n32:64-S128-v256:256:256-v512:512:512" target triple = "powerpc64-unknown-linux-gnu" @vtable.0 = private unnamed_addr constant <{ ptr, [16 x i8], ptr, ptr, ptr }> <{ ptr @"_ZN4core3ptr85drop_in_place$LT$std..rt..lan ; std::sys_common::backtrace::__rust_begin_short_backtrace ; Function Attrs: noinline uwtable define internal void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17hf94855fdbb5829c5E(ptr %f) unnamed_addr #0 { start: ; call core::ops::function::FnOnce::call_once call void @_ZN4core3ops8function6FnOnce9call_once17h3056e9bd87d66482E(ptr %f) call void asm sideeffect "", "~{memory}"(), !srcloc !3 ret void } ; std::rt::lang_start ; Function Attrs: uwtable define hidden i64 @_ZN3std2rt10lang_start17hdc6989dbc5702db1E(ptr %main, i64 %argc, ptr %argv, i8 %sigpipe) unnamed_addr #1 { start: %_8 = alloca [8 x i8], align 8 %_5 = alloca [8 x i8], align 8 store ptr %main, ptr %_8, align 8 ; call std::rt::lang_start_internal %0 = call i64 @_ZN3std2rt19lang_start_internal17he440c14c55fd4760E(ptr align 1 %_8, ptr align 8 @vtable.0, i64 %argc, ptr %argv, store i64 %0, ptr %_5, align 8 %v = load i64, ptr %_5, align 8 ret i64 %v } ; std::rt::lang_start::{{closure}} ; Function Attrs: inlinehint uwtable define internal i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17hdb386b686ec8565dE"(ptr align 8 %_1) unnamed_addr #2 { start: %self = alloca [1 x i8], align 1 %_4 = load ptr, ptr %_1, align 8 ; call std::sys_common::backtrace::__rust_begin_short_backtrace call void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17hf94855fdbb5829c5E(ptr %_4) ; call <() as std::process::Termination>::report %0 = call i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17h3606faab680ab85fE"() store i8 %0, ptr 
%self, align 1 %_6 = load i8, ptr %self, align 1 %_0 = zext i8 %_6 to i32 ret i32 %_0 } ; core::ops::function::FnOnce::call_once{{vtable.shim}} ; Function Attrs: inlinehint uwtable define internal i32 @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h956497c27ef6ef7fE"(ptr %_1) unnamed start: %_2 = alloca [0 x i8], align 1 %0 = load ptr, ptr %_1, align 8 ; call core::ops::function::FnOnce::call_once %_0 = call i32 @_ZN4core3ops8function6FnOnce9call_once17h9737a54d47c05e22E(ptr %0) ret i32 %_0 } ; core::ops::function::FnOnce::call_once ; Function Attrs: inlinehint uwtable define internal void @_ZN4core3ops8function6FnOnce9call_once17h3056e9bd87d66482E(ptr %_1) unnamed_addr #2 { start: %_2 = alloca [0 x i8], align 1 call void %_1() ret void } ; core::ops::function::FnOnce::call_once ; Function Attrs: inlinehint uwtable define internal i32 @_ZN4core3ops8function6FnOnce9call_once17h9737a54d47c05e22E(ptr %0) unnamed_addr #2 personality ptr @rust_eh_p start: %1 = alloca [16 x i8], align 8 %_2 = alloca [0 x i8], align 1 %_1 = alloca [8 x i8], align 8 store ptr %0, ptr %_1, align 8 ; invoke std::rt::lang_start::{{closure}} %_0 = invoke i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17hdb386b686ec8565dE"(ptr align 8 %_1) to label %bb1 unwind label %cleanup bb3: ; preds = %cleanup %2 = load ptr, ptr %1, align 8 %3 = getelementptr inbounds i8, ptr %1, i64 8 %4 = load i32, ptr %3, align 8 %5 = insertvalue { ptr, i32 } poison, ptr %2, 0 %6 = insertvalue { ptr, i32 } %5, i32 %4, 1 resume { ptr, i32 } %6 cleanup: ; preds = %start %7 = landingpad { ptr, i32 } cleanup %8 = extractvalue { ptr, i32 } %7, 0 %9 = extractvalue { ptr, i32 } %7, 1 store ptr %8, ptr %1, align 8 %10 = getelementptr inbounds i8, ptr %1, i64 8 store i32 %9, ptr %10, align 8 br label %bb3 bb1: ; preds = %start ret i32 %_0 } ; core::ptr::drop_in_place::{{closure}}> ; Function Attrs: inlinehint uwtable define internal void 
@"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17h64db6 start: ret void } ; <() as std::process::Termination>::report ; Function Attrs: inlinehint uwtable define internal i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17h3606faab680ab85fE"() unnamed_addr #2 { start: ret i8 0 } ; transmute_call_lib::main ; Function Attrs: uwtable define internal void @_ZN18transmute_call_lib4main17hfa19d9d60e61b95dE() unnamed_addr #1 { start: %a = call fp128 @entry(i128 1) %res = bitcast fp128 %a to i128 %0 = icmp eq i128 %res, 1 br i1 %0, label %bb2, label %bb3 bb2: ; preds = %start ; call std::process::exit call void @_ZN3std7process4exit17hb2f96155fda111fdE(i32 0) #5 unreachable bb3: ; preds = %start %_4 = icmp eq i128 %res, 18446744073709551616 br i1 %_4, label %bb5, label %bb6 bb6: ; preds = %bb3 ; call std::process::exit call void @_ZN3std7process4exit17hb2f96155fda111fdE(i32 2) #5 unreachable bb5: ; preds = %bb3 ; call std::process::exit call void @_ZN3std7process4exit17hb2f96155fda111fdE(i32 1) #5 unreachable } ; std::rt::lang_start_internal ; Function Attrs: uwtable declare i64 @_ZN3std2rt19lang_start_internal17he440c14c55fd4760E(ptr align 1, ptr align 8, i64, ptr, i8) unnamed_addr #1 ; Function Attrs: uwtable declare zeroext i32 @rust_eh_personality(i32 signext, i32 zeroext, i64, ptr, ptr) unnamed_addr #1 ; Function Attrs: uwtable declare fp128 @entry(i128) unnamed_addr #1 ; std::process::exit ; Function Attrs: noreturn uwtable declare void @_ZN3std7process4exit17hb2f96155fda111fdE(i32) unnamed_addr #3 define i32 @main(i32 %0, ptr %1) unnamed_addr #4 { top: %2 = sext i32 %0 to i64 ; call std::rt::lang_start %3 = call i64 @_ZN3std2rt10lang_start17hdc6989dbc5702db1E(ptr @_ZN18transmute_call_lib4main17hfa19d9d60e61b95dE, i64 %2, ptr %1, i8 0) %4 = trunc i64 %3 to i32 ret i32 %4 } attributes #0 = { noinline uwtable "probe-stack"="inline-asm" "target-cpu"="pwr9" } attributes #1 = { uwtable 
"probe-stack"="inline-asm" "target-cpu"="pwr9" } attributes #2 = { inlinehint uwtable "probe-stack"="inline-asm" "target-cpu"="pwr9" } attributes #3 = { noreturn uwtable "probe-stack"="inline-asm" "target-cpu"="pwr9" } attributes #4 = { "target-cpu"="pwr9" } attributes #5 = { noreturn } !llvm.module.flags = !{!0, !1} !llvm.ident = !{!2} !0 = !{i32 8, !"PIC Level", i32 2} !1 = !{i32 7, !"PIE Level", i32 2} !2 = !{!"rustc version 1.80.0-nightly (9c9b56879 2024-05-05)"} !3 = !{i32 1628919} ```

This could be indirectly related to https://github.com/llvm/llvm-project/issues/92233#issuecomment-2112775463 since that needs use of f128 to cause the crash.

RalfJung commented 1 month ago

incorrect value with endianness flipped

Flipped endianness would be 0x1 << (128-8), no? This is some sort of mixed endianness where the two 64-bit chunks are arranged in little-endian order, but internally each chunk is big-endian.

tgross35 commented 1 month ago

Yeah, more like maintaining scalar endianness but laying out a composite struct f128 { uint64_t upper, lower; } representation as if it were for the incorrect endianness.

llvmbot commented 1 month ago

@llvm/issue-subscribers-backend-powerpc

Author: Trevor Gross (tgross35)

Doing a roundtrip i128 -> f128 -> i128 should work but it appears to be broken when the calls are external, possibly depending on some optimizations. It seems like the `f128`/`i128` is getting split into two 64-bit values, which are getting reversed at some point. Originally identified with rust in https://github.com/rust-lang/rust/issues/125102. Source files: ```rust // transmute_lib.rs #![feature(f128)] #![allow(improper_ctypes_definitions)] // ok since llvm 18 use std::mem::transmute; #[no_mangle] pub extern "C" fn entry(a: u128) -> f128 { unsafe { transmute::<u128, f128>(a) } } ``` ```rust // transmute_call_lib.rs #![feature(f128)] #![allow(improper_ctypes)] // ok since llvm 18 extern crate transmute_lib; use std::mem::transmute; use std::process::exit; extern "C" { fn entry(a: u128) -> f128; } fn main() { let a = unsafe { entry(0x1) }; // bitcast in an external function let res = unsafe { transmute::<f128, u128>(a) }; // bitcast back if res == 0x1 { exit(0); // correct } else if res == 0x1 << 64 { exit(1); // incorrect value with endianness flipped } else { exit(2); // unexpected value } } ``` Versions: ``` $ rustc -Vv rustc 1.80.0-nightly (9c9b56879 2024-05-05) binary: rustc commit-hash: 9c9b568792ef20d8459c745345dd3e79b7c7fa8c commit-date: 2024-05-05 host: x86_64-unknown-linux-gnu release: 1.80.0-nightly LLVM version: 18.1. ``` Compilation: ``` rustc transmute_lib.rs --target powerpc64-unknown-linux-gnu -C linker=powerpc64-linux-gnu-gcc -Ctarget-cpu=pwr9 --crate-type=lib rustc transmute_call_lib.rs --target powerpc64-unknown-linux-gnu -C linker=powerpc64-linux-gnu-gcc -Ctarget-cpu=pwr9 -o transmute_call_lib.rs.ppc64.pwr9 --extern transmute_lib=libtransmute_lib.rlib ``` Execution: ``` $ qemu-ppc64 -L /usr/powerpc64-linux-gnu/ transmute_call_lib.rs.ppc64.pwr9; echo $? 1 ``` The above should output 0. 
Full IR: <details> <summary>Library function</summary> ```llvm ; ModuleID = 'transmute_lib.1e179323af95c5c5-cgu.0' source_filename = "transmute_lib.1e179323af95c5c5-cgu.0" target datalayout = "E-m:e-Fi64-i64:64-n32:64-S128-v256:256:256-v512:512:512" target triple = "powerpc64-unknown-linux-gnu" ; Function Attrs: uwtable define fp128 @entry(i128 %a) unnamed_addr #0 { start: %_0 = bitcast i128 %a to fp128 ret fp128 %_0 } attributes #0 = { uwtable "probe-stack"="inline-asm" "target-cpu"="pwr9" } !llvm.module.flags = !{!0} !llvm.ident = !{!1} !0 = !{i32 8, !"PIC Level", i32 2} !1 = !{!"rustc version 1.80.0-nightly (9c9b56879 2024-05-05)"} ``` </details> <details> <summary>Executable</summary> The only relevant part is in `transmute_call_lib::main` ```llvm ; ModuleID = 'transmute_call_lib.1791869a4d73965-cgu.0' source_filename = "transmute_call_lib.1791869a4d73965-cgu.0" target datalayout = "E-m:e-Fi64-i64:64-n32:64-S128-v256:256:256-v512:512:512" target triple = "powerpc64-unknown-linux-gnu" @vtable.0 = private unnamed_addr constant <{ ptr, [16 x i8], ptr, ptr, ptr }> <{ ptr @"_ZN4core3ptr85drop_in_place$LT$std..rt..lan ; std::sys_common::backtrace::__rust_begin_short_backtrace ; Function Attrs: noinline uwtable define internal void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17hf94855fdbb5829c5E(ptr %f) unnamed_addr #0 { start: ; call core::ops::function::FnOnce::call_once call void @_ZN4core3ops8function6FnOnce9call_once17h3056e9bd87d66482E(ptr %f) call void asm sideeffect "", "~{memory}"(), !srcloc !3 ret void } ; std::rt::lang_start ; Function Attrs: uwtable define hidden i64 @_ZN3std2rt10lang_start17hdc6989dbc5702db1E(ptr %main, i64 %argc, ptr %argv, i8 %sigpipe) unnamed_addr #1 { start: %_8 = alloca [8 x i8], align 8 %_5 = alloca [8 x i8], align 8 store ptr %main, ptr %_8, align 8 ; call std::rt::lang_start_internal %0 = call i64 @_ZN3std2rt19lang_start_internal17he440c14c55fd4760E(ptr align 1 %_8, ptr align 8 @vtable.0, i64 %argc, ptr %argv, 
store i64 %0, ptr %_5, align 8 %v = load i64, ptr %_5, align 8 ret i64 %v } ; std::rt::lang_start::{{closure}} ; Function Attrs: inlinehint uwtable define internal i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17hdb386b686ec8565dE"(ptr align 8 %_1) unnamed_addr #2 { start: %self = alloca [1 x i8], align 1 %_4 = load ptr, ptr %_1, align 8 ; call std::sys_common::backtrace::__rust_begin_short_backtrace call void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17hf94855fdbb5829c5E(ptr %_4) ; call <() as std::process::Termination>::report %0 = call i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17h3606faab680ab85fE"() store i8 %0, ptr %self, align 1 %_6 = load i8, ptr %self, align 1 %_0 = zext i8 %_6 to i32 ret i32 %_0 } ; core::ops::function::FnOnce::call_once{{vtable.shim}} ; Function Attrs: inlinehint uwtable define internal i32 @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h956497c27ef6ef7fE"(ptr %_1) unnamed start: %_2 = alloca [0 x i8], align 1 %0 = load ptr, ptr %_1, align 8 ; call core::ops::function::FnOnce::call_once %_0 = call i32 @_ZN4core3ops8function6FnOnce9call_once17h9737a54d47c05e22E(ptr %0) ret i32 %_0 } ; core::ops::function::FnOnce::call_once ; Function Attrs: inlinehint uwtable define internal void @_ZN4core3ops8function6FnOnce9call_once17h3056e9bd87d66482E(ptr %_1) unnamed_addr #2 { start: %_2 = alloca [0 x i8], align 1 call void %_1() ret void } ; core::ops::function::FnOnce::call_once ; Function Attrs: inlinehint uwtable define internal i32 @_ZN4core3ops8function6FnOnce9call_once17h9737a54d47c05e22E(ptr %0) unnamed_addr #2 personality ptr @rust_eh_p start: %1 = alloca [16 x i8], align 8 %_2 = alloca [0 x i8], align 1 %_1 = alloca [8 x i8], align 8 store ptr %0, ptr %_1, align 8 ; invoke std::rt::lang_start::{{closure}} %_0 = invoke i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17hdb386b686ec8565dE"(ptr align 8 %_1) to label %bb1 unwind label %cleanup bb3: ; 
preds = %cleanup %2 = load ptr, ptr %1, align 8 %3 = getelementptr inbounds i8, ptr %1, i64 8 %4 = load i32, ptr %3, align 8 %5 = insertvalue { ptr, i32 } poison, ptr %2, 0 %6 = insertvalue { ptr, i32 } %5, i32 %4, 1 resume { ptr, i32 } %6 cleanup: ; preds = %start %7 = landingpad { ptr, i32 } cleanup %8 = extractvalue { ptr, i32 } %7, 0 %9 = extractvalue { ptr, i32 } %7, 1 store ptr %8, ptr %1, align 8 %10 = getelementptr inbounds i8, ptr %1, i64 8 store i32 %9, ptr %10, align 8 br label %bb3 bb1: ; preds = %start ret i32 %_0 } ; core::ptr::drop_in_place<std::rt::lang_start<()>::{{closure}}> ; Function Attrs: inlinehint uwtable define internal void @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17h64db6 start: ret void } ; <() as std::process::Termination>::report ; Function Attrs: inlinehint uwtable define internal i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17h3606faab680ab85fE"() unnamed_addr #2 { start: ret i8 0 } ; transmute_call_lib::main ; Function Attrs: uwtable define internal void @_ZN18transmute_call_lib4main17hfa19d9d60e61b95dE() unnamed_addr #1 { start: %a = call fp128 @entry(i128 1) %res = bitcast fp128 %a to i128 %0 = icmp eq i128 %res, 1 br i1 %0, label %bb2, label %bb3 bb2: ; preds = %start ; call std::process::exit call void @_ZN3std7process4exit17hb2f96155fda111fdE(i32 0) #5 unreachable bb3: ; preds = %start %_4 = icmp eq i128 %res, 18446744073709551616 br i1 %_4, label %bb5, label %bb6 bb6: ; preds = %bb3 ; call std::process::exit call void @_ZN3std7process4exit17hb2f96155fda111fdE(i32 2) #5 unreachable bb5: ; preds = %bb3 ; call std::process::exit call void @_ZN3std7process4exit17hb2f96155fda111fdE(i32 1) #5 unreachable } ; std::rt::lang_start_internal ; Function Attrs: uwtable declare i64 @_ZN3std2rt19lang_start_internal17he440c14c55fd4760E(ptr align 1, ptr align 8, i64, ptr, i8) unnamed_addr #1 ; Function Attrs: uwtable declare zeroext i32 
@rust_eh_personality(i32 signext, i32 zeroext, i64, ptr, ptr) unnamed_addr #1 ; Function Attrs: uwtable declare fp128 @entry(i128) unnamed_addr #1 ; std::process::exit ; Function Attrs: noreturn uwtable declare void @_ZN3std7process4exit17hb2f96155fda111fdE(i32) unnamed_addr #3 define i32 @main(i32 %0, ptr %1) unnamed_addr #4 { top: %2 = sext i32 %0 to i64 ; call std::rt::lang_start %3 = call i64 @_ZN3std2rt10lang_start17hdc6989dbc5702db1E(ptr @_ZN18transmute_call_lib4main17hfa19d9d60e61b95dE, i64 %2, ptr %1, i8 0) %4 = trunc i64 %3 to i32 ret i32 %4 } attributes #0 = { noinline uwtable "probe-stack"="inline-asm" "target-cpu"="pwr9" } attributes #1 = { uwtable "probe-stack"="inline-asm" "target-cpu"="pwr9" } attributes #2 = { inlinehint uwtable "probe-stack"="inline-asm" "target-cpu"="pwr9" } attributes #3 = { noreturn uwtable "probe-stack"="inline-asm" "target-cpu"="pwr9" } attributes #4 = { "target-cpu"="pwr9" } attributes #5 = { noreturn } !llvm.module.flags = !{!0, !1} !llvm.ident = !{!2} !0 = !{i32 8, !"PIC Level", i32 2} !1 = !{i32 7, !"PIE Level", i32 2} !2 = !{!"rustc version 1.80.0-nightly (9c9b56879 2024-05-05)"} !3 = !{i32 1628919} ``` </details>
beetrees commented 1 month ago

Looking at the generated assembly (compiler explorer):

entry:
        mtvsrdd 34, 4, 3
        blr

main:
        # <prelude omitted>
        li 3, 0
        li 4, 1
        bl entry
        nop
        stxv 34, 128(1)
        # <rest of function>

and a copy of the Power ISA manual, the miscompilation appears to be in the entry function. The 128-bit integer is passed in GPR 3 and 4. The entry function then combines the GPR registers the wrong way round with mtvsrdd 34, 4, 3, placing the second doubleword at the start of the vector register instead of at the end.

llvmbot commented 1 month ago

Hi!

This issue may be a good introductory issue for people new to working on LLVM. If you would like to work on this issue, your first steps are:

  1. Check that no other contributor has already been assigned to this issue. If you believe that no one is actually working on it despite an assignment, ping the person. After one week without a response, the assignee may be changed.
  2. In the comments of this issue, request for it to be assigned to you, or just create a pull request after following the steps below. Mention this issue in the description of the pull request.
  3. Fix the issue locally.
  4. Run the test suite locally. Remember that the subdirectories under test/ create fine-grained testing targets, so you can e.g. use make check-clang-ast to only run Clang's AST tests.
  5. Create a Git commit.
  6. Run git clang-format HEAD~1 to format your changes.
  7. Open a pull request to the upstream repository on GitHub. Detailed instructions can be found in GitHub's documentation. Mention this issue in the description of the pull request.

If you have any further questions about this issue, don't hesitate to ask via a comment in the thread below.

llvmbot commented 1 month ago

@llvm/issue-subscribers-good-first-issue

Author: Trevor Gross (tgross35)

Doing a roundtrip i128 -> f128 -> i128 should work but it appears to be broken when the calls are external, possibly depending on some optimizations. It seems like the `f128`/`i128` is getting split into two 64-bit values, which are getting reversed at some point. The problem does not seem to happen without enabling the pwr9 target feature. Originally identified with rust in https://github.com/rust-lang/rust/issues/125102. Source files: ```rust // transmute_lib.rs #![feature(f128)] #![allow(improper_ctypes_definitions)] // ok since llvm 18 use std::mem::transmute; #[no_mangle] pub extern "C" fn entry(a: u128) -> f128 { unsafe { transmute::<u128, f128>(a) } } ``` ```rust // transmute_call_lib.rs #![feature(f128)] #![allow(improper_ctypes)] // ok since llvm 18 extern crate transmute_lib; use std::mem::transmute; use std::process::exit; extern "C" { fn entry(a: u128) -> f128; } fn main() { let a = unsafe { entry(0x1) }; // bitcast in an external function let res = unsafe { transmute::<f128, u128>(a) }; // bitcast back if res == 0x1 { exit(0); // correct } else if res == 0x1 << 64 { exit(1); // incorrect value with endianness flipped } else { exit(2); // unexpected value } } ``` Versions: ``` $ rustc -Vv rustc 1.80.0-nightly (9c9b56879 2024-05-05) binary: rustc commit-hash: 9c9b568792ef20d8459c745345dd3e79b7c7fa8c commit-date: 2024-05-05 host: x86_64-unknown-linux-gnu release: 1.80.0-nightly LLVM version: 18.1. ``` Compilation: ``` rustc transmute_lib.rs --target powerpc64-unknown-linux-gnu -C linker=powerpc64-linux-gnu-gcc -Ctarget-cpu=pwr9 --crate-type=lib rustc transmute_call_lib.rs --target powerpc64-unknown-linux-gnu -C linker=powerpc64-linux-gnu-gcc -Ctarget-cpu=pwr9 -o transmute_call_lib.rs.ppc64.pwr9 --extern transmute_lib=libtransmute_lib.rlib ``` Execution: ``` $ qemu-ppc64 -L /usr/powerpc64-linux-gnu/ transmute_call_lib.rs.ppc64.pwr9; echo $? 1 ``` The above should output 0. 
Full IR: <details> <summary>Library function</summary> ```llvm ; ModuleID = 'transmute_lib.1e179323af95c5c5-cgu.0' source_filename = "transmute_lib.1e179323af95c5c5-cgu.0" target datalayout = "E-m:e-Fi64-i64:64-n32:64-S128-v256:256:256-v512:512:512" target triple = "powerpc64-unknown-linux-gnu" ; Function Attrs: uwtable define fp128 @entry(i128 %a) unnamed_addr #0 { start: %_0 = bitcast i128 %a to fp128 ret fp128 %_0 } attributes #0 = { uwtable "probe-stack"="inline-asm" "target-cpu"="pwr9" } !llvm.module.flags = !{!0} !llvm.ident = !{!1} !0 = !{i32 8, !"PIC Level", i32 2} !1 = !{!"rustc version 1.80.0-nightly (9c9b56879 2024-05-05)"} ``` </details> <details> <summary>Executable</summary> The only relevant part is in `transmute_call_lib::main` ```llvm ; ModuleID = 'transmute_call_lib.1791869a4d73965-cgu.0' source_filename = "transmute_call_lib.1791869a4d73965-cgu.0" target datalayout = "E-m:e-Fi64-i64:64-n32:64-S128-v256:256:256-v512:512:512" target triple = "powerpc64-unknown-linux-gnu" @vtable.0 = private unnamed_addr constant <{ ptr, [16 x i8], ptr, ptr, ptr }> <{ ptr @"_ZN4core3ptr85drop_in_place$LT$std..rt..lan ; std::sys_common::backtrace::__rust_begin_short_backtrace ; Function Attrs: noinline uwtable define internal void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17hf94855fdbb5829c5E(ptr %f) unnamed_addr #0 { start: ; call core::ops::function::FnOnce::call_once call void @_ZN4core3ops8function6FnOnce9call_once17h3056e9bd87d66482E(ptr %f) call void asm sideeffect "", "~{memory}"(), !srcloc !3 ret void } ; std::rt::lang_start ; Function Attrs: uwtable define hidden i64 @_ZN3std2rt10lang_start17hdc6989dbc5702db1E(ptr %main, i64 %argc, ptr %argv, i8 %sigpipe) unnamed_addr #1 { start: %_8 = alloca [8 x i8], align 8 %_5 = alloca [8 x i8], align 8 store ptr %main, ptr %_8, align 8 ; call std::rt::lang_start_internal %0 = call i64 @_ZN3std2rt19lang_start_internal17he440c14c55fd4760E(ptr align 1 %_8, ptr align 8 @vtable.0, i64 %argc, ptr %argv, 
store i64 %0, ptr %_5, align 8 %v = load i64, ptr %_5, align 8 ret i64 %v } ; std::rt::lang_start::{{closure}} ; Function Attrs: inlinehint uwtable define internal i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17hdb386b686ec8565dE"(ptr align 8 %_1) unnamed_addr #2 { start: %self = alloca [1 x i8], align 1 %_4 = load ptr, ptr %_1, align 8 ; call std::sys_common::backtrace::__rust_begin_short_backtrace call void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17hf94855fdbb5829c5E(ptr %_4) ; call <() as std::process::Termination>::report %0 = call i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17h3606faab680ab85fE"() store i8 %0, ptr %self, align 1 %_6 = load i8, ptr %self, align 1 %_0 = zext i8 %_6 to i32 ret i32 %_0 } ; core::ops::function::FnOnce::call_once{{vtable.shim}} ; Function Attrs: inlinehint uwtable define internal i32 @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h956497c27ef6ef7fE"(ptr %_1) unnamed start: %_2 = alloca [0 x i8], align 1 %0 = load ptr, ptr %_1, align 8 ; call core::ops::function::FnOnce::call_once %_0 = call i32 @_ZN4core3ops8function6FnOnce9call_once17h9737a54d47c05e22E(ptr %0) ret i32 %_0 } ; core::ops::function::FnOnce::call_once ; Function Attrs: inlinehint uwtable define internal void @_ZN4core3ops8function6FnOnce9call_once17h3056e9bd87d66482E(ptr %_1) unnamed_addr #2 { start: %_2 = alloca [0 x i8], align 1 call void %_1() ret void } ; core::ops::function::FnOnce::call_once ; Function Attrs: inlinehint uwtable define internal i32 @_ZN4core3ops8function6FnOnce9call_once17h9737a54d47c05e22E(ptr %0) unnamed_addr #2 personality ptr @rust_eh_p start: %1 = alloca [16 x i8], align 8 %_2 = alloca [0 x i8], align 1 %_1 = alloca [8 x i8], align 8 store ptr %0, ptr %_1, align 8 ; invoke std::rt::lang_start::{{closure}} %_0 = invoke i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17hdb386b686ec8565dE"(ptr align 8 %_1) to label %bb1 unwind label %cleanup bb3: ; 
preds = %cleanup %2 = load ptr, ptr %1, align 8 %3 = getelementptr inbounds i8, ptr %1, i64 8 %4 = load i32, ptr %3, align 8 %5 = insertvalue { ptr, i32 } poison, ptr %2, 0 %6 = insertvalue { ptr, i32 } %5, i32 %4, 1 resume { ptr, i32 } %6 cleanup: ; preds = %start %7 = landingpad { ptr, i32 } cleanup %8 = extractvalue { ptr, i32 } %7, 0 %9 = extractvalue { ptr, i32 } %7, 1 store ptr %8, ptr %1, align 8 %10 = getelementptr inbounds i8, ptr %1, i64 8 store i32 %9, ptr %10, align 8 br label %bb3 bb1: ; preds = %start ret i32 %_0 } ; core::ptr::drop_in_place<std::rt::lang_start<()>::{{closure}}> ; Function Attrs: inlinehint uwtable define internal void @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17h64db6 start: ret void } ; <() as std::process::Termination>::report ; Function Attrs: inlinehint uwtable define internal i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17h3606faab680ab85fE"() unnamed_addr #2 { start: ret i8 0 } ; transmute_call_lib::main ; Function Attrs: uwtable define internal void @_ZN18transmute_call_lib4main17hfa19d9d60e61b95dE() unnamed_addr #1 { start: %a = call fp128 @entry(i128 1) %res = bitcast fp128 %a to i128 %0 = icmp eq i128 %res, 1 br i1 %0, label %bb2, label %bb3 bb2: ; preds = %start ; call std::process::exit call void @_ZN3std7process4exit17hb2f96155fda111fdE(i32 0) #5 unreachable bb3: ; preds = %start %_4 = icmp eq i128 %res, 18446744073709551616 br i1 %_4, label %bb5, label %bb6 bb6: ; preds = %bb3 ; call std::process::exit call void @_ZN3std7process4exit17hb2f96155fda111fdE(i32 2) #5 unreachable bb5: ; preds = %bb3 ; call std::process::exit call void @_ZN3std7process4exit17hb2f96155fda111fdE(i32 1) #5 unreachable } ; std::rt::lang_start_internal ; Function Attrs: uwtable declare i64 @_ZN3std2rt19lang_start_internal17he440c14c55fd4760E(ptr align 1, ptr align 8, i64, ptr, i8) unnamed_addr #1 ; Function Attrs: uwtable declare zeroext i32 
@rust_eh_personality(i32 signext, i32 zeroext, i64, ptr, ptr) unnamed_addr #1 ; Function Attrs: uwtable declare fp128 @entry(i128) unnamed_addr #1 ; std::process::exit ; Function Attrs: noreturn uwtable declare void @_ZN3std7process4exit17hb2f96155fda111fdE(i32) unnamed_addr #3 define i32 @main(i32 %0, ptr %1) unnamed_addr #4 { top: %2 = sext i32 %0 to i64 ; call std::rt::lang_start %3 = call i64 @_ZN3std2rt10lang_start17hdc6989dbc5702db1E(ptr @_ZN18transmute_call_lib4main17hfa19d9d60e61b95dE, i64 %2, ptr %1, i8 0) %4 = trunc i64 %3 to i32 ret i32 %4 } attributes #0 = { noinline uwtable "probe-stack"="inline-asm" "target-cpu"="pwr9" } attributes #1 = { uwtable "probe-stack"="inline-asm" "target-cpu"="pwr9" } attributes #2 = { inlinehint uwtable "probe-stack"="inline-asm" "target-cpu"="pwr9" } attributes #3 = { noreturn uwtable "probe-stack"="inline-asm" "target-cpu"="pwr9" } attributes #4 = { "target-cpu"="pwr9" } attributes #5 = { noreturn } !llvm.module.flags = !{!0, !1} !llvm.ident = !{!2} !0 = !{i32 8, !"PIC Level", i32 2} !1 = !{i32 7, !"PIE Level", i32 2} !2 = !{!"rustc version 1.80.0-nightly (9c9b56879 2024-05-05)"} !3 = !{i32 1628919} ``` </details> This could be indirectly related to https://github.com/llvm/llvm-project/issues/92233#issuecomment-2112775463 since that needs use of `f128` to cause the crash.