rust-lang / rust

Empowering everyone to build reliable and efficient software.
https://www.rust-lang.org
Other
98.61k stars 12.74k forks source link

right shift of i128i by zero fails on s390x (SystemZ) #52015

Open gnzlbg opened 6 years ago

gnzlbg commented 6 years ago

I've set up a repository that reproduces this issue on travis (using qemu): https://github.com/gnzlbg/repro_s390x

The following code right shifts a <1 x i128> containing 1 by 0. The result of the shift should be 1, but on debug builds it is 0, causing the following code to panic (on release the code does not panic):

#![feature(repr_simd, platform_intrinsics)]
#![allow(non_camel_case_types)]

#[derive(Copy,Clone)]
#[repr(simd)]
pub struct i128x1(i128);

extern "platform-intrinsic" {
    pub fn simd_shr<T>(x: T, y: T) -> T;
}

#[test]
pub fn test() {
    unsafe {
        let z = i128x1(0 as i128);
        let o = i128x1(1 as i128);

        if simd_shr(o, z).0 !=  o.0 {
            panic!();
        }
    }
}

The test function generates the following LLVM-IR

define void @repro_s390x::test() unnamed_addr #0 {
start:
%tmp_ret = alloca <1 x i128>, align 16
%o = alloca <1 x i128>, align 16
%z = alloca <1 x i128>, align 16
%0 = bitcast <1 x i128>* %z to i128*
store i128 0, i128* %0, align 8
%1 = bitcast <1 x i128>* %o to i128*
store i128 1, i128* %1, align 8
%2 = load <1 x i128>, <1 x i128>* %o, align 16
%3 = load <1 x i128>, <1 x i128>* %z, align 16
%4 = ashr <1 x i128> %2, %3
store <1 x i128> %4, <1 x i128>* %tmp_ret, align 16
%5 = load <1 x i128>, <1 x i128>* %tmp_ret, align 16
br label %bb1

bb1:                                              ; preds = %start
%6 = bitcast <1 x i128> %5 to i128
%7 = bitcast <1 x i128>* %o to i128*
%8 = load i128, i128* %7, align 8
%9 = icmp ne i128 %6, %8
br i1 %9, label %bb2, label %bb3

bb2:                                              ; preds = %bb1
; call std::panicking::begin_panic
call void @_ZN3std9panicking11begin_panic17h3f2f8b63a0f87b42E([0 x i8]* noalias nonnull readonly bitcast (<{ [14 x i8] }>* @byte_str.2 to [0 x i8]*), i64 14, { [0 x i64], { [0 x i8]*, i64 }, [0 x i32], i32, [0 x i32], i32, [0 x i32] }* noalias readonly dereferenceable(24) bitcast (<{ i8*, [16 x i8] }>* @byte_str.1 to { [0 x i64], { [0 x i8]*, i64 }, [0 x i32], i32, [0 x i32], i32, [0 x i32] }*))
unreachable

bb3:                                              ; preds = %bb1
ret void
}

which is lowered to

repro_s390x::test (src/lib.rs:12):
 stmg    %r11, %r15, 88(%r15)
 aghi    %r15, -240
 lgr     %r11, %r15
 lgr     %r0, %r15
 lgr     %r1, %r0
 aghi    %r1, -24
 la      %r0, 168(%r1)
 lgr     %r2, %r0
 nill    %r2, 65520
 lgr     %r15, %r1
 lgr     %r0, %r15
 lgr     %r1, %r0
 aghi    %r1, -24
 la      %r0, 168(%r1)
 lgr     %r3, %r0
 nill    %r3, 65520
 lgr     %r15, %r1
 lgr     %r0, %r15
 lgr     %r1, %r0
 aghi    %r1, -24
 la      %r0, 168(%r1)
 lgr     %r4, %r0
 nill    %r4, 65520
 lgr     %r15, %r1
 mvghi   8(%r4), 0
 mvghi   0(%r4), 0
 mvghi   8(%r3), 1
 mvghi   0(%r3), 0
 lg      %r0, 0(%r3)
 lg      %r1, 8(%r3)
 lg      %r5, 0(%r4)
 lg      %r4, 8(%r4)
 stg     %r4, 200(%r11)
 stg     %r5, 192(%r11)
 stg     %r1, 216(%r11)
 stg     %r0, 208(%r11)
 la      %r0, 224(%r11)
 la      %r1, 208(%r11)
 la      %r4, 192(%r11)
 stg     %r2, 184(%r11)
 lgr     %r2, %r0
 stg     %r3, 176(%r11)
 lgr     %r3, %r1
 brasl   %r14, __ashrti3@PLT
 lg      %r0, 224(%r11)
 lg      %r1, 232(%r11)
 lg      %r2, 184(%r11)
 stg     %r1, 8(%r2)
 stg     %r0, 0(%r2)
 lg      %r0, 8(%r2)
 lg      %r1, 0(%r2)
 stg     %r0, 168(%r11)
 stg     %r1, 160(%r11)
 j       .LBB0_1
.LBB0_1:
 lg      %r1, 176(%r11)
 lg      %r0, 8(%r1)
 lg      %r2, 0(%r1)
 lg      %r3, 160(%r11)
 xgr     %r3, %r2
 lg      %r2, 168(%r11)
 xgr     %r2, %r0
 ogr     %r2, %r3
 cghi    %r2, 0
 je      .LBB0_3
 j       .LBB0_2
.LBB0_2:
 larl    %r2, .Lbyte_str.2
 larl    %r4, .Lbyte_str.1
 lghi    %r3, 14
 brasl   %r14, std::panicking::begin_panic@PLT
 j       .Ltmp9+2
.LBB0_3:
 lmg     %r11, %r15, 328(%r11)
 br      %r14

cc @rkruppe

Who maintains Rust's SystemZ support?

hellow554 commented 6 years ago

A few questions and annotations.

gnzlbg commented 6 years ago

is the order of the parameter correct [...] I can't find the doc to simd_shr

simd_shr is an undocumented compiler intrinsic for internal use in the std library only (not exposed to users), so it isn't really documented anywhere. Its type checked here, and it's lowered to LLVM IR here, by generating a ashr (here), with the arguments of simd_shr passed in order to ashr.

The LLVM's LangRef documents ashr as:

The ‘ashr’ instruction (arithmetic shift right) returns the first operand shifted to the right a specified number of bits with sign extension.

That is, the first argument of ashr is the value to be shifted, and the second value is the number of bits to shift.

So it appears that the argument order is correct. The LLVM IR Builder for ashr also appears ok (here).

Why don't you write 0i128.

This was produced from minimizing code that uses as and was failing only on s390x on debug builds, so I just kept that in the MWE.

I've just tried to use 0_i128 instead and that does not reproduce the issue, so maybe this has something to do with as ? cc @eddyb EDIT: I am dumb and disabled the test while trying out this change, using 0_i128 does still reproduce the issue. The latest commit in the repo has this.

gnzlbg commented 6 years ago

Expanding on the code generating this issue, this was discovered in the ppv crate, which tries to fully implement the ppv RFC. The code works as expected everywhere else:

screen shot 2018-07-03 at 14 40 12

hanna-kruppe commented 6 years ago

Does this only affect 1xi128 vectors or does it reproduce with a scalar i128 too?

gnzlbg commented 6 years ago

Does this only affect 1xi128 vectors or does it reproduce with a scalar i128 too?

I've added the equivalent test using i128 to the repo (see below) and it does not panic, so it appears to only affect vectors. In stdsimd I can reproduce this with i128x1 and i128x2 only (and shr only IIRC, shl appears to work correctly), all other tests for s390x-unknown-linux-gnu pass successfully.


This test passes (it does not panic):

#[test]
pub fn scalar_test() {
    let z = 0_i128;
    let o = 1_i128;

    if o >> z != o {
        panic!();
    }
}
gnzlbg commented 6 years ago

This test also fails for sparc64-unknown-linux-gnu

gnzlbg commented 6 years ago

Now that godbolt supports cross-compilation for rust, it is much easier to try to debug this (https://godbolt.org/g/No8zpS):

example::scalar_test:
        stmg    %r14, %r15, 112(%r15)
        aghi    %r15, -160
        j       .LBB47_1
.LBB47_1:
        lhi     %r0, 1
        chi     %r0, 0
        jlh     .LBB47_3
        j       .LBB47_2
.LBB47_2:
        larl    %r2, .Lbyte_str.6
        larl    %r4, .Lbyte_str.5
        lghi    %r3, 14
        brasl   %r14, std::panicking::begin_panic@PLT
.Ltmp29:
        j       .Ltmp29+2
.LBB47_3:
        lmg     %r14, %r15, 272(%r15)
        br      %r14

.Lbyte_str.4:
        .ascii  "/tmp/compiler-explorer-compiler118710-63-1i2e6ad.i6ghg/example.rs"

.Lbyte_str.5:
        .quad   .Lbyte_str.4
        .ascii  "\000\000\000\000\000\000\000A\000\000\000\007\000\000\000\t"

.Lbyte_str.6:
        .ascii  "explicit panic"

from

define void @_ZN7example11scalar_test17h9448becacb3cda6dE() unnamed_addr #0 {
  br label %bb1

bb1:                                              ; preds = %start
  br i1 false, label %bb2, label %bb3

bb2:                                              ; preds = %bb1
  call void @_ZN3std9panicking11begin_panic17h0260f1c8a33dd8a1E([0 x i8]* noalias nonnull readonly bitcast (<{ [14 x i8] }>* @byte_str.6 to [0 x i8]*), i64 14, { [0 x i64], { [0 x i8]*, i64 }, [0 x i32], i32, [0 x i32], i32, [0 x i32] }* noalias readonly dereferenceable(24) bitcast (<{ i8*, [16 x i8] }>* @byte_str.5 to { [0 x i64], { [0 x i8]*, i64 }, [0 x i32], i32, [0 x i32], i32, [0 x i32] }*))
  unreachable

bb3:                                              ; preds = %bb1
  ret void
}

so everything looks correct now, I am restarting the build but travis is having issues. If the build still fails I don't know what's the problem here.

sanxiyn commented 5 years ago

What happened on Travis?

gnzlbg commented 5 years ago

I've restarted the build: https://travis-ci.org/gnzlbg/repro_s390x/builds/399684908

It still fails.

Erk- commented 3 years ago

I cannot reproduce it on a s390x machine anymore so I think it is fixed

[linux1@rusting repro_s390x]$ cargo +nightly test
   Compiling repro_s390x v0.1.0 (/home/linux1/dev/rustbug/repro_s390x)
    Finished test [unoptimized + debuginfo] target(s) in 2.09s
     Running unittests (target/debug/deps/repro_s390x-8197d3a364efe000)

running 2 tests
test scalar_test ... ok
test vector_test ... ok

Tested on a vm provided by Marist College through the community cloud https://linuxone.cloud.marist.edu/