rust-lang / rustc_codegen_cranelift

Cranelift based backend for rustc
Apache License 2.0
1.59k stars 100 forks source link

Implement a whole bunch more x86 vendor intrinsics #1380

Closed bjorn3 closed 1 year ago

bjorn3 commented 1 year ago

Ignoring AVX512, I'm now down to 190 failing stdarch tests, with 725 passing (915 in total).

Helps with #1289 and #1222

bjorn3 commented 1 year ago
missing intrinsics ``` core_arch::x86::aes::tests::test_mm_aesdec_si128 core_arch::x86::aes::tests::test_mm_aesdeclast_si128 core_arch::x86::aes::tests::test_mm_aesenc_si128 core_arch::x86::aes::tests::test_mm_aesenclast_si128 core_arch::x86::aes::tests::test_mm_aesimc_si128 core_arch::x86::aes::tests::test_mm_aeskeygenassist_si128 core_arch::x86::fxsr::tests::fxsave core_arch::x86::pclmulqdq::tests::test_mm_clmulepi64_si128 core_arch::x86::rdtsc::tests::_rdtsc core_arch::x86::rdtsc::tests::_rdtscp core_arch::x86::sse2::tests::test_mm_avg_epu16 core_arch::x86::sse2::tests::test_mm_avg_epu8 core_arch::x86::sse2::tests::test_mm_clflush core_arch::x86::sse2::tests::test_mm_cmpeq_sd core_arch::x86::sse2::tests::test_mm_cmpge_sd core_arch::x86::sse2::tests::test_mm_cmpgt_sd core_arch::x86::sse2::tests::test_mm_cmple_sd core_arch::x86::sse2::tests::test_mm_cmplt_sd core_arch::x86::sse2::tests::test_mm_cmpneq_sd core_arch::x86::sse2::tests::test_mm_cmpnge_sd core_arch::x86::sse2::tests::test_mm_cmpngt_sd core_arch::x86::sse2::tests::test_mm_cmpnle_sd core_arch::x86::sse2::tests::test_mm_cmpnlt_sd core_arch::x86::sse2::tests::test_mm_cmpord_sd core_arch::x86::sse2::tests::test_mm_cmpunord_sd core_arch::x86::sse2::tests::test_mm_comieq_sd core_arch::x86::sse2::tests::test_mm_comige_sd core_arch::x86::sse2::tests::test_mm_comigt_sd core_arch::x86::sse2::tests::test_mm_comile_sd core_arch::x86::sse2::tests::test_mm_comilt_sd core_arch::x86::sse2::tests::test_mm_comineq_sd core_arch::x86::sse2::tests::test_mm_cvtepi32_ps core_arch::x86::sse2::tests::test_mm_cvtpd_epi32 core_arch::x86::sse2::tests::test_mm_cvtpd_ps core_arch::x86::sse2::tests::test_mm_cvtps_epi32 core_arch::x86::sse2::tests::test_mm_cvtps_pd core_arch::x86::sse2::tests::test_mm_cvtsd_si32 core_arch::x86::sse2::tests::test_mm_cvtsd_ss core_arch::x86::sse2::tests::test_mm_cvtss_sd core_arch::x86::sse2::tests::test_mm_cvttpd_epi32 core_arch::x86::sse2::tests::test_mm_cvttps_epi32 
core_arch::x86::sse2::tests::test_mm_cvttsd_si32 core_arch::x86::sse2::tests::test_mm_lfence core_arch::x86::sse2::tests::test_mm_madd_epi16 core_arch::x86::sse2::tests::test_mm_maskmoveu_si128 core_arch::x86::sse2::tests::test_mm_max_pd core_arch::x86::sse2::tests::test_mm_max_sd core_arch::x86::sse2::tests::test_mm_mfence core_arch::x86::sse2::tests::test_mm_min_pd core_arch::x86::sse2::tests::test_mm_min_sd core_arch::x86::sse2::tests::test_mm_mul_epu32 core_arch::x86::sse2::tests::test_mm_mulhi_epi16 core_arch::x86::sse2::tests::test_mm_mulhi_epu16 core_arch::x86::sse2::tests::test_mm_packs_epi16 core_arch::x86::sse2::tests::test_mm_packs_epi32 core_arch::x86::sse2::tests::test_mm_packus_epi16 core_arch::x86::sse2::tests::test_mm_sad_epu8 core_arch::x86::sse2::tests::test_mm_sll_epi16 core_arch::x86::sse2::tests::test_mm_sll_epi32 core_arch::x86::sse2::tests::test_mm_sll_epi64 core_arch::x86::sse2::tests::test_mm_sqrt_sd core_arch::x86::sse2::tests::test_mm_sra_epi16 core_arch::x86::sse2::tests::test_mm_sra_epi32 core_arch::x86::sse2::tests::test_mm_srl_epi16 core_arch::x86::sse2::tests::test_mm_srl_epi32 core_arch::x86::sse2::tests::test_mm_srl_epi64 core_arch::x86::sse2::tests::test_mm_ucomieq_sd core_arch::x86::sse2::tests::test_mm_ucomige_sd core_arch::x86::sse2::tests::test_mm_ucomigt_sd core_arch::x86::sse2::tests::test_mm_ucomile_sd core_arch::x86::sse2::tests::test_mm_ucomilt_sd core_arch::x86::sse2::tests::test_mm_ucomineq_sd core_arch::x86::sse3::tests::test_mm_addsub_pd core_arch::x86::sse3::tests::test_mm_addsub_ps core_arch::x86::sse3::tests::test_mm_hadd_pd core_arch::x86::sse3::tests::test_mm_hadd_ps core_arch::x86::sse3::tests::test_mm_hsub_pd core_arch::x86::sse3::tests::test_mm_hsub_ps core_arch::x86::sse3::tests::test_mm_lddqu_si128 core_arch::x86::sse41::tests::test_mm_blend_epi16 core_arch::x86::sse41::tests::test_mm_blend_pd core_arch::x86::sse41::tests::test_mm_blend_ps core_arch::x86::sse41::tests::test_mm_blendv_epi8 
core_arch::x86::sse41::tests::test_mm_blendv_pd core_arch::x86::sse41::tests::test_mm_blendv_ps core_arch::x86::sse41::tests::test_mm_ceil_sd core_arch::x86::sse41::tests::test_mm_ceil_ss core_arch::x86::sse41::tests::test_mm_dp_pd core_arch::x86::sse41::tests::test_mm_dp_ps core_arch::x86::sse41::tests::test_mm_floor_sd core_arch::x86::sse41::tests::test_mm_floor_ss core_arch::x86::sse41::tests::test_mm_insert_ps core_arch::x86::sse41::tests::test_mm_minpos_epu16 core_arch::x86::sse41::tests::test_mm_minpos_epu16_1 core_arch::x86::sse41::tests::test_mm_minpos_epu16_2 core_arch::x86::sse41::tests::test_mm_mpsadbw_epu8 core_arch::x86::sse41::tests::test_mm_mul_epi32 core_arch::x86::sse41::tests::test_mm_packus_epi32 core_arch::x86::sse41::tests::test_mm_round_pd core_arch::x86::sse41::tests::test_mm_round_ps core_arch::x86::sse41::tests::test_mm_round_sd core_arch::x86::sse41::tests::test_mm_round_ss core_arch::x86::sse41::tests::test_mm_test_all_ones core_arch::x86::sse41::tests::test_mm_test_all_zeros core_arch::x86::sse41::tests::test_mm_test_mix_ones_zeros core_arch::x86::sse41::tests::test_mm_testc_si128 core_arch::x86::sse41::tests::test_mm_testnzc_si128 core_arch::x86::sse41::tests::test_mm_testz_si128 core_arch::x86::sse42::tests::test_mm_cmpestra core_arch::x86::sse42::tests::test_mm_cmpestrc core_arch::x86::sse42::tests::test_mm_cmpestri core_arch::x86::sse42::tests::test_mm_cmpestrm core_arch::x86::sse42::tests::test_mm_cmpestro core_arch::x86::sse42::tests::test_mm_cmpestrs core_arch::x86::sse42::tests::test_mm_cmpestrz core_arch::x86::sse42::tests::test_mm_cmpistra core_arch::x86::sse42::tests::test_mm_cmpistrc core_arch::x86::sse42::tests::test_mm_cmpistri core_arch::x86::sse42::tests::test_mm_cmpistrm core_arch::x86::sse42::tests::test_mm_cmpistro core_arch::x86::sse42::tests::test_mm_cmpistrs core_arch::x86::sse42::tests::test_mm_cmpistrz core_arch::x86::sse42::tests::test_mm_crc32_u16 core_arch::x86::sse42::tests::test_mm_crc32_u32 
core_arch::x86::sse42::tests::test_mm_crc32_u8 core_arch::x86::sse::tests::test_mm_add_ss core_arch::x86::sse::tests::test_mm_cmpeq_ss core_arch::x86::sse::tests::test_mm_cmpge_ss core_arch::x86::sse::tests::test_mm_cmpgt_ss core_arch::x86::sse::tests::test_mm_cmple_ss core_arch::x86::sse::tests::test_mm_cmplt_ss core_arch::x86::sse::tests::test_mm_cmpneq_ss core_arch::x86::sse::tests::test_mm_cmpnge_ss core_arch::x86::sse::tests::test_mm_cmpngt_ss core_arch::x86::sse::tests::test_mm_cmpnle_ss core_arch::x86::sse::tests::test_mm_cmpnlt_ss core_arch::x86::sse::tests::test_mm_cmpord_ss core_arch::x86::sse::tests::test_mm_cmpunord_ss core_arch::x86::sse::tests::test_mm_comieq_ss core_arch::x86::sse::tests::test_mm_comieq_ss_vs_ucomieq_ss core_arch::x86::sse::tests::test_mm_comigt_ss core_arch::x86::sse::tests::test_mm_comile_ss core_arch::x86::sse::tests::test_mm_comilt_ss core_arch::x86::sse::tests::test_mm_comineq_ss core_arch::x86::sse::tests::test_mm_cvtsi32_ss core_arch::x86::sse::tests::test_mm_cvtss_si32 core_arch::x86::sse::tests::test_mm_cvttss_si32 core_arch::x86::sse::tests::test_mm_div_ss core_arch::x86::sse::tests::test_mm_getcsr_setcsr_1 core_arch::x86::sse::tests::test_mm_getcsr_setcsr_2 core_arch::x86::sse::tests::test_mm_getcsr_setcsr_underflow core_arch::x86::sse::tests::test_mm_max_ps core_arch::x86::sse::tests::test_mm_max_ss core_arch::x86::sse::tests::test_mm_min_ps core_arch::x86::sse::tests::test_mm_min_ss core_arch::x86::sse::tests::test_mm_mul_ss core_arch::x86::sse::tests::test_mm_rcp_ps core_arch::x86::sse::tests::test_mm_rcp_ss core_arch::x86::sse::tests::test_mm_rsqrt_ps core_arch::x86::sse::tests::test_mm_rsqrt_ss core_arch::x86::sse::tests::test_mm_sfence core_arch::x86::sse::tests::test_mm_sqrt_ps core_arch::x86::sse::tests::test_mm_sqrt_ss core_arch::x86::sse::tests::test_mm_sub_ss core_arch::x86::sse::tests::test_mm_ucomieq_ss core_arch::x86::sse::tests::test_mm_ucomige_ss core_arch::x86::sse::tests::test_mm_ucomigt_ss 
core_arch::x86::sse::tests::test_mm_ucomile_ss core_arch::x86::sse::tests::test_mm_ucomilt_ss core_arch::x86::sse::tests::test_mm_ucomineq_ss core_arch::x86::ssse3::tests::test_mm_hadd_epi16 core_arch::x86::ssse3::tests::test_mm_hadd_epi32 core_arch::x86::ssse3::tests::test_mm_hadds_epi16 core_arch::x86::ssse3::tests::test_mm_hsub_epi16 core_arch::x86::ssse3::tests::test_mm_hsub_epi32 core_arch::x86::ssse3::tests::test_mm_hsubs_epi16 core_arch::x86::ssse3::tests::test_mm_maddubs_epi16 core_arch::x86::ssse3::tests::test_mm_mulhrs_epi16 core_arch::x86::ssse3::tests::test_mm_sign_epi16 core_arch::x86::ssse3::tests::test_mm_sign_epi32 core_arch::x86::ssse3::tests::test_mm_sign_epi8 core_arch::x86_64::fxsr::tests::fxsave64 core_arch::x86_64::sse2::tests::test_mm_cvtsd_si64 core_arch::x86_64::sse2::tests::test_mm_cvtsd_si64x core_arch::x86_64::sse2::tests::test_mm_cvttsd_si64 core_arch::x86_64::sse2::tests::test_mm_cvttsd_si64x core_arch::x86_64::sse42::tests::test_mm_crc32_u64 core_arch::x86_64::sse::tests::test_mm_cvtsi64_ss core_arch::x86_64::sse::tests::test_mm_cvtss_si64 core_arch::x86_64::sse::tests::test_mm_cvttss_si64 test result: FAILED. 725 passed; 190 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.15s ```
stdarch patch

```diff
diff --git a/crates/assert-instr-macro/src/lib.rs b/crates/assert-instr-macro/src/lib.rs
index 99e37c91..ee666063 100644
--- a/crates/assert-instr-macro/src/lib.rs
+++ b/crates/assert-instr-macro/src/lib.rs
@@ -181,7 +181,7 @@ pub fn assert_instr(
 
     let tokens: TokenStream = quote! {
         #item
-        #tokens
+        //#tokens
     };
     tokens.into()
 }
diff --git a/crates/core_arch/src/x86/avx.rs b/crates/core_arch/src/x86/avx.rs
index fafee5c0..097a17f9 100644
--- a/crates/core_arch/src/x86/avx.rs
+++ b/crates/core_arch/src/x86/avx.rs
@@ -738,7 +738,7 @@ pub const _CMP_TRUE_US: i32 = 0x1f;
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm_cmp_pd(a: __m128d, b: __m128d) -> __m128d {
     static_assert_uimm_bits!(IMM5, 5);
-    vcmppd(a, b, IMM5 as i8)
+    vcmppd(a, b, const { IMM5 as i8 })
 }
 
 /// Compares packed double-precision (64-bit) floating-point
@@ -768,7 +768,7 @@ pub unsafe fn _mm256_cmp_pd(a: __m256d, b: __m256d) -> __m256d
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm_cmp_ps(a: __m128, b: __m128) -> __m128 {
     static_assert_uimm_bits!(IMM5, 5);
-    vcmpps(a, b, IMM5 as i8)
+    vcmpps(a, b, const { IMM5 as i8 })
 }
 
 /// Compares packed single-precision (32-bit) floating-point
@@ -783,7 +783,7 @@ pub unsafe fn _mm_cmp_ps(a: __m128, b: __m128) -> __m128 {
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_cmp_ps(a: __m256, b: __m256) -> __m256 {
     static_assert_uimm_bits!(IMM5, 5);
-    vcmpps256(a, b, IMM5 as u8)
+    vcmpps256(a, b, const { IMM5 as u8 })
 }
 
 /// Compares the lower double-precision (64-bit) floating-point element in
diff --git a/crates/core_arch/src/x86/mod.rs b/crates/core_arch/src/x86/mod.rs
index ee8b7e75..f013a9ea 100644
--- a/crates/core_arch/src/x86/mod.rs
+++ b/crates/core_arch/src/x86/mod.rs
@@ -1,6 +1,6 @@
 //! `x86` and `x86_64` intrinsics.
 
-use crate::{intrinsics, marker::Sized, mem::transmute};
+use crate::{marker::Sized, mem::transmute};
 
 #[macro_use]
 mod macros;
@@ -807,6 +807,7 @@ pub use self::adx::*;
 #[cfg(test)]
 use stdarch_test::assert_instr;
 
+/*
 mod avx512f;
 pub use self::avx512f::*;
 
@@ -854,3 +855,4 @@ pub use self::f16c::*;
 
 mod avx512bf16;
 pub use self::avx512bf16::*;
+*/
diff --git a/crates/core_arch/src/x86/test.rs b/crates/core_arch/src/x86/test.rs
index ec429803..20cb774f 100644
--- a/crates/core_arch/src/x86/test.rs
+++ b/crates/core_arch/src/x86/test.rs
@@ -124,7 +124,7 @@ mod x86_polyfill {
     pub use crate::core_arch::x86_64::{_mm256_insert_epi64, _mm_insert_epi64};
 }
 pub use self::x86_polyfill::*;
-
+/*
 pub unsafe fn assert_eq_m512i(a: __m512i, b: __m512i) {
     assert_eq!(transmute::<_, [i32; 16]>(a), transmute::<_, [i32; 16]>(b))
 }
@@ -142,3 +142,4 @@ pub unsafe fn assert_eq_m512d(a: __m512d, b: __m512d) {
         panic!("{:?} != {:?}", a, b);
     }
 }
+*/
diff --git a/crates/core_arch/src/x86_64/mod.rs b/crates/core_arch/src/x86_64/mod.rs
index 461874ec..468dbc45 100644
--- a/crates/core_arch/src/x86_64/mod.rs
+++ b/crates/core_arch/src/x86_64/mod.rs
@@ -36,8 +36,10 @@ pub use self::bmi2::*;
 mod avx2;
 pub use self::avx2::*;
 
+/*
 mod avx512f;
 pub use self::avx512f::*;
+*/
 
 mod bswap;
 pub use self::bswap::*;
```
patch to disable fake cpuid

```diff
diff --git a/src/inline_asm.rs b/src/inline_asm.rs
index 3ba530c0..ea3c2f8e 100644
--- a/src/inline_asm.rs
+++ b/src/inline_asm.rs
@@ -51,7 +51,7 @@ pub(crate) fn codegen_inline_asm<'tcx>(
     }
 
     // Used by stdarch
-    if template[0] == InlineAsmTemplatePiece::String("mov ".to_string())
+    /*if template[0] == InlineAsmTemplatePiece::String("mov ".to_string())
         && matches!(
             template[1],
             InlineAsmTemplatePiece::Placeholder {
@@ -129,7 +129,7 @@ pub(crate) fn codegen_inline_asm<'tcx>(
         let destination_block = fx.get_block(destination.unwrap());
         fx.bcx.ins().jump(destination_block, &[]);
         return;
-    }
+    }*/
 
     // Used by compiler-builtins
     if fx.tcx.symbol_name(fx.instance).name.starts_with("___chkstk") {
```