llvm / llvm-project

The LLVM Project is a collection of modular and reusable compiler and toolchain technologies.
http://llvm.org
Other
27.83k stars 11.46k forks source link

WASM: v128.bitselect When Argument is Zeroed Can Be Simplified #73454

Open Geotale opened 9 months ago

Geotale commented 9 months ago

Example code, compiled with Rustc nightly (arguments: -C opt-level=3 --target=wasm32-unknown-unknown -C target-feature=+simd128 -C target-feature=+multivalue)

#![feature(portable_simd)]
use std::simd::*;

/// Zeroes every lane of `input` whose bits under the `0x7f800000` mask are all
/// clear, and returns every other lane unchanged.
///
/// `0x7f800000` covers bits 23..=30, i.e. the exponent field of an IEEE-754
/// binary32 value, so the affected lanes are the zeros and subnormals. The
/// whole lane is cleared, sign bit included, so those lanes come back with an
/// all-zero bit pattern (`+0.0`).
///
/// NOTE(review): `#[inline(never)]` is presumably here so the function is
/// emitted as its own symbol and its generated code can be inspected.
#[inline(never)]
pub fn flush_subnormals(input: f32x4) -> f32x4 {
    // Reinterpret the float lanes as raw `u32` bit patterns.
    let bits = input.to_bits();

    // Keep only the bits selected by 0x7f800000, then build a per-lane mask
    // that is set where that field is non-zero.
    let mask = bits & u32x4::splat(0x7f800000);
    let ne_mask = mask.simd_ne(u32x4::splat(0x00000000));

    // `to_int()` turns the lane mask into an integer vector (all-ones for set
    // lanes, zero for clear lanes); AND-ing it with `bits` keeps the lanes
    // whose field was non-zero and clears the rest.
    let result = bits & ne_mask.to_int().cast::<u32>();
    f32x4::from_bits(result)
}

Compiling this results in the IR:

define hidden void @example::flush_subnormals(ptr noalias nocapture noundef writeonly sret(<4 x float>) align 16 dereferenceable(16) %_0, ptr noalias nocapture noundef readonly align 16 dereferenceable(16) %input) unnamed_addr {
start:
  %0 = load <4 x i32>, ptr %input, align 16
  %1 = and <4 x i32> %0, <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>
  %.not = icmp eq <4 x i32> %1, zeroinitializer
  %2 = select <4 x i1> %.not, <4 x i32> zeroinitializer, <4 x i32> %0
  store <4 x i32> %2, ptr %_0, align 16
  ret void
}

which then produces the WASM:

flush_subnormals:
        local.get       0
        v128.const      0, 0, 0, 0
        local.tee       2
        local.get       1
        v128.load       0
        local.tee       3
        local.get       2
        local.get       3
        v128.const      2139095040, 2139095040, 2139095040, 2139095040 ; 0x7f800000 0x7f800000 0x7f800000 0x7f800000
        v128.and
        i32x4.eq
        v128.bitselect
        v128.store      0
        end_function

Because the first operand of bitselect is all zeros (0 0 0 0) and the comparison can be inverted (eq to ne), the WebAssembly can be simplified to:

flush_subnormals:
        local.get       0
        local.get       1
        v128.load       0
        local.tee       2                                              ; One fewer local used
        v128.const      0, 0, 0, 0
        local.get       2
        v128.const      2139095040, 2139095040, 2139095040, 2139095040 ; 0x7f800000 0x7f800000 0x7f800000 0x7f800000
        v128.and
        i32x4.ne                                                       ; Condition reversed
        v128.and                                                       ; bitselect replaced with and
        v128.store      0
        end_function

If the select expression instead had zeroinitializer as its second value operand (the value chosen when the condition is false), the condition would not need to be reversed, and the bitselect could be converted directly into an and!

Godbolt link: https://godbolt.org/z/TqK5f588f

llvmbot commented 9 months ago

@llvm/issue-subscribers-backend-webassembly

Author: None (Geotale)

Example code, compiled with Rustc nightly (arguments: `-C opt-level=3 --target=wasm32-unknown-unknown -C target-feature=+simd128 -C target-feature=+multivalue`)

```rust
#![feature(portable_simd)]
use std::simd::*;

#[inline(never)]
pub fn flush_subnormals(input: f32x4) -> f32x4 {
    let bits = input.to_bits();

    let mask = bits & u32x4::splat(0x7f800000);
    let ne_mask = mask.simd_ne(u32x4::splat(0x00000000));

    let result = bits & ne_mask.to_int().cast::<u32>();
    f32x4::from_bits(result)
}
```

Compiling this results in the IR:

```llvm
define hidden void @example::flush_subnormals(ptr noalias nocapture noundef writeonly sret(<4 x float>) align 16 dereferenceable(16) %_0, ptr noalias nocapture noundef readonly align 16 dereferenceable(16) %input) unnamed_addr {
start:
  %0 = load <4 x i32>, ptr %input, align 16
  %1 = and <4 x i32> %0, <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>
  %.not = icmp eq <4 x i32> %1, zeroinitializer
  %2 = select <4 x i1> %.not, <4 x i32> zeroinitializer, <4 x i32> %0
  store <4 x i32> %2, ptr %_0, align 16
  ret void
}
```

which then produces the WASM:

```wasm
flush_subnormals:
        local.get       0
        v128.const      0, 0, 0, 0
        local.tee       2
        local.get       1
        v128.load       0
        local.tee       3
        local.get       2
        local.get       3
        v128.const      2139095040, 2139095040, 2139095040, 2139095040 ; 0x7f800000 0x7f800000 0x7f800000 0x7f800000
        v128.and
        i32x4.eq
        v128.bitselect
        v128.store      0
        end_function
```

Due to the first argument of `bitselect` being `0 0 0 0` and the condition being reversible, the WebAssembly is simplifiable into:

```wasm
flush_subnormals:
        local.get       0
        local.get       1
        v128.load       0
        local.tee       2                                              ; One fewer local used
        v128.const      0, 0, 0, 0
        local.get       2
        v128.const      2139095040, 2139095040, 2139095040, 2139095040 ; 0x7f800000 0x7f800000 0x7f800000 0x7f800000
        v128.and
        i32x4.ne                                                       ; Condition reversed
        v128.and                                                       ; bitselect replaced with and
        v128.store      0
        end_function
```

If the `select` expression had the second argument as a zeroinitializer instead, the condition would not need to be reversed, and the `bitselect` would simply be able to be converted into an `and`!

Godbolt link: https://godbolt.org/z/TqK5f588f