Robbepop / runwell

An experimental WebAssembly virtual machine.
Apache License 2.0
12 stars 2 forks source link

Implement conversion from Wasm to Runwell IR #2

Open Robbepop opened 4 years ago

Robbepop commented 4 years ago

The runwell IR is the intermediate representation of the runwell JIT compiler. It is an SSA based bytecode that is inspired by both Wasm and LLVM IR.

Example Transformation

Below we show an example transformation of the Wasm input function and the resulting runwell IR of the same function.

The Wasm function identified by 89 is the function we want to translate. It takes a single parameter of type i32, returns nothing and has 2 local variables of type i32. Internally it calls function 120 and function 78.

function 89: [i32] => []
locals
    2 x i32
end
body
    global.get 0
    i32.const 16
    i32.sub
    local.tee 1
    global.set 0
    local.get 1
    i32.const 8
    i32.add
    local.get 0
    call 120
    block
        local.get 1
        i32.load offset: 12, alignment: 2
        local.tee 2
        eqz
        brif 0
        local.get 0
        i32.load offset: 0, alignment: 2
        local.get 1
        i32.load offset: 8, alignment: 2
        local.get 2
        call 76
    end
    local.get 1
    i32.const 16
    i32.add
    global.set 0
end

With internally called functions 120 and 76: (Note that we are only interested in the conversion of function 89.)

function 120: [i32, i32] => []
body
    local.get 0
    local.get 0
    i32.load offset: 8, alignment: 2
    local.get 1
    call 123
end

function 76: [i32, i32] => [i32]
locals
    1 x i32
end
body
    local.get 0
    i32.load offset: 0, alignment: 2
    local.set 2
    local.get 0
    local.get 1
    i32.store32
    local.get 2
end

The final translated runwell IR function: It consists of 3 basic blocks. Note that this SSA representation is not minimal not pruned and not optimized.

function 89: [%0: i32] => []
    %1 <- i32.local
    %2 <- i32.local
    %3 <- global.get 0
    %4 <- i32.const 16
    %5 <- i32.sub %3 %4
    local.set 0 <- %5
    global.set 0 <- %5
    %6 <- i32.const 8
    %7 <- i32.add %5 %6
    %8 <- local.get 0
    %9 <- call 120 (%7, %8)
    %10 <- local.get 1
    %11 <- i32.load 0 offset: 12, alignment: 2
    local.set 2 <- %11
    %12 <- local.get 2
    %13 <- i32.const 0
    %14 <- i32.cmp -eq %12 %13
    ite %14 %15 %16

%15 <- block:
    %17 <- local.get 0
    %18 <- i32.load 0 offset: 0, alignment: 2
    %19 <- local.get 1
    %20 <- i32.load 0 offset: 8, alignment: 2
    %21 <- local.get 2
    %22 <- call 76 (%20, %21)
    br %16

%16 <- block:
    %23 <- local.get 1
    %24 <- i32.const 16
    %25 <- i32.add %23 %24
    global.set 0 <- %25
    return

Example 2

Another simpler example that doesn't include branches shows that our IR is very similar to Wasm.

Before:

function 166: [i32, i32] => [i32]
body
    local.get 1
    i32.load offset: 24, alignment: 2
    i32.const 68344
    i32.const 11
    local.get 1
    i32.const 28
    i32.add
    i32.load offset: 0, alignment: 2
    i32.load offset: 12, alignment: 2
    callindirect table: 0, offset: 4
end

After:

function 166: [%0: i32, %1: i32] => [i32]
body
    %2 <- i32.get local 1
    %3 <- i32.load offset: 24, alignment: 2 at %2
    %4 <- i32.const 68344
    %5 <- i32.const 11
    %6 <- i32.get local 1
    %7 <- i32.const 28
    %8 <- i32.add %7 %6
    %9 <- i32.load offset: 0, alignment: 2 at %8
    %10 <- i32.load offset: 12, alignment: 2 %5
    %11 <- call.indirect table: 0, offset: 4 (%9, %10)
    return %11
end

Note that the function returned by call.indirect table: 0, offset: 4 has signature [i32, i32] => [i32].

Example 3

Given Wasm:

function 156: [I32] => [I32]
locals
    1 x i32
end
body
    block
        local.get 0
        i32.load offset 0, alignment 2
        i32.const -4
        i32.and
        local.tee 1
        local.get 0
        i32.sub
        i32.const -8
        i32.add
        local.tee 0
        local.get 1
        i32.ge_u

        brif 0
        local.get 0
        return
    end
    i32.const 68128
    i32.const 33
    i32.const 68248
    call 164
    unreachable
end

After runwell IR translation:

function 156: [%0: i32] => [i32]
    %1 <- i32.local
    %2 <- i32.get local %0
    %3 <- i32.load offset 0, aligment 2
    %4 <- i32.const -4
    %5 <- i32.and %3 %4
    i32.set local %1
    %7 <- i32.get local %0
    %8 <- i32.sub %6 %7
    %9 <- i32.const -8
    %10 <- i32.add %8 %9
    i32.set local %0
    %11 <- i32.get local %1
    %12 <- i32.cmp uge %11 %10
    ite %5 then %2 else %3

%2 <- block
    %14 <- i32.const 68128
    %15 <- i32.const 33
    %16 <- i32.const 68248
    call 164 params (%14, %15, %16)
    unreachable

%3 <- block
    %13 <- i32.get local %0
    return %13

Example 4 (with Stack Table)

Input Wasm function:

function 143: [I32, I32, I32, I32, I32] => [I32]
locals
    1 x i32
end
body
    block
        local.get 0
        local.get 4
        local.get 3
        call 159
        local.tee 5
        i32.eqz
        brif 0
        local.get 5
        local.get 1
        local.get 4
        local.get 2
        local.get 2
        local.get 4
        i32.ge_u
        select
        call 188

        drop
        local.get 0
        local.get 1
        local.get 2
        local.get 3
        call 160
    end
    local.get 5
end
function 159: [I32, I32, I32] => [I32]
function 160: [I32, I32, I32, I32] => []
function 188: [I32, I32, I32] => [I32]

Output runwell IR:

function 143: [
    %0: I32,
    %1: I32,
    %2: I32,
    %3: I32,
    %4: I32
] => [I32]
    %5 <- i32.local
    %7 <- i32.get local 0
    %8 <- i32.get local 4
    %9 <- i32.get local 3
    %10 <- call 159 params [%7, %8, %9]
    i32.set local %5
    %11 <- i32.const 0
    %12 <- i32.cmp eq %10 %11
    ite %12 then block %6 else block %13

%6 <- block
    %13 <- i32.get local %5
    return %13

%12 <- block
    %14 <- i32.get local %5
    %15 <- i32.get local %1
    %16 <- i32.get local %4
    %17 <- i32.get local %2
    %18 <- %17
    %19 <- %16
    %20 <- i32.cmp uge %18 %19
    %21 <- i32.select %20 %16 %17
    %22 <- call 188 params [%14, %15, %21]
    %23 <- i32.get local %0
    %24 <- i32.get local %1
    %25 <- i32.get local %2
    %26 <- i32.get local %3
    call 160 params [%23, %24, %25, %26]
    br block %6

Stack during computing of the runwell IR:

- entry
%7
%8
%9
- call 159
%10
- i32.set local %10
%10
- i32.const 0
%10
%11
- i32.cmp eq %10 %11
%12
- ite %12 then block %6 else block %12
- %12 <- block
%14
%15
%16
%17
%18
%19
- i32.cmp uge %18 %19
%14
%15
%16
%17
%20
- i32.select %20 then %16 else %17
%14
%15
%21
- call 188 params [%14, %15, %21]
%22
- drop (Wasm only)
- i32.get local {0, 1, 2, 3}
%23
%24
%25
%26
- call 160 params [%23, %24, %25, %26]
- br block %6
%13
- return %13

Design

The runwell IR is designed to represent Wasm bytecode. For local variables we use the local keyword where LLVM would have used alloca instead. All operations operate on the direct value, e.g.

%0 <- i32.const 5
%1 <- i32.const 42
%2 <- i32.add %0 %1

Instead of

%2 <- i32.add 5 42

While this creates a bigger IR it also heavily simplifies the representation and procedures on it. Note that it generally shouldn't impose a problem for the codegen backend to procude the same optimized code for both versions.

There are a few operations that operate on immediates, namely T.const, local.get, local.set, global.get, global.set, load, store and call. For T.const the immediate value is the constant itself while for local.get, local.set, global.get global.set, load, store and call the immediate value is the respective identifier of the operated on object, e.g. in the case of local.get the immediate value is the ID of the local variable.

Syntax

Syntactically the runwell IR shall be as simple as possible for readers while adhering to the Wasm and LLVM origins.

Robbepop commented 3 years ago

The Runwell IR builder initial implementation seems to be working as of recently. There are several test cases for which it already produces correct SSA IR code. The SSA conversion procedure implements the algorithm introduced in this paper.

The next step is to use the new Runwell IR builder API to construct SSA IR from given Wasm bytecode input.

Note that current tests do not automatically test the constructed functions for expected output since we are currently missing an API for that which is planned.

Some examples include the following:

Example: If Then Else

Given the following test code:

#[test]
fn simple_if() -> Result<(), IrError> {
    let mut b = Function::build()
        .with_inputs(&[IntType::I32.into()])?
        .with_outputs(&[])?
        .declare_variables(1, IntType::I32.into())?
        .body();

    let then_block = b.create_block();
    let else_block = b.create_block();
    let exit_block = b.create_block();

    let input = Variable::from_raw(RawIdx::from_u32(0));
    let var = Variable::from_raw(RawIdx::from_u32(1));

    let v0 = b.read_var(input)?;
    let v1 = b.ins()?.constant(IntConst::I32(0))?;
    let v2 = b.ins()?.icmp(IntType::I32, CompareIntOp::Eq, v0, v1)?;
    b.ins()?.if_then_else(v2, then_block, else_block)?;

    b.switch_to_block(then_block)?;
    let v3 = b.ins()?.constant(IntConst::I32(10))?;
    b.write_var(var, v3)?;
    b.ins()?.br(exit_block)?;
    b.seal_block()?;

    b.switch_to_block(else_block)?;
    let v4 = b.ins()?.constant(IntConst::I32(20))?;
    b.write_var(var, v4)?;
    b.ins()?.br(exit_block)?;
    b.seal_block()?;

    b.switch_to_block(exit_block)?;
    let v5 = b.read_var(var)?;
    b.ins()?.return_value(v5)?;
    b.seal_block()?;

    let fun = b.finalize()?;
    println!("{}", fun);

    Ok(())
}

The test prints out the following constructed IR function:

fn (v0: i32)
bb0:
    v1: i32 = const 0
    v2: i32 = icmp i32 eq v0 v1
    if v2 then bb1 else bb2
bb1:
    v3: i32 = const 10
    br bb3
bb2:
    v4: i32 = const 20
    br bb3
bb3:
    v5: i32 = ϕ [ bb1 -> v3, bb2 -> v4 ]
    ret v5

Example: Counter Loop

Given the following test code:

#[test]
fn simple_loop() -> Result<(), IrError> {
    let mut b = Function::build()
        .with_inputs(&[IntType::I32.into()])?
        .with_outputs(&[])?
        .declare_variables(1, IntType::I32.into())?
        .body();

    let loop_head = b.create_block();
    let loop_body = b.create_block();
    let loop_exit = b.create_block();

    let input = Variable::from_raw(RawIdx::from_u32(0));
    let counter = Variable::from_raw(RawIdx::from_u32(1));

    let v0 = b.ins()?.constant(IntConst::I32(0))?;
    b.write_var(counter, v0)?;
    b.ins()?.br(loop_head)?;

    b.switch_to_block(loop_head)?;
    let v1 = b.read_var(counter)?;
    let v2 = b.read_var(input)?;
    let v3 = b.ins()?.icmp(IntType::I32, CompareIntOp::Slt, v1, v2)?;
    b.ins()?.if_then_else(v3, loop_body, loop_exit)?;

    b.switch_to_block(loop_body)?;
    let v4 = b.read_var(counter)?;
    let v5 = b.ins()?.constant(IntConst::I32(1))?;
    let v6 = b.ins()?.iadd(IntType::I32, v4, v5)?;
    b.write_var(counter, v6)?;
    b.ins()?.br(loop_head)?;
    b.seal_block()?;

    b.switch_to_block(loop_head)?;
    b.seal_block()?;

    b.switch_to_block(loop_exit)?;
    let v7 = b.read_var(counter)?;
    b.ins()?.return_value(v7)?;
    b.seal_block()?;

    let fun = b.finalize()?;
    println!("{}", fun);

    Ok(())
}

The test prints out the following constructed IR function:

fn (v0: i32)
bb0:
    v1: i32 = const 0
    br bb1
bb1:
    v2: i32 = ϕ [ bb0 -> v1, bb2 -> v7 ]
    v4: i32 = scmp i32 slt v2 v0
    if v4 then bb2 else bb3
bb2:
    v6: i32 = const 1
    v7: i32 = iadd i32 v2 v6
    br bb1
bb3:
    ret v2