bytecodealliance / wasm-micro-runtime

WebAssembly Micro Runtime (WAMR)
Apache License 2.0
4.99k stars 628 forks source link

vmcore: unaligned memory access on RISCV64 targets #2136

Open I-mikan-I opened 1 year ago

I-mikan-I commented 1 year ago

Description

ems_gc defines the macro GC_HEAD_PADDING 4, which causes the heap's base to be 32-bit aligned and trigger unaligned access faults on 64 bit targets.

Replicate

Target used: kendryte k210 main.c:

int main()
{
    char error_buf[128];
    wasm_module_t module;
    wasm_module_inst_t module_inst;
    wasm_function_inst_t func;
    wasm_exec_env_t exec_env;
    uint32_t size, stack_size = 8092, heap_size = 8092;

    /* initialize the wasm runtime by default configurations */
    RuntimeInitArgs args = {
        .mem_alloc_type = Alloc_With_System_Allocator,
    };
    wasm_runtime_full_init(&args);

    /* parse the WASM file from buffer and create a WASM module */
    module = wasm_runtime_load(aha_mont64, sizeof aha_mont64, error_buf, sizeof(error_buf));

    /* create an instance of the WASM module (WASM linear memory is ready) */
    module_inst = wasm_runtime_instantiate(module, stack_size, heap_size,
                                           error_buf, sizeof(error_buf));

    func = wasm_runtime_lookup_function(module_inst, "_start", NULL);
    exec_env = wasm_runtime_create_exec_env(module_inst, stack_size);
    wasm_val_t results[1];
    if (wasm_runtime_call_wasm_v(exec_env, func, 1, results, 0))
    {
        printf("wasm function return: %d\n", results[0].of.i32);
    }
    else
    {
        printf("error executing wasm function!\n%s\n", wasm_runtime_get_exception(module_inst));
    }

    wasm_runtime_destroy_exec_env(exec_env);
    wasm_runtime_deinstantiate(module_inst);
    wasm_runtime_unload(module);
    wasm_runtime_destroy();
}

CMakeLists.txt:

set (SHARED_PLATFORM_CONFIG src/platform/shared_platform.cmake)
set (WAMR_BUILD_PLATFORM "platform")
set (WAMR_BUILD_TARGET "RISCV64_LP64")
set (WAMR_BUILD_INTERP 1)
set (WAMR_BUILD_FAST_INTERP 0)
set (WAMR_BUILD_LIBC_BUILTIN 1)
set (WAMR_DISABLE_HW_BOUND_CHECK 0)
set (WAMR_DISABLE_STACK_HW_BOUND_CHECK 0)
set (WAMR_ROOT_DIR wasm-micro-runtime)

platform_api_vmcore.c

#include "platform_api_vmcore.h"
#include <stdio.h>
#include <stdarg.h>
/****************************************************
 *                     Section 1                    *
 *        Interfaces required by the runtime        *
 ****************************************************/

/**
 * Initialize the platform internal resources if needed,
 * this function is called by wasm_runtime_init() and
 * wasm_runtime_full_init()
 *
 * @return 0 if success
 */
int
bh_platform_init(void) {
    return 0;
}

/**
 * Destroy the platform internal resources if needed,
 * this function is called by wasm_runtime_destroy()
 */
void
bh_platform_destroy(void) {
    return 0;
}

/**
 ******** memory allocator APIs **********
 */

void *
os_malloc(unsigned size) {
    return malloc(size);
}

void *
os_realloc(void *ptr, unsigned size) {
    return realloc(ptr, size);
}

void
os_free(void *ptr) {
    return free(ptr);
}

/**
 * Note: the above APIs can simply return NULL if wasm runtime
 *       isn't initialized with Alloc_With_System_Allocator.
 *       Refer to wasm_runtime_full_init().
 */

int
os_printf(const char *format, ...) {
    va_list argptr;
    va_start(argptr, format);
    int result = vfprintf(stdout, format, argptr);
    va_end(argptr);
    return result;
}

int
os_vprintf(const char *format, va_list ap) {
    return vfprintf(stdout, format, ap);
}

/**
 * Get microseconds after boot.
 */
uint64
os_time_get_boot_microsecond(void) {
    return 0;
}

/**
 * Get current thread id.
 * Implementation optional: Used by runtime for logging only.
 */
korp_tid
os_self_thread(void) {
    return NULL;
}

extern uint64_t *__tp0;
/**
 * Get current thread's stack boundary address, used for runtime
 * to check the native stack overflow. Return NULL if it is not
 * easy to implement, but may have potential issue.
 */
uint8 *
os_thread_get_stack_boundary(void) {
    return (uint8 *)__tp0;
}

/**
 ************** mutext APIs ***********
 *  vmcore:  Not required until pthread is supported by runtime
 *  app-mgr: Must be implemented
 */

int
os_mutex_init(korp_mutex *mutex) {
    return 0;
}

int
os_mutex_destroy(korp_mutex *mutex) {
    return 0;
}

int
os_mutex_lock(korp_mutex *mutex) {
    return 0;
}

int
os_mutex_unlock(korp_mutex *mutex) {
    return 0;
}
I-mikan-I commented 1 year ago

Note, the problematic code is in ems_kfc.c:

gc_init_with_pool:
//...
base_addr =
        (char *)(((uintptr_t)base_addr + 7) & (uintptr_t)~7) + GC_HEAD_PADDING;
//...

gc_init_internal:
//...
q = (hmu_tree_node_t *)heap->base_addr;
q->size = heap->current_size; // STORE FAULT
wenyongh commented 1 year ago

Hi, q->size is uint32 tyte and it should be not an issue to access it. My understanding is the accessing of q->parent causes the issue. I uploaded PR #2140 to fix it, could you help have a try?

I-mikan-I commented 1 year ago

Doesn't seem to fix the issue:

V (18820889) SYSCALL: misaligned store recovered at 8001a990. len:08,addr:8004cd0c,reg:09,data:000000008004ac24,float:0                                                
core dump: misaligned load                                                                                                                                             
Cause 0x0000000000000004, EPC 0x0000000080007fbe                                                                                                                       
reg[00](zero ) = 0x0000000000000000, reg[01](ra   ) = 0x0000000080000222                                                                                               
reg[02](sp   ) = 0x00000000800311a0, reg[03](gp   ) = 0x00000000800236a0                                                                                               
reg[04](tp   ) = 0x00000000800298c0, reg[05](t0   ) = 0x0000000080000af8                                                                                               
reg[06](t1   ) = 0x0000000080007faa, reg[07](t2   ) = 0xffffffffffffffff                                                                                               
reg[08](s0/fp) = 0x000000008001a99a, reg[09](s1   ) = 0x000000008004ac24                                                                                               
reg[10](a0   ) = 0x0000000000000006, reg[11](a1   ) = 0x000000008001a99a                                                                                               
reg[12](a2   ) = 0x00000000800313c0, reg[13](a3   ) = 0x00000000800314c0                                                                                               
reg[14](a4   ) = 0x0000000000000003, reg[15](a5   ) = 0x0000000000000003                                                                                               
reg[16](a6   ) = 0x0000000080022070, reg[17](a7   ) = 0x00000000000000d6                                                                                               
reg[18](s2   ) = 0x000000008004ccf4, reg[19](s3   ) = 0x0000000000000001                                                                                               
reg[20](s4   ) = 0x000000008004cbc0, reg[21](s5   ) = 0x000000008004cbc0                                                                                               
reg[22](s6   ) = 0x0000000080031718, reg[23](s7   ) = 0x0000000000000080                                                                                               
reg[24](s8   ) = 0x0000000080039b20, reg[25](s9   ) = 0x0000000000000001                                                                                               
reg[26](s10  ) = 0x00000000800399e0, reg[27](s11  ) = 0x0000000000001fa0                                                                                               
reg[28](t3   ) = 0x000000000000001f, reg[29](t4   ) = 0x00000000000000f4                                                                                               
reg[30](t5   ) = 0x0000000080039bf0, reg[31](t6   ) = 0x00000000000000e0                                                                                               
freg[00](ft0 ) = 0x0000000000000000(), freg[00]() = 0x000000008001c278()                                                                                               
freg[02](ft2 ) = 0x0000000000000000(), freg[00]() = 0x000000008001c288()                                                                                               
freg[04](ft4 ) = 0x3432636100000000(), freg[-1073741824]() = 0x000000008001c298()                                                                                      
freg[06](ft6 ) = 0x0000000000000000(), freg[00]() = 0x000000008001c2a8()                                                                                               
freg[08](fs0 ) = 0x0000000000000000(), freg[00]() = 0x000000008001c2d0()                                                                                               
freg[10](fa0 ) = 0x0000000000000000(), freg[00]() = 0x000000008001c300()                                                                                               
freg[12](fa2 ) = 0x0000000000000000(), freg[00]() = 0x000000008001c328()                                                                                               
freg[14](fa4 ) = 0x0000000000000000(), freg[00]() = 0x000000008001c338()                                                                                               
freg[16](fa6 ) = 0x0000000000000000(), freg[00]() = 0x000000008001c348()                                                                                               
freg[18](fs2 ) = 0x0000000000000000(), freg[00]() = 0x000000008001c370()                                                                                               
freg[20](fs4 ) = 0x0000000000000000(), freg[00]() = 0x000000008001c380()                                                                                               
freg[22](fs6 ) = 0x0000000000000000(), freg[00]() = 0x000000008001c390()                                                                                               
freg[24](fs8 ) = 0x0000000000000000(), freg[00]() = 0x000000008001c3a0()                                                                                               
freg[26](fs10) = 0x0000000000000000(), freg[00]() = 0x000000008001c3b0()                                                                                               
freg[28](ft8 ) = 0x0000000000000000(), freg[00]() = 0x000000008001c3d8()                                                                                               
freg[30](ft10) = 0x0000000000000000(), freg[00]() = 0x000000008001c3e8()                                                                                               
W (105522967) SYSCALL: sys_exit called by core 0 with 0x539

dump:

Dump of assembler code for function gc_init_internal:
10      {
11          hmu_tree_node_t *root = NULL, *q = NULL;
12          int ret;
13
14          memset(heap, 0, sizeof *heap);
   0x000000008001a8de <+0>:     01 11   addi    sp,sp,-32
   0x000000008001a8e0 <+2>:     26 e4   sd      s1,8(sp)
   0x000000008001a8e2 <+4>:     4a e0   sd      s2,0(sp)
   0x000000008001a8e4 <+6>:     ae 84   mv      s1,a1
   0x000000008001a8e6 <+8>:     32 89   mv      s2,a2
   0x000000008001a8e8 <+10>:    81 45   li      a1,0
   0x000000008001a8ea <+12>:    13 06 80 16     li      a2,360
   0x000000008001a8ee <+16>:    22 e8   sd      s0,16(sp)
   0x000000008001a8f0 <+18>:    06 ec   sd      ra,24(sp)
   0x000000008001a8f2 <+20>:    2a 84   mv      s0,a0
   0x000000008001a8f4 <+22>:    97 60 fe ff     auipc   ra,0xfffe6
   0x000000008001a8f8 <+26>:    e7 80 80 1d     jalr    472(ra) # 0x80000acc <memset>

15
16          ret = os_mutex_init(&heap->lock);
   0x000000008001a8fc <+30>:    13 05 84 01     addi    a0,s0,24
   0x000000008001a900 <+34>:    97 60 fe ff     auipc   ra,0xfffe6
   0x000000008001a904 <+38>:    e7 80 80 c3     jalr    -968(ra) # 0x80000538 <os_mutex_init>

17          if (ret != BHT_OK) {
   0x000000008001a908 <+42>:    0d c1   beqz    a0,0x8001a92a <gc_init_internal+76>

18              os_printf("[GC_ERROR]failed to init lock\n");
   0x000000008001a90a <+44>:    17 65 00 00     auipc   a0,0x6
   0x000000008001a90e <+48>:    13 05 e5 9f     addi    a0,a0,-1538 # 0x80020308
   0x000000008001a912 <+52>:    97 60 fe ff     auipc   ra,0xfffe6
   0x000000008001a916 <+56>:    e7 80 80 bc     jalr    -1080(ra) # 0x800004da <os_printf>

19              return NULL;
   0x000000008001a91a <+60>:    01 44   li      s0,0
   0x000000008001a91c <+62>:    22 85   mv      a0,s0
   0x000000008001a91e <+64>:    e2 60   ld      ra,24(sp)
   0x000000008001a920 <+66>:    42 64   ld      s0,16(sp)
   0x000000008001a922 <+68>:    a2 64   ld      s1,8(sp)
   0x000000008001a924 <+70>:    02 69   ld      s2,0(sp)
   0x000000008001a926 <+72>:    05 61   addi    sp,sp,32
   0x000000008001a928 <+74>:    82 80   ret

20          }
21
22          /* init all data structures*/
23          heap->current_size = heap_max_size;
   0x000000008001a92a <+76>:    23 28 24 01     sw      s2,16(s0)

24          heap->base_addr = (gc_uint8 *)base_addr;
   0x000000008001a92e <+80>:    23 22 24 17     sw      s2,356(s0)
   0x000000008001a932 <+84>:    13 09 44 12     addi    s2,s0,292
   0x000000008001a936 <+88>:    04 e4   sd      s1,8(s0)

25          heap->heap_id = (gc_handle_t)heap;
   0x000000008001a938 <+90>:    00 e0   sd      s0,0(s0)

26
27          heap->total_free_size = heap->current_size;
28          heap->highmark_size = 0;
   0x000000008001a93a <+92>:    23 20 04 16     sw      zero,352(s0)

29
30          root = heap->kfc_tree_root = (hmu_tree_node_t *)heap->kfc_tree_root_buf;
   0x000000008001a93e <+96>:    23 38 24 15     sd      s2,336(s0)

31          memset(root, 0, sizeof *root);
   0x000000008001a942 <+100>:   13 06 80 02     li      a2,40
   0x000000008001a946 <+104>:   81 45   li      a1,0
   0x000000008001a948 <+106>:   4a 85   mv      a0,s2
   0x000000008001a94a <+108>:   97 60 fe ff     auipc   ra,0xfffe6
   0x000000008001a94e <+112>:   e7 80 20 18     jalr    386(ra) # 0x80000acc <memset>

32          root->size = sizeof *root;
   0x000000008001a952 <+116>:   93 07 80 02     li      a5,40
   0x000000008001a956 <+120>:   23 24 f4 12     sw      a5,296(s0)

33          hmu_set_ut(&root->hmu_header, HMU_FC);
34          hmu_set_size(&root->hmu_header, sizeof *root);
   0x000000008001a95a <+124>:   b7 07 00 40     lui     a5,0x40000
   0x000000008001a95e <+128>:   95 07   addi    a5,a5,5
   0x000000008001a960 <+130>:   23 22 f4 12     sw      a5,292(s0)

35
36          q = (hmu_tree_node_t *)heap->base_addr;
37          memset(q, 0, sizeof *q);
   0x000000008001a964 <+134>:   13 06 80 02     li      a2,40
   0x000000008001a968 <+138>:   81 45   li      a1,0
   0x000000008001a96a <+140>:   26 85   mv      a0,s1
   0x000000008001a96c <+142>:   97 60 fe ff     auipc   ra,0xfffe6
   0x000000008001a970 <+146>:   e7 80 00 16     jalr    352(ra) # 0x80000acc <memset>

38          hmu_set_ut(&q->hmu_header, HMU_FC);
39          hmu_set_size(&q->hmu_header, heap->current_size);
   0x000000008001a974 <+150>:   18 48   lw      a4,16(s0)

40
41          hmu_mark_pinuse(&q->hmu_header);
   0x000000008001a976 <+152>:   9c 40   lw      a5,0(s1)
   0x000000008001a978 <+154>:   b7 06 00 38     lui     a3,0x38000
   0x000000008001a97c <+158>:   13 06 e0 02     li      a2,46
   0x000000008001a980 <+162>:   f5 8f   and     a5,a5,a3
   0x000000008001a982 <+164>:   9b 56 37 00     srliw   a3,a4,0x3
   0x000000008001a986 <+168>:   d5 8f   or      a5,a5,a3
   0x000000008001a988 <+170>:   b7 06 00 60     lui     a3,0x60000
   0x000000008001a98c <+174>:   d5 8f   or      a5,a5,a3
   0x000000008001a98e <+176>:   9c c0   sw      a5,0(s1)

42          root->right = q;
   0x000000008001a990 <+178>:   23 3e 94 12     sd      s1,316(s0)

43          q->parent = root;
   0x000000008001a994 <+182>:   d8 c0   sw      a4,4(s1)
   0x000000008001a996 <+184>:   03 25 84 12     lw      a0,296(s0)
   0x000000008001a99a <+188>:   23 b0 24 03     sd      s2,32(s1)

44          q->size = heap->current_size;
45
46          bh_assert(root->size <= HMU_FC_NORMAL_MAX_SIZE);
   0x000000008001a99e <+192>:   97 66 00 00     auipc   a3,0x6
   0x000000008001a9a2 <+196>:   93 86 a6 98     addi    a3,a3,-1654 # 0x80020328
   0x000000008001a9a6 <+200>:   97 65 00 00     auipc   a1,0x6
   0x000000008001a9aa <+204>:   93 85 a5 9a     addi    a1,a1,-1622 # 0x80020350
   0x000000008001a9ae <+208>:   13 35 95 0f     sltiu   a0,a0,249
   0x000000008001a9b2 <+212>:   97 50 ff ff     auipc   ra,0xffff5
   0x000000008001a9b6 <+216>:   e7 80 60 84     jalr    -1978(ra) # 0x8000f1f8 <bh_assert_internal>

47
48          return heap;
   0x000000008001a9ba <+220>:   8d b7   j       0x8001a91c <gc_init_internal+62>
End of assembler dump.

The issue might be that the structs aren't packed, so the pointers will always be aligned to 8-byte boundaries within the struct.

wenyongh commented 1 year ago

The failure seems to be here:

42          root->right = q;
   0x000000008001a990 <+178>:   23 3e 94 12     sd      s1,316(s0)

43          q->parent = root;

Maybe you can add printf to dump the value of root, &root->right and &q->parent?

wenyongh commented 1 year ago

@I-mikan-I I submitted a patch set to fix the issues, could you please try again?