bytecodealliance / wasm-micro-runtime

WebAssembly Micro Runtime (WAMR)
Apache License 2.0
4.96k stars 624 forks source link

Native stack overflow issue when issuing wasm_runtime_init_thread_env() API #3909

Open rpatwa701 opened 2 days ago

rpatwa701 commented 2 days ago

Hi All,

I have developed a WASM containerized application, which I run using the iwasm runtime. I have a scenario where the WASM application registers a callback with the native library. I am using the sequence of APIs below to look up and invoke the callback from the native library:

/*
 * Native function from the report: copies a sample `struct example` into
 * `test`, then looks up the wasm function named by `cb` in the caller's
 * module instance and calls it with one argument (15) on `exec_env`.
 *
 * Parameters:
 *   exec_env - execution environment handed to this native call; used to
 *              obtain the module instance and to call back into wasm.
 *   val      - not used in this snippet.
 *   test     - destination that receives a copy of `testapp`.
 *              NOTE(review): must hold at least sizeof(struct example)
 *              bytes; nothing here validates that.
 *   cb       - name passed to wasm_runtime_lookup_function().
 *              NOTE(review): presumably NUL-terminated; `len` is not used
 *              to bound it.
 *   len      - not used in this snippet.
 *
 * Returns nothing; failures are only reported via printf.
 */
void register_callback_native(wasm_exec_env_t exec_env, int val, void *test, char *cb, int len)
{
    wasm_module_inst_t module_inst;
    wasm_function_inst_t func = NULL;
    struct example testapp = {10,'a'};
    uint32_t argv[1];

    /* Single argument passed to the wasm callback. */
    argv[0] = 15;
    /*
     * This is the call the reporter identified as leading to
     * "native stack overflow" on the ARM target.
     * NOTE(review): no matching wasm_runtime_destroy_thread_env() appears
     * in this snippet, and the function already receives a live exec_env;
     * confirm whether the thread env needs to be initialized here at all.
     */
    if (!wasm_runtime_init_thread_env()) {
        printf("Failed to initialize thread environment\n");
        return;
    }

    /* Hand the sample struct back to the caller through `test`. */
    memcpy(test, &testapp, sizeof(struct example));
    module_inst = wasm_runtime_get_module_inst(exec_env);

    /* Resolve the callback by name in the calling module instance. */
    func = wasm_runtime_lookup_function(module_inst, cb);

    if (func == NULL) {
        printf("lookup failed\n");
    } else {
        /* Invoke the callback with argc == 1 on the caller's exec_env. */
        if (!wasm_runtime_call_wasm(exec_env, func, 1, argv)) {
            /* Note: this format string has no trailing newline. */
            printf("%s[%d]: %s", __func__, __LINE__,
                    wasm_runtime_get_exception(module_inst));
        }
    }
}

The above code works just fine on my x86 Ubuntu machine; however, when I try running the same application on my ARM platform, I see the error below: "Exception: native stack overflow". I narrowed it down and found that the wasm_runtime_init_thread_env() API is leading to this error.

I tried increasing the stack and heap size allocation when starting the application, but that doesn't seem to help. Is there something missing here, or am I using the wrong WASM runtime APIs?

TianlongLiang commented 2 days ago

Is it still Ubuntu, or is it MacOS or Windows?

rpatwa701 commented 1 day ago

It is still Ubuntu on ARM platform

rpatwa701 commented 1 day ago

Running this on iwasm-1.3.2

After adding additional debug prints, we observe that the issue is in the call flow below when we invoke wasm_runtime_init_thread_env(): wasm_runtime_init_thread_env -> runtime_signal_init -> os_thread_signal_init -> init_stack_guard_pages -> touch_pages

In touch_pages(), it seems that after a certain point os_alloca() leads to the native stack overflow error. I observed that touch_pages() is also called during wasm_runtime_init(), and in that case touch_pages() returns successfully. Attaching the logs with debug prints leading up to the error:

Entered touch_pages function
Stack Min Address: 0x7fe5bae000
Page Size: 4096
uint8 sum = 0;
while (1)
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
if (touch_addr < stack_min_addr + page_size)
*touch_addr = 0;
sum += *touch_addr;
volatile uint8 *touch_addr = (volatile uint8 *)os_alloca(page_size / 2);
Exception: native stack overflow

I would appreciate it if someone could help me understand when an os_alloca() call would cause a stack overflow, and how this issue can be resolved.

rpatwa701 commented 1 day ago

Building iwasm with the below flags, attaching trimmed patch:

Index: wasm-micro-runtime-WAMR-1.3.2/CMakeLists.txt
===================================================================
--- wasm-micro-runtime-WAMR-1.3.2.orig/CMakeLists.txt
+++ wasm-micro-runtime-WAMR-1.3.2/CMakeLists.txt
@@ -17,6 +17,10 @@ set (CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS

 set (CMAKE_C_STANDARD 99)

+if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "arm")
+  set(WAMR_BUILD_TARGET "ARMv7")
+endif()

@@ -82,7 +86,7 @@ endif ()

+  set (WAMR_BUILD_LIB_PTHREAD 1)
 endif ()

 if (NOT DEFINED WAMR_BUILD_LIB_WASI_THREADS)
@@ -154,6 +158,8 @@ if (MINGW)
   target_link_libraries (iwasm_shared -lWs2_32)
 endif ()

+ADD_SUBDIRECTORY(product-mini/platforms/linux)
+

 # HEADERS
Index: wasm-micro-runtime-WAMR-1.3.2/product-mini/platforms/linux/CMakeLists.txt
===================================================================
--- wasm-micro-runtime-WAMR-1.3.2.orig/product-mini/platforms/linux/CMakeLists.txt
+++ wasm-micro-runtime-WAMR-1.3.2/product-mini/platforms/linux/CMakeLists.txt
@@ -71,6 +71,8 @@ if (NOT DEFINED WAMR_BUILD_LIBC_WASI)
   set (WAMR_BUILD_LIBC_WASI 1)
 endif ()

+set(WAMR_BUILD_LIBC_EMCC 1)

+  set (WAMR_BUILD_LIB_PTHREAD 1)
 endif ()

+set (RUNTIME_SOURCE_ALL
+    ${WAMR_ROOT_DIR}/product-mini/platforms/posix/main.c
+    ${UNCOMMON_SHARED_SOURCE}
+)
+
rpatwa701 commented 7 hours ago

I was able to get past the issue on my AARCH platform by building iwasm with cmake -DWAMR_DISABLE_STACK_HW_BOUND_CHECK=1, as suggested in this thread: https://github.com/bytecodealliance/wasm-micro-runtime/issues/2901. However, I am still unable to understand what this check does and what the implications of disabling it are.