bytecodealliance / wasm-micro-runtime

WebAssembly Micro Runtime (WAMR)
Apache License 2.0
4.89k stars 623 forks source link

WAMR fails names.wast spec test #3858

Open sjamesr opened 3 days ago

sjamesr commented 3 days ago

Subject of the issue

WAMR currently fails the names.wast spec test, specifically because the spec permits module exports with names containing NUL bytes, but WAMR does not.

Test case

Run names.wast through iwasm using the spec test runner scripts.

Your environment

Steps to reproduce

  ;; Test the C0 control codes.
  (func (export "\00\01\02\03\04\05\06\07\08\09\0a\0b\0c\0d\0e\0f") (result i32) (i32.const 22))

and later

(assert_return (invoke "\00\01\02\03\04\05\06\07\08\09\0a\0b\0c\0d\0e\0f") (i32.const 22))

Expected behavior

The tests should pass.

Actual behavior

This fails, because the wasm_lookup_function function stops searching when it sees the null byte at the beginning of the function.

Extra Info

I have the beginnings of a proposed fix which adds a size_t name_len field wherever export names are stored. This will change the AOT module representation, I don't know how much of a big deal this is.

The patch below modifies the loader to feed the export name size through to the new name_len field. It also adds a wasm_runtime_lookup_function_raw function that takes the extra size_t parameter.

I am curious if there is any interest in pursuing this change, or if WAMR deliberately does not want to support this aspect of the specification.

diff --git a/core/iwasm/aot/aot_runtime.c b/core/iwasm/aot/aot_runtime.c
index 63a3c83c..71e3501d 100644
--- a/core/iwasm/aot/aot_runtime.c
+++ b/core/iwasm/aot/aot_runtime.c
@@ -2185,6 +2185,22 @@ aot_lookup_function(const AOTModuleInstance *module_inst, const char *name)
     return NULL;
 }

+AOTFunctionInstance *
+aot_lookup_function_raw(const AOTModuleInstance *module_inst,
+                        const char *name, size_t name_len)
+{
+    uint32 i;
+    AOTFunctionInstance *export_funcs =
+        (AOTFunctionInstance *)module_inst->export_functions;
+
+    for (i = 0; i < module_inst->export_func_count; i++)
+        if (export_funcs[i].func_name_len == name_len
+            && !memcmp(export_funcs[i].func_name, name, name_len))
+            return &export_funcs[i];
+    return NULL;
+}
+
+
 #ifdef OS_ENABLE_HW_BOUND_CHECK
 static bool
 invoke_native_with_hw_bound_check(WASMExecEnv *exec_env, void *func_ptr,
diff --git a/core/iwasm/aot/aot_runtime.h b/core/iwasm/aot/aot_runtime.h
index 9c73dd40..7a430295 100644
--- a/core/iwasm/aot/aot_runtime.h
+++ b/core/iwasm/aot/aot_runtime.h
@@ -97,6 +97,7 @@ typedef struct AOTRelocationGroup {
 /* AOT function instance */
 typedef struct AOTFunctionInstance {
     char *func_name;
+    size_t func_name_len;
     uint32 func_index;
     bool is_import_func;
     union {
@@ -544,6 +545,18 @@ aot_deinstantiate(AOTModuleInstance *module_inst, bool is_sub_inst);
 AOTFunctionInstance *
 aot_lookup_function(const AOTModuleInstance *module_inst, const char *name);

+/**
+ * Lookup an exported function in the AOT module instance.
+ *
+ * @param module_inst the module instance
+ * @param name the name of the function
+ *
+ * @return the function instance found
+ */
+AOTFunctionInstance *
+aot_lookup_function_raw(const AOTModuleInstance *module_ins,
+                        const char *name, size_t name_len);
+
 AOTMemoryInstance *
 aot_lookup_memory(AOTModuleInstance *module_inst, char const *name);

diff --git a/core/iwasm/common/wasm_runtime_common.c b/core/iwasm/common/wasm_runtime_common.c
index 453cf9e3..389907b7 100644
--- a/core/iwasm/common/wasm_runtime_common.c
+++ b/core/iwasm/common/wasm_runtime_common.c
@@ -2259,6 +2259,23 @@ wasm_runtime_lookup_function(WASMModuleInstanceCommon *const module_inst,
     return NULL;
 }

+WASMFunctionInstanceCommon *
+wasm_runtime_lookup_function_raw(WASMModuleInstanceCommon *const module_inst,
+                                 const char *name, size_t name_len)
+{
+#if WASM_ENABLE_INTERP != 0
+    if (module_inst->module_type == Wasm_Module_Bytecode)
+        return (WASMFunctionInstanceCommon *)wasm_lookup_function_raw(
+            (const WASMModuleInstance *)module_inst, name, name_len);
+#endif
+#if WASM_ENABLE_AOT != 0
+    if (module_inst->module_type == Wasm_Module_AoT)
+        return (WASMFunctionInstanceCommon *)aot_lookup_function_raw(
+            (const AOTModuleInstance *)module_inst, name, name_len);
+#endif
+    return NULL;
+}
+
 uint32
 wasm_func_get_param_count(WASMFunctionInstanceCommon *const func_inst,
                           WASMModuleInstanceCommon *const module_inst)
diff --git a/core/iwasm/common/wasm_runtime_common.h b/core/iwasm/common/wasm_runtime_common.h
index fb2c7940..b9c4717d 100644
--- a/core/iwasm/common/wasm_runtime_common.h
+++ b/core/iwasm/common/wasm_runtime_common.h
@@ -616,6 +616,11 @@ WASM_RUNTIME_API_EXTERN WASMFunctionInstanceCommon *
 wasm_runtime_lookup_function(WASMModuleInstanceCommon *const module_inst,
                              const char *name);

+/* See wasm_export.h for description */
+WASM_RUNTIME_API_EXTERN WASMFunctionInstanceCommon *
+wasm_runtime_lookup_function_raw(WASMModuleInstanceCommon *const module_inst,
+                                 const char *name, size_t name_len);
+
 /* Internal API */
 WASMFuncType *
 wasm_runtime_get_function_type(const WASMFunctionInstanceCommon *function,
diff --git a/core/iwasm/include/wasm_export.h b/core/iwasm/include/wasm_export.h
index c9037b3c..81be7566 100644
--- a/core/iwasm/include/wasm_export.h
+++ b/core/iwasm/include/wasm_export.h
@@ -12,6 +12,7 @@
 #ifndef _WASM_EXPORT_H
 #define _WASM_EXPORT_H

+#include <stddef.h>
 #include <stdint.h>
 #include <stdbool.h>
 #include "lib_export.h"
@@ -786,6 +787,18 @@ WASM_RUNTIME_API_EXTERN wasm_function_inst_t
 wasm_runtime_lookup_function(const wasm_module_inst_t module_inst,
                              const char *name);

+/**
+ * Lookup an exported function in the WASM module instance.
+ *
+ * @param module_inst the module instance
+ * @param name the name of the function
+ *
+ * @return the function instance found, NULL if not found
+ */
+WASM_RUNTIME_API_EXTERN wasm_function_inst_t
+wasm_runtime_lookup_function_raw(const wasm_module_inst_t module_inst,
+                                 const char *name, size_t name_len);
+
 /**
  * Get parameter count of the function instance
  *
diff --git a/core/iwasm/interpreter/wasm.h b/core/iwasm/interpreter/wasm.h
index 0aefd30c..9555cca9 100644
--- a/core/iwasm/interpreter/wasm.h
+++ b/core/iwasm/interpreter/wasm.h
@@ -731,6 +731,7 @@ struct WASMGlobal {

 typedef struct WASMExport {
     char *name;
+    size_t name_len;
     uint8 kind;
     uint32 index;
 } WASMExport;
diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c
index ff3501e3..4368f0d9 100644
--- a/core/iwasm/interpreter/wasm_loader.c
+++ b/core/iwasm/interpreter/wasm_loader.c
@@ -4049,6 +4049,7 @@ load_export_section(const uint8 *buf, const uint8 *buf_end, WASMModule *module,
     uint32 str_len;
     WASMExport *export;
     const char *name;
+    size_t name_len;

     read_leb_uint32(p, p_end, export_count);

@@ -4076,7 +4077,8 @@ load_export_section(const uint8 *buf, const uint8 *buf_end, WASMModule *module,

             for (j = 0; j < i; j++) {
                 name = module->exports[j].name;
-                if (strlen(name) == str_len && memcmp(name, p, str_len) == 0) {
+                name_len = module->exports[j].name_len;
+                if (name_len == str_len && memcmp(name, p, str_len) == 0) {
                     set_error_buf(error_buf, error_buf_size,
                                   "duplicate export name");
                     return false;
@@ -4089,6 +4091,7 @@ load_export_section(const uint8 *buf, const uint8 *buf_end, WASMModule *module,
                 return false;
             }

+            export->name_len = str_len;
             p += str_len;
             CHECK_BUF(p, p_end, 1);
             export->kind = read_uint8(p);
diff --git a/core/iwasm/interpreter/wasm_runtime.c b/core/iwasm/interpreter/wasm_runtime.c
index e4142ab8..767b86b5 100644
--- a/core/iwasm/interpreter/wasm_runtime.c
+++ b/core/iwasm/interpreter/wasm_runtime.c
@@ -1387,6 +1387,7 @@ export_functions_instantiate(const WASMModule *module,
     for (i = 0; i < module->export_count; i++, export ++)
         if (export->kind == EXPORT_KIND_FUNC) {
             export_func->name = export->name;
+            export_func->name_len = export->name_len;
             export_func->function = &module_inst->e->functions[export->index];
             export_func++;
         }
@@ -3412,6 +3413,18 @@ wasm_lookup_function(const WASMModuleInstance *module_inst, const char *name)
     return NULL;
 }

+WASMFunctionInstance *
+wasm_lookup_function_raw(const WASMModuleInstance *module_inst,
+                         const char *name, size_t name_len)
+{
+    uint32 i;
+    for (i = 0; i < module_inst->export_func_count; i++)
+        if (module_inst->export_functions[i].name_len == name_len
+            && !memcmp(module_inst->export_functions[i].name, name, name_len))
+            return module_inst->export_functions[i].function;
+    return NULL;
+}
+
 WASMMemoryInstance *
 wasm_lookup_memory(const WASMModuleInstance *module_inst, const char *name)
 {
@@ -4855,12 +4868,7 @@ wasm_check_utf8_str(const uint8 *str, uint32 len)
     while (p < p_end) {
         chr = *p;

-        if (chr == 0) {
-            LOG_WARNING(
-                "LIMITATION: a string which contains '\\00' is unsupported");
-            return false;
-        }
-        else if (chr < 0x80) {
+        if (chr < 0x80) {
             p++;
         }
         else if (chr >= 0xC2 && chr <= 0xDF && p + 1 < p_end) {
diff --git a/core/iwasm/interpreter/wasm_runtime.h b/core/iwasm/interpreter/wasm_runtime.h
index e46b63cd..4fbf65b5 100644
--- a/core/iwasm/interpreter/wasm_runtime.h
+++ b/core/iwasm/interpreter/wasm_runtime.h
@@ -258,6 +258,7 @@ struct WASMTagInstance {
 #endif
 typedef struct WASMExportFuncInstance {
     char *name;
+    size_t name_len;
     WASMFunctionInstance *function;
 } WASMExportFuncInstance;

@@ -546,6 +547,10 @@ wasm_set_running_mode(WASMModuleInstance *module_inst,
 WASMFunctionInstance *
 wasm_lookup_function(const WASMModuleInstance *module_inst, const char *name);

+WASMFunctionInstance *
+wasm_lookup_function_raw(const WASMModuleInstance *module_inst,
+                         const char *name, size_t name_len);
+
 WASMMemoryInstance *
 wasm_lookup_memory(const WASMModuleInstance *module_inst, const char *name);
wenyongh commented 3 days ago

Hi, this is a limitation and we decided not to support it, see: https://github.com/bytecodealliance/wasm-micro-runtime/issues/2789

Is there any actual use case from you?

sjamesr commented 3 days ago

My team is evaluating several WASM engines for use in various projects. One of my tasks is to get the spec tests running with our internal CI, and to determine which tests we should skip/ignore.

While I'm not sure why a user would need NULs in their export/import names, it is part of the spec. Should the engine not strive for spec compliance where reasonably possible? This seems like not much additional work.

wenyongh commented 2 days ago

The string is stored into const string set, and is also emitted into AOT file, and loaded again in aot loader. All the operations assume the string is null terminated, it may be very complex to change their behaviors and it is easy to make things wrong. A workaround that I can think is to change the character \0 into a non-utf8 character (e.g. 0x80) in the string before storing into the const string set, and in wasm_runtime_lookup_function_raw, do the same change for the input string and use the changed string to call wasm_runtime_lookup_function. The patch is like below, could you check whether it is good to you?

diff --git a/core/iwasm/common/wasm_runtime_common.c b/core/iwasm/common/wasm_runtime_common.c
index cc3b4bb9..3f753763 100644
--- a/core/iwasm/common/wasm_runtime_common.c
+++ b/core/iwasm/common/wasm_runtime_common.c
@@ -2277,6 +2277,32 @@ wasm_runtime_lookup_function(WASMModuleInstanceCommon *const module_inst,
     return NULL;
 }

+WASMFunctionInstanceCommon *
+wasm_runtime_lookup_function_raw(WASMModuleInstanceCommon *const module_inst,
+                                 const char *name, uint32 len)
+{
+    WASMFunctionInstanceCommon *func;
+    char *name_c_str;
+    uint32 i;
+
+    if (!(name_c_str = wasm_runtime_malloc(len + 1))) {
+        return NULL;
+    }
+
+    bh_memcpy_s(name_c_str, len, name, len);
+    name_c_str[len] = '\0';
+
+    for (i = 0; i < len; i++) {
+        if (name_c_str[i] == '\0')
+            name_c_str[i] = 0x80;
+    }
+
+    func = wasm_runtime_lookup_function(module_inst, name_c_str);
+
+    wasm_runtime_free(name_c_str);
+    return func;
+}
+
 uint32
 wasm_func_get_param_count(WASMFunctionInstanceCommon *const func_inst,
                           WASMModuleInstanceCommon *const module_inst)
diff --git a/core/iwasm/common/wasm_runtime_common.h b/core/iwasm/common/wasm_runtime_common.h
index fb2c7940..58f869b4 100644
--- a/core/iwasm/common/wasm_runtime_common.h
+++ b/core/iwasm/common/wasm_runtime_common.h
@@ -616,6 +616,10 @@ WASM_RUNTIME_API_EXTERN WASMFunctionInstanceCommon *
 wasm_runtime_lookup_function(WASMModuleInstanceCommon *const module_inst,
                              const char *name);

+WASM_RUNTIME_API_EXTERN WASMFunctionInstanceCommon *
+wasm_runtime_lookup_function_raw(WASMModuleInstanceCommon *const module_inst,
+                                 const char *name, uint32 len);
+
 /* Internal API */
 WASMFuncType *
 wasm_runtime_get_function_type(const WASMFunctionInstanceCommon *function,
diff --git a/core/iwasm/include/wasm_export.h b/core/iwasm/include/wasm_export.h
index dfb6b802..aaab591e 100644
--- a/core/iwasm/include/wasm_export.h
+++ b/core/iwasm/include/wasm_export.h
@@ -793,6 +793,10 @@ WASM_RUNTIME_API_EXTERN wasm_function_inst_t
 wasm_runtime_lookup_function(const wasm_module_inst_t module_inst,
                              const char *name);

+wasm_function_inst_t
+wasm_runtime_lookup_function_raw(const wasm_module_inst_t module_inst,
+                                 const char *name, uint32 len);
+
 /**
  * Get parameter count of the function instance
  *
diff --git a/core/iwasm/interpreter/wasm_runtime.c b/core/iwasm/interpreter/wasm_runtime.c
index f6d5e103..47883a7b 100644
--- a/core/iwasm/interpreter/wasm_runtime.c
+++ b/core/iwasm/interpreter/wasm_runtime.c
@@ -4870,12 +4870,7 @@ wasm_check_utf8_str(const uint8 *str, uint32 len)
     while (p < p_end) {
         chr = *p;

-        if (chr == 0) {
-            LOG_WARNING(
-                "LIMITATION: a string which contains '\\00' is unsupported");
-            return false;
-        }
-        else if (chr < 0x80) {
+        if (chr < 0x80) {
             p++;
         }
         else if (chr >= 0xC2 && chr <= 0xDF && p + 1 < p_end) {
@@ -4936,6 +4931,7 @@ wasm_const_str_list_insert(const uint8 *str, uint32 len, WASMModule *module,
                            uint32 error_buf_size)
 {
     StringNode *node, *node_next;
+    uint32 i;

     if (!wasm_check_utf8_str(str, len)) {
         set_error_buf(error_buf, error_buf_size, "invalid UTF-8 encoding");
@@ -4946,6 +4942,11 @@ wasm_const_str_list_insert(const uint8 *str, uint32 len, WASMModule *module,
         return "";
     }
     else if (is_load_from_file_buf) {
+        for (i = 0; i < len; i++) {
+            if (str[i] == '\0')
+                ((char *)str)[i] = 0x80;
+        }
+
         /* As the file buffer can be referred to after loading, we use
            the previous byte of leb encoded size to adjust the string:
            move string 1 byte backward and then append '\0' */
@@ -4977,6 +4978,11 @@ wasm_const_str_list_insert(const uint8 *str, uint32 len, WASMModule *module,
     bh_memcpy_s(node->str, len + 1, str, len);
     node->str[len] = '\0';

+    for (i = 0; i < len; i++) {
+        if (node->str[i] == '\0')
+            node->str[i] = 0x80;
+    }
+
     if (!module->const_str_list) {
         /* set as head */
         module->const_str_list = node;
yamt commented 2 days ago

and in wasm_runtime_lookup_function_raw, do the same change for the input string and use the changed string to call wasm_runtime_lookup_function.

if you mean to expose wasm_runtime_lookup_function_raw to users, i guess it should perform utf-8 validation on the user-given name before escaping 0. also, it's better to make it possible for users to distinguish "not found" and "memory allocation failure".

wenyongh commented 2 days ago

and in wasm_runtime_lookup_function_raw, do the same change for the input string and use the changed string to call wasm_runtime_lookup_function.

if you mean to expose wasm_runtime_lookup_function_raw to users, i guess it should perform utf-8 validation on the user-given name before escaping 0. also, it's better to make it possible for users to distinguish "not found" and "memory allocation failure".

Totally agree, thanks.