ssrg-vt / popcorn-compiler

Popcorn Linux compiler toolchain for heterogeneous-ISA execution
42 stars 22 forks source link

struct stat wrong value #36

Closed xjtuwxg closed 5 years ago

xjtuwxg commented 6 years ago

Background

I'm trying to port a simple web server to popcorn. I compiled the tiny web server w/o inserting any migration point got wrong result.

Environment configuration

Popcorn-compiler: master (commit: 59ea7bbe13aef7abbe556dbf9489a09134852aab) clang, musl-gcc w/ static link works correctly.

Analysis

When compiled w/ popcorn-compiler, the web server always returns "Unknown Error". This is because of the wrong sbuf.st_mode value in line 359.

Debug w/ GDB, popcorn-compiler get the struct stat sbuf with st_mode set 1:

(gdb) p sbuf
$13 = {st_dev = 2049, st_ino = 22551840, st_mode = 1, st_nlink = 0, st_uid = 33188, st_gid = 1000, st_rdev = 1000,
  __pad = 0, st_size = 6, st_blksize = 4096, __pad2 = 0, st_blocks = 8, st_atim = {tv_sec = 1533149490,
    tv_nsec = 705858674}, st_mtim = {tv_sec = 1532621795, tv_nsec = 412033015}, st_ctim = {tv_sec = 1532621795,
    tv_nsec = 412033015}, __unused = {0, 0}}

The musl-gcc version binary show the correct st_mode is 33188 (10064 oct).

(gdb) p sbuf
$6 = {st_dev = 2049, st_ino = 22551840, st_nlink = 1, st_mode = 33188, st_uid = 1000, st_gid = 1000, __pad0 = 0,
  st_rdev = 0, st_size = 6, st_blksize = 4096, st_blocks = 8, st_atim = {tv_sec = 1533149490, tv_nsec = 705858674},
  st_mtim = {tv_sec = 1532621795, tv_nsec = 412033015}, st_ctim = {tv_sec = 1532621795, tv_nsec = 412033015}, __unused = {0,
    0, 0}}

In /usr/include/x86_64-linux-gnu/bits/stat.h defines Regular File: #define __S_IFREG 0100000 /* Regular file. */ So fstat a regular file should return a 1xxxxx in oct, instead of 1 in our case.

Root cause

Rob mentioned for the libraries, we automatically use the aarch64 library. I checked the Linux uapi headers (should be same w/ glibc): On ARM header (./include/uapi/asm-generic/stat.h):

struct stat {
    unsigned long   st_dev;     /* Device.  */
    unsigned long   st_ino;     /* File serial number.  */
    unsigned int    st_mode;    /* File mode.  */
    unsigned int    st_nlink;   /* Link count.  */

On x86 header (./arch/x86/include/uapi/asm/stat.h):

struct stat {
        __kernel_ulong_t        st_dev;
        __kernel_ulong_t        st_ino;
        __kernel_ulong_t        st_nlink;

        unsigned int            st_mode;
        unsigned int            st_uid;
        unsigned int            st_gid;

Consider the GDB info and these headers, this is problem the reason.

acarno commented 6 years ago

I ran into (what I think is) a similar issue with struct epoll_event, which differs in size between x86_64 and aarch64 due to different structure-packing settings. Chiefly, on x86_64, the kernel expects a 12-byte struct; on aarch64, the kernel expects a 16-byte struct.

The issue I ran into -- and that I suspect you might run into here -- is that changing the kernel's definition of struct stat will break every existing program; however, changing the user's definition will break Popcorn applications on the aarch64 kernel only.

My ad-hoc solution involved declaring the "kernel-correct" version of the structure inside musl's epoll.c file and copying each field from the "user-correct" version into the corresponding field of the "kernel-correct" structure, using temporary stack-allocated variables. Needless to say, this is a terrible workaround:

diff --git a/lib/musl-1.1.18/src/linux/epoll.c b/lib/musl-1.1.18/src/linux/epoll.c
index deff5b1..0df118e 100644
--- a/lib/musl-1.1.18/src/linux/epoll.c
+++ b/lib/musl-1.1.18/src/linux/epoll.c
@@ -1,8 +1,19 @@
 #include <sys/epoll.h>
 #include <signal.h>
 #include <errno.h>
+#if !defined(__x86_64__)
+#include <string.h>
+#endif
 #include "syscall.h"

+#if !defined(__x86_64__)
+struct epoll_event16 {
+    uint32_t events;
+    char pad[4];
+    epoll_data_t data;
+};
+#endif
+
 int epoll_create(int size)
 {
    return epoll_create1(0);
@@ -19,16 +30,42 @@ int epoll_create1(int flags)

 int epoll_ctl(int fd, int op, int fd2, struct epoll_event *ev)
 {
+#if !defined(__x86_64__)
+    struct epoll_event16 ev16;
+    if (ev) {
+        memcpy(&ev16.events, &ev->events, sizeof (uint32_t));
+        memcpy(&ev16.data, &ev->data, sizeof (epoll_data_t));
+        return syscall(SYS_epoll_ctl, fd, op, fd2, &ev16);
+    }
+    else {
+        return syscall(SYS_epoll_ctl, fd, op, fd2, NULL);
+    }
+#else
    return syscall(SYS_epoll_ctl, fd, op, fd2, ev);
+#endif
 }

 int epoll_pwait(int fd, struct epoll_event *ev, int cnt, int to, const sigset_t *sigs)
 {
+#if !defined(__x86_64__)
+    struct epoll_event16 ev16[cnt]; //VLAs are evil!
+    memset(ev16, 0, sizeof ev16[0] * cnt);
+    int r = __syscall(SYS_epoll_pwait, fd, ev16, cnt, to, sigs, _NSIG/8);
+#ifdef SYS_epoll_wait
+   if (r==-ENOSYS && !sigs) r = __syscall(SYS_epoll_wait, fd, ev16, cnt, to);
+#endif
+    for (int i = 0; i < cnt; ++i) {
+        memcpy(&ev[i].events, &ev16[i].events, sizeof (uint32_t));
+        memcpy(&ev[i].data, &ev16[i].data, sizeof (epoll_data_t));
+    }
+    return __syscall_ret(r);
+#else
    int r = __syscall(SYS_epoll_pwait, fd, ev, cnt, to, sigs, _NSIG/8);
 #ifdef SYS_epoll_wait
    if (r==-ENOSYS && !sigs) r = __syscall(SYS_epoll_wait, fd, ev, cnt, to);
 #endif
    return __syscall_ret(r);
+#endif
 }

 int epoll_wait(int fd, struct epoll_event *ev, int cnt, int to)
xjtuwxg commented 6 years ago

Thank Anthony for the comment. Do you mean I should modify the popcorn-compiler musl source to adjust to the kernel's declaration? I thought this is a problem caused by the different header files of x86/arm.

acarno commented 6 years ago

Before I answer that, let me ask this question to clarify:

-- Are the x86/arm kernel declarations the same? -- If so, do the popcorn-compiler musl headers match either of the kernel declarations, or none of them?

xjtuwxg commented 6 years ago

Hey guys. [Good news] I wrote a patch inmusl-1.1.18/src/stat/fstat.c to marshal the struct stat from x86 to aarch64, it solves the tiny web server problem right now. [Bad news] It still has wrong result when passing the struct stat as a parameter to another function. I'll write another issue for that.

The patch for fstat is:

$ git diff 4ffd16e00d70a40b4cdde9507f6eddadffef62b8
diff --git a/lib/musl-1.1.18/src/stat/fstat.c b/lib/musl-1.1.18/src/stat/fstat.c
index ab4afc0..d3b47ad 100644
--- a/lib/musl-1.1.18/src/stat/fstat.c
+++ b/lib/musl-1.1.18/src/stat/fstat.c
@@ -4,10 +4,95 @@
 #include "syscall.h"
 #include "libc.h"

+#if defined(__x86_64__)
+#include <string.h>
+
+// The syscall on x86 returns the x86 version of struct stat.
+// But the x86 binary actually uses the arm version of stat.
+// copied from arch/x86_64/bits/stat.h
+struct stat_x86 {
+       dev_t st_dev;
+       ino_t st_ino;
+       nlink_t st_nlink;
+
+       mode_t st_mode;
+       uid_t st_uid;
+       gid_t st_gid;
+       unsigned int    __pad0;
+       dev_t st_rdev;
+       off_t st_size;
+       blksize_t st_blksize;
+       blkcnt_t st_blocks;
+
+       struct timespec st_atim;
+       struct timespec st_mtim;
+       struct timespec st_ctim;
+       long __unused[3];
+};
+
+// Copied from arch/aarch64/bits/stat.h
+struct stat_arm {
+       dev_t st_dev;
+       ino_t st_ino;
+       mode_t st_mode;
+       nlink_t st_nlink;
+       uid_t st_uid;
+       gid_t st_gid;
+       dev_t st_rdev;
+       unsigned long __pad;
+       off_t st_size;
+       blksize_t st_blksize;
+       int __pad2;
+       blkcnt_t st_blocks;
+       struct timespec st_atim;
+       struct timespec st_mtim;
+       struct timespec st_ctim;
+       unsigned __unused[2];
+};
+
+#define MARSHALLING_STAT(st, st_x86) \
+    memcpy(st, &st_x86, sizeof(struct stat)); \
+    st->st_mode = st_x86.st_mode; \
+    st->st_nlink = (unsigned)(st_x86.st_nlink);\
+    st->st_uid = st_x86.st_uid; \
+    st->st_gid = st_x86.st_gid; \
+    st->st_rdev = st_x86.st_rdev;
+
+#endif
 void __procfdname(char *, unsigned);

 int fstat(int fd, struct stat *st)
 {
+#if defined(__x86_64__)
+    struct stat_x86 st_x86;
+       int ret = __syscall(SYS_fstat, fd, &st_x86);
+
+    // convert a "kernel stat (x86_64)" to "musl stat (aarch64)"
+    MARSHALLING_STAT(((struct stat_arm *)st), st_x86)
+
+       if (ret != -EBADF || __syscall(SYS_fcntl, fd, F_GETFD) < 0) {
+               return __syscall_ret(ret);
+    }
+
+       char buf[15+3*sizeof(int)];
+       __procfdname(buf, fd);
+#ifdef SYS_stat
+       ret = syscall(SYS_stat, buf, &st_x86);
+
+    // convert a "kernel stat (x86_64)" to "musl stat (aarch64)"
+    MARSHALLING_STAT(((struct stat_arm *)st), st_x86)
+
+    return ret;
+#else
+       ret = syscall(SYS_fstatat, AT_FDCWD, buf, &st_x86, 0);
+
+    // convert a "kernel stat (x86_64)" to "musl stat (aarch64)"
+    MARSHALLING_STAT(((struct stat_arm *)st), st_x86)
+
+    return ret;
+#endif
+
+#else   // aarch64 default code
        int ret = __syscall(SYS_fstat, fd, st);
        if (ret != -EBADF || __syscall(SYS_fcntl, fd, F_GETFD) < 0)
                return __syscall_ret(ret);
@@ -19,6 +104,7 @@ int fstat(int fd, struct stat *st)
 #else
        return syscall(SYS_fstatat, AT_FDCWD, buf, st, 0);
 #endif
+#endif  // end if define(__x86_64__)
 }

 LFS64(fstat);