hermit-os / libhermit

HermitCore: A C-based, lightweight unikernel
https://hermitcore.org
BSD 3-Clause "New" or "Revised" License
220 stars 44 forks source link

Uhyve file read/write should not assume target buffer is contiguous in physical memory #86

Open olivierpierre opened 6 years ago

olivierpierre commented 6 years ago

Hello,

With Uhyve, when performing file read/write operations in the host, the target buffer is fully read or written from the guest physical memory. Such a design assumes that the buffer is contiguous in physical memory, which is not always the case. When using a buffer allocated with malloc, example of bad things that can happen are:

You can confirm these issues with these two sample programs and the corresponding Makefile:

/* read_test.c */

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SIZE       4096
#define FILE_TO_READ    "fread.bin"
#define FILE_SIZE       PAGE_SIZE * 4

char verify_buf[FILE_SIZE];

int main(int argc, char **argv) {
    int fd, verify_fd, i;
    char *buf;

    printf("Read test starting\n");

    buf = malloc(FILE_SIZE);
    if(!buf) {
        fprintf(stderr, "error malloc!\n");
        return -1;
    }

    verify_fd = open(FILE_TO_READ, O_RDONLY, 0x0);
    fd = open(FILE_TO_READ, O_RDONLY, 0x0);
    if(fd == -1 || verify_fd == -1) {
        printf("error open\n");
        return -1;
    }

    if(read(verify_fd, verify_buf, FILE_SIZE) != FILE_SIZE) {
        fprintf(stderr, "read error\n");
        return -1;
    }

    if(read(fd, buf, FILE_SIZE) != FILE_SIZE) {
        fprintf(stderr, "read error\n");
        return -1;
    }
    printf("Read in malloc'd buffer done\n");

    for(i=0; i<FILE_SIZE; i++)
        if(buf[i] != verify_buf[i]) {
            fprintf(stderr, "verification failed!\n");
            fprintf(stderr, "buf[%d] == %x\n", i, buf[i]);
            fprintf(stderr, "verify_buf[%d] == %x\n", i, verify_buf[i]);
            return -1;
        }

    printf("Read verification successful!\n");

    free(buf);
    close(fd);
    return 0;
}
/* write_test.c */

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SIZE       4096
#define FILE_SIZE       PAGE_SIZE * 4
#define FILE_TO_WRITE   "fwrite.bin"

char verify_buf[FILE_SIZE];

int main(int argc, char **argv) {
    char *buf;
    int fd, i;

    printf("write test starting\n");

    buf = malloc(FILE_SIZE);
    if(!buf) {
        fprintf(stderr, "error malloc\n");
        return -1;
    }

    fd = open(FILE_TO_WRITE, O_RDWR | O_CREAT | O_TRUNC, 0666);
    if(fd == -1) {
        fprintf(stderr, "error opening %s\n", FILE_TO_WRITE);
        return -1;
    }

    for(i=0; i<FILE_SIZE; i++)
        buf[i] = (i/4096) + '0';

    if(write(fd, buf, FILE_SIZE) != FILE_SIZE) {
        fprintf(stderr, "error writing in %s", FILE_TO_WRITE);
        return -1;
    }
    printf("Write from malloc'd buffer done\n");

    if(lseek(fd, 0x0, SEEK_SET) != 0) {
        fprintf(stderr, "error lseek\n");
        return -1;
    }

    if(read(fd, verify_buf, FILE_SIZE) != FILE_SIZE) {
        fprintf(stderr, "error read\n");
        return -1;
    }

    for(i=0; i<FILE_SIZE; i++)
        if(verify_buf[i] != buf[i]) {
            fprintf(stderr, "verification failed!\n");
            fprintf(stderr, "buf[%d] == %x\n", i, buf[i]);
            fprintf(stderr, "verify_buf[%d] == %x\n", i, verify_buf[i]);
            return -1;
        }

    printf("Write verification successful!\n");

    free(buf);
    close(fd);
    return 0;
}
# Makefile

HERMIT_LOCAL_INSTALL=/home/pierre/Desktop/HermitCore/prefix/
HERMIT_TOOLCHAIN_INSTALL=/opt/hermit/
READ_TEST=read_test
WRITE_TEST=write_test
READ_SAMPLE_FILE=fread.bin
WRITE_SAMPLE_FILE=fwrite.bin
READ_TEST_SRC=$(READ_TEST).c
WRITE_TEST_SRC=$(WRITE_TEST).c
CFLAGS=-Wall -Werror
LDFLAGS=-L$(HERMIT_LOCAL_INSTALL)/x86_64-hermit/lib
CC=$(HERMIT_TOOLCHAIN_INSTALL)/bin/x86_64-hermit-gcc
PROXY=$(HERMIT_LOCAL_INSTALL)/bin/proxy
VERBOSE?=0

all: $(READ_TEST) $(WRITE_TEST)

$(READ_TEST): $(READ_TEST_SRC) $(READ_SAMPLE_FILE)
    $(CC) $(CFLAGS) $(READ_TEST_SRC) -o $(READ_TEST) $(LDFLAGS)

$(WRITE_TEST): $(WRITE_TEST_SRC)
    $(CC) $(CFLAGS) $(WRITE_TEST_SRC) -o $(WRITE_TEST) $(LDFLAGS)

$(READ_SAMPLE_FILE):
    dd if=/dev/urandom of=$(READ_SAMPLE_FILE) bs=4K count=4

test: $(READ_TEST) $(WRITE_TEST)
    HERMIT_ISLE=uhyve HERMIT_VERBOSE=$(VERBOSE) $(PROXY) $(READ_TEST)
    HERMIT_ISLE=uhyve HERMIT_VERBOSE=$(VERBOSE) $(PROXY) $(WRITE_TEST)

clean:
    rm -rf $(READ_TEST) $(WRITE_TEST) *.o $(READ_SAMPLE_FILE) \
        $(WRITE_SAMPLE_FILE)

Here is my proposed solution: a patch that forces the file read and write operations to be performed on a page-by-page basis, i.e. Uhyve file read or write operation is called for each page composing the buffer. I did not extensively tested it, nor did I evaluated the performance impact (I guess it slows things down a bit).

diff --git a/kernel/syscall.c b/kernel/syscall.c
index d10691d..f7b7b00 100644
--- a/kernel/syscall.c
+++ b/kernel/syscall.c
@@ -159,6 +159,47 @@ typedef struct {
    ssize_t ret;
 } __attribute__((packed)) uhyve_read_t;

+/* Pages belonging to the heap are mapped on demand, and are not always
+ * contiguous in physical memory. Thus, we need to force allocation and call
+ * the hypervisor file read function page by page.
+ */
+ssize_t uhyve_noncontiguous_read(int fd, char *buf, size_t len) {
+   ssize_t bytes_read = 0;
+   size_t cur_len = 0;
+   uhyve_read_t arg = {fd, 0x0, 0x0, -1};
+
+   while(len) {
+
+       if(!((size_t)buf % PAGE_SIZE)) {
+           cur_len = (len < PAGE_SIZE) ? len : PAGE_SIZE;
+       } else {
+           cur_len = PAGE_CEIL((size_t)buf) - (size_t)buf;
+           cur_len = (((size_t)buf + len) > PAGE_CEIL((size_t)buf))
+               ? (PAGE_CEIL((size_t)buf) - (size_t)buf) : len;
+       }
+
+       /* Force mapping */
+       memset(buf, 0x0, 1);
+
+       arg.buf = (char *)virt_to_phys((size_t)buf);
+       arg.len = cur_len;
+       arg.ret = -1;
+       uhyve_send(UHYVE_PORT_READ, (unsigned)virt_to_phys((size_t)&arg));
+
+       if(arg.ret < 0)
+           return arg.ret;
+
+       bytes_read += arg.ret;
+       if(arg.ret != cur_len)
+           break;
+
+       buf += cur_len;
+       len -= cur_len;
+   }
+
+   return bytes_read;
+}
+
 ssize_t sys_read(int fd, char* buf, size_t len)
 {
    sys_read_t sysargs = {__NR_read, fd, len};
@@ -174,11 +215,16 @@ ssize_t sys_read(int fd, char* buf, size_t len)
    }

    if (is_uhyve()) {
+
+       return uhyve_noncontiguous_read(fd, buf, len);
+
+#if 0
        uhyve_read_t uhyve_args = {fd, (char*) virt_to_phys((size_t) buf), len, -1};

        uhyve_send(UHYVE_PORT_READ, (unsigned)virt_to_phys((size_t)&uhyve_args));

        return uhyve_args.ret;
+#endif
    }

    spinlock_irqsave_lock(&lwip_lock);
@@ -222,6 +268,39 @@ typedef struct {
    size_t len;
 } __attribute__((packed)) uhyve_write_t;

+ssize_t uhyve_noncontiguous_write(int fd, const char *buf, size_t len) {
+   char *cur_buf = (char *)buf;
+   ssize_t bytes_written = 0;
+   size_t cur_len = 0;
+   uhyve_write_t arg = {fd, 0x0, 0x0};
+
+   while(len) {
+
+       if(!((size_t)cur_buf % PAGE_SIZE)) {
+           cur_len = (len < PAGE_SIZE) ? len : PAGE_SIZE;
+       } else {
+           cur_len = (((size_t)cur_buf + len) > PAGE_CEIL((size_t)cur_buf))
+               ? (PAGE_CEIL((size_t)cur_buf) - (size_t)cur_buf) : len;
+       }
+
+       arg.buf = (char *)virt_to_phys((size_t)cur_buf);
+       arg.len = cur_len;
+       uhyve_send(UHYVE_PORT_WRITE, (unsigned)virt_to_phys((size_t)&arg));
+
+       if(arg.len == (size_t)(-1))
+           return -1;
+
+       bytes_written += arg.len;
+       if(arg.len != cur_len)
+           break;
+
+       cur_buf += cur_len;
+       len -= cur_len;
+   }
+
+   return bytes_written;
+}
+
 ssize_t sys_write(int fd, const char* buf, size_t len)
 {
    if (BUILTIN_EXPECT(!buf, 0))
@@ -240,11 +319,16 @@ ssize_t sys_write(int fd, const char* buf, size_t len)
    }

    if (is_uhyve()) {
+
+       return uhyve_noncontiguous_write(fd, buf, len);
+
+#if 0
        uhyve_write_t uhyve_args = {fd, (const char*) virt_to_phys((size_t) buf), len};

        uhyve_send(UHYVE_PORT_WRITE, (unsigned)virt_to_phys((size_t)&uhyve_args));

        return uhyve_args.len;
+#endif
    }

    spinlock_irqsave_lock(&lwip_lock);
stlankes commented 6 years ago

Hello Pierre,

you are right. I will create patch to solve this issue.

Best

Stefan

ColinFinck commented 6 years ago

@olivierpierre: I got to know about your problem, however not about your proposed solution. Anyway, I've committed a fix for HermitCore-rs (the Rust implementation of HermitCore), which lets uhyve perform the virtual-to-physical address translation for the buffers of read/write operations. This way, the physical memory addresses of the buffer pages can be arbitrary.

The fix commit is https://github.com/hermitcore/hermit-caves/commit/6b9e4be0dc4ad65411e87c674e38138cd215c47e and seems to have also been merged to the HermitCore C version in the meantime.

stlankes commented 6 years ago

@olivierpierre : Since yesterday I added a patch to the C version (see pull request 97). Could you test it on your system? By the way, I add also gdb support for aarch64...

olivierpierre commented 6 years ago

@ColinFinck: I believe my initial post includes a description of the problem as well as two sample programs that trigger the issue. Please let me know if you need something specific.

@stlankes: thank! I will try the patch on our version and let you know