yasukata / zpoline

system call hook for Linux
Apache License 2.0
393 stars 31 forks source link

Problems with SUD hooking #17

Open 98hq opened 2 months ago

98hq commented 2 months ago

Hello, can I get the code of the Simple HTTP server mentioned in the paper? Because I encountered some problems in the process of reproducing the paper. SUD can encounter problems when hooking multi-threaded programs, possibly because signals are modified when creating threads. I see that you handle this in your code, but it triggers a crash when I use it, so I'd like to get the procedure you evaluated to fix the problem.

yasukata commented 2 months ago

Thank you for your message.

can I get the code of the Simple HTTP server mentioned in the paper?

This is the code that I used.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <inttypes.h>
#include <sys/poll.h>
#include <sys/epoll.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <arpa/inet.h>
#include <errno.h>
#include <signal.h>

/* Capacity of the event array handed to epoll_wait() in main(). */
#define SESSION_MAX 1024
/*
 * Logging helper: prints the formatted message to stdout, prefixed with the
 * name of the calling function in brackets and terminated by a newline.
 * `fmt` must be a string literal because it is concatenated with the
 * neighbouring literals; `##__VA_ARGS__` (a GNU extension) allows the macro
 * to be invoked with no arguments after the format.
 */
#define D(fmt, ...) \
    printf("[%s]: "fmt"\n", __func__, ##__VA_ARGS__)

/*
 * Shutdown flag: set to 1 by the SIGINT handler and polled by the event loop
 * in main().  volatile sig_atomic_t is the type the C standard designates for
 * objects that are written from an asynchronous signal handler.
 */
static volatile sig_atomic_t do_abort = 0;

/*
 * SIGINT handler: requests a graceful shutdown and restores the default
 * disposition so that a second Ctrl-C terminates the process immediately.
 *
 * Only async-signal-safe functions (write, signal) are called here; printf
 * (and therefore the D() macro) is not safe inside a handler.  The message
 * bytes are the same ones D("Stop process") would produce.
 */
static void
sigint_h(int sig)
{
    static const char msg[] = "[sigint_h]: Stop process\n";
    int saved_errno = errno;    /* write() may clobber the interrupted code's errno */

    (void)sig;  /* UNUSED */
    do_abort = 1;
    if (write(STDOUT_FILENO, msg, sizeof(msg) - 1) < 0) {
        /* Nothing useful can be done about a failed log write in a handler. */
    }
    signal(SIGINT, SIG_DFL);
    errno = saved_errno;
}

/* Print the command-line synopsis to stderr and terminate with exit status 1. */
static void
usage(void)
{
    static const char synopsis[] =
        "usage: this_program -p [port] -l [msglen]\n";

    fputs(synopsis, stderr);
    exit(1);
}

/*
 * Build a minimal HTTP/1.1 "200 OK" response in `buf`.
 *
 * The response consists of the status line, a Content-Length header, a
 * "Connection: keep-alive" header, the blank line that ends the header
 * section, and `content_length` bytes of body.
 *
 * content_length: number of body bytes; must be >= 0.
 * buf:            destination; the caller must provide room for the header
 *                 (well under 128 bytes) plus content_length bytes.  The
 *                 result is NOT NUL-terminated.
 * content:        body bytes to copy, or NULL to fill the body with 'A'.
 *
 * Returns the total number of bytes stored in buf (header + body).
 */
ssize_t
generate_httphdr(ssize_t content_length, char *buf, char *content)
{
    /* From nginx */
    static const char trailer[] = "Connection: keep-alive\r\n\r\n";
    char *p = buf;

    /*
     * %zd matches ssize_t on both 32-bit and 64-bit targets ("%lu" does not
     * wherever ssize_t is narrower than long or long is not its base type).
     */
    p += sprintf(p, "HTTP/1.1 200 OK\r\nContent-Length: %zd\r\n", content_length);

    /* memcpy overwrites sprintf's terminating NUL and appends none itself. */
    memcpy(p, trailer, sizeof(trailer) - 1);
    p += sizeof(trailer) - 1;

    if (content == NULL) {
        memset(p, 'A', (size_t)content_length);
    } else {
        memcpy(p, content, (size_t)content_length);
    }
    p += content_length;

    return p - buf;
}

/*
 * Drain the accept queue of the listening socket `fd` and register every
 * accepted connection with the epoll instance `epfd` for readability
 * notifications.  `fd` is expected to be non-blocking (main() sets FIONBIO);
 * the loop ends when accept() reports that no more connections are pending.
 *
 * Always returns 0: failures are reported on stderr and only the affected
 * connection is dropped.
 */
int accept_session(int fd, int epfd)
{
    for (;;) {
        struct sockaddr_in caddr_in;
        /*
         * addrlen is a value-result argument and must hold the size of the
         * address buffer on entry.  Left uninitialised, accept() can fail
         * with EINVAL or return a truncated peer address.
         */
        socklen_t addrlen = sizeof(caddr_in);
        struct epoll_event ev;
        int newfd;

        newfd = accept(fd, (struct sockaddr *)&caddr_in, &addrlen);
        if (newfd == -1) {
            /* Transient conditions: try the next pending connection. */
            if (errno == EINTR || errno == ECONNABORTED)
                continue;
            /* EAGAIN/EWOULDBLOCK simply means the queue is empty. */
            if (errno != EAGAIN && errno != EWOULDBLOCK)
                perror("accept");
            break;
        }

        memset(&ev, 0, sizeof(ev));
        ev.events = EPOLLIN;
        ev.data.fd = newfd;
        if (epoll_ctl(epfd, EPOLL_CTL_ADD, newfd, &ev) < 0) {
            /* An unregistered socket would never be serviced; do not leak it. */
            perror("epoll_ctl");
            close(newfd);
        }
    }
    return 0;
}

/*
 * Write exactly `len` bytes from `data` to `fd`, resuming after short writes
 * and EINTR.  Returns 0 on success, -1 on error (errno is left as set by
 * write()).
 */
static int
write_fully(int fd, const char *data, size_t len)
{
    while (len > 0) {
        ssize_t n = write(fd, data, len);

        if (n < 0) {
            if (errno == EINTR)
                continue;
            return -1;
        }
        data += n;
        len -= (size_t)n;
    }
    return 0;
}

/*
 * Service one readable connection.
 *
 * Reads whatever is available on `fd`.  If the data starts with "GET ", a
 * 200 OK response with `msglen` body bytes is sent; any other data is echoed
 * back unchanged.  End-of-file and hard read errors close the connection.
 *
 * Returns 0 on success (including EOF and "nothing to read yet"), -1 on
 * failure.
 */
int pkt_processing(int fd, ssize_t msglen)
{
    /* Space kept free in buf for the header written by generate_httphdr(). */
    enum { HTTP_HDR_RESERVE = 128 };
    static const char get_prefix[] = "GET ";
    static char buf[70000];
    ssize_t len;

    len = read(fd, buf, sizeof(buf));
    if (len == 0) {
        close(fd);
        return 0;
    }
    if (len < 0) {
        /* Not failures: there is simply nothing to process right now. */
        if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR)
            return 0;
        D("read fail, fd %d, ret %zd", fd, len);
        close(fd);  /* otherwise the descriptor would leak */
        return -1;
    }

    /*
     * buf is static, so bytes beyond `len` are left over from earlier
     * requests; only compare when the prefix was actually received.
     */
    if ((size_t)len >= sizeof(get_prefix) - 1 &&
        memcmp(buf, get_prefix, sizeof(get_prefix) - 1) == 0) {
        /* The response is assembled in buf, so it has to fit there. */
        if (msglen < 0 || (size_t)msglen > sizeof(buf) - HTTP_HDR_RESERVE) {
            D("msglen %zd does not fit the %zu-byte response buffer",
                msglen, sizeof(buf));
            close(fd);
            return -1;
        }
        len = generate_httphdr(msglen, buf, NULL);
    }
    if (write_fully(fd, buf, (size_t)len) < 0) {
        perror("write");
        return -1;
    }
    return 0;
}

/*
 * Parse `text` as a base-10 integer within [min, max].
 * Returns 0 and stores the value in *out on success; returns -1 if the text
 * is empty, has trailing characters, or is out of range.
 */
static int
parse_long_arg(const char *text, long min, long max, long *out)
{
    char *end;
    long v;

    errno = 0;
    v = strtol(text, &end, 10);
    if (end == text || *end != '\0' || errno == ERANGE || v < min || v > max)
        return -1;
    *out = v;
    return 0;
}

/*
 * Entry point of the simple epoll-based HTTP server.
 *
 * Options:
 *   -p port    TCP port to listen on (1-65535, required)
 *   -l msglen  number of body bytes in each response (default 1)
 *
 * Returns EXIT_SUCCESS after a SIGINT-triggered shutdown and EXIT_FAILURE
 * when set-up or the event loop fails.
 */
int
main(int argc, char **argv)
{
    /*
     * pkt_processing() assembles each response in a static 70000-byte
     * buffer; keep 128 bytes of it free for the HTTP header.
     */
    enum { MSGLEN_MAX = 70000 - 128 };
    struct epoll_event evts[SESSION_MAX];
    struct epoll_event ev;
    struct sockaddr_in saddr_in;
    ssize_t msglen = 1;
    long parsed = 0;
    int rc = EXIT_FAILURE;
    int tcp_sockfd;
    int epfd;
    int port = 0;
    int on = 1;
    int ch;

    fprintf(stderr, "%s built %s %s\n",
        argv[0], __DATE__, __TIME__);

    while ((ch = getopt(argc, argv, "p:l:")) != -1) {
        switch (ch) {
        case 'p':   /* server port */
            if (port != 0) {
                D("%s ignored, already have 1 interface",
                    optarg);
                break;
            }
            if (parse_long_arg(optarg, 1, 65535, &parsed) < 0) {
                D("invalid port %s", optarg);
                usage();
            }
            port = (int)parsed;
            break;
        case 'l':   /* response body length */
            if (parse_long_arg(optarg, 0, MSGLEN_MAX, &parsed) < 0) {
                D("invalid msglen %s (allowed: 0-%d)", optarg, (int)MSGLEN_MAX);
                usage();
            }
            msglen = (ssize_t)parsed;
            break;
        default:
            /* optarg is not set for rejected options; optopt holds the letter. */
            D("bad option -%c", optopt);
            usage();
            break;
        }
    }

    if (port == 0) {
        D("missing port number");
        usage();    /* does not return */
    }

    /*
     * Without this, a peer that disconnects while a response is being
     * written terminates the whole server with SIGPIPE; with it, write()
     * fails with EPIPE instead.
     */
    signal(SIGPIPE, SIG_IGN);

    epfd = epoll_create1(EPOLL_CLOEXEC);
    if (epfd == -1) {
        D("epoll create failed");
        goto out;
    }

    D("Linux TCP stack, msglen (%zd)", msglen);

    tcp_sockfd = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
    if (tcp_sockfd < 0) {
        perror("socket");
        goto out_epfd;
    }
    if (setsockopt(tcp_sockfd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) < 0) {
        perror("setsockopt");
        goto out_sock;
    }
    /* IPPROTO_TCP is the portable spelling of the Linux-only SOL_TCP. */
    if (setsockopt(tcp_sockfd, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on)) < 0) {
        perror("setsockopt");
        goto out_sock;
    }
    /* Non-blocking listener: accept_session() loops until the queue is empty. */
    if (ioctl(tcp_sockfd, FIONBIO, &on) < 0) {
        perror("ioctl");
        goto out_sock;
    }

    memset(&saddr_in, 0, sizeof(struct sockaddr_in));
    saddr_in.sin_family = AF_INET;
    saddr_in.sin_addr.s_addr = htonl(INADDR_ANY);
    saddr_in.sin_port = htons((uint16_t)port);
    if (bind(tcp_sockfd, (struct sockaddr *)&saddr_in, sizeof(saddr_in)) < 0) {
        perror("bind");
        goto out_sock;
    }

    if (listen(tcp_sockfd, SOMAXCONN) != 0) {
        perror("listen");
        goto out_sock;
    }

    memset(&ev, 0, sizeof(ev));
    ev.events = EPOLLIN;
    ev.data.fd = tcp_sockfd;
    if (epoll_ctl(epfd, EPOLL_CTL_ADD, tcp_sockfd, &ev) < 0) {
        perror("epoll_ctl");
        goto out_sock;
    }

    signal(SIGINT, sigint_h);

    rc = EXIT_SUCCESS;
    while (!do_abort) {
        int i, nfd;

        /* Timeout -1: block until at least one event (or a signal) arrives. */
        nfd = epoll_wait(epfd, evts, SESSION_MAX, -1);
        if (nfd < 0) {
            if (errno == EINTR)
                continue;   /* the loop condition re-checks do_abort */
            perror("epoll_wait");
            rc = EXIT_FAILURE;
            break;
        }
        for (i = 0; i < nfd; i++) {
            if (evts[i].data.fd == tcp_sockfd) {
                if (accept_session(tcp_sockfd, epfd) < 0) {
                    rc = EXIT_FAILURE;
                    goto out_sock;
                }
            } else {
                /* Per-connection failures are reported by the callee. */
                (void)pkt_processing(evts[i].data.fd, msglen);
            }
        }
    }

out_sock:
    close(tcp_sockfd);
out_epfd:
    close(epfd);
out:
    return rc;
}

I hope this works in your environment.

Thank you very much for your interest.

98hq commented 2 months ago

Thank you for your message.

can I get the code of the Simple HTTP server mentioned in the paper?

This is the code that I used.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <inttypes.h>
#include <sys/poll.h>
#include <sys/epoll.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <arpa/inet.h>
#include <errno.h>
#include <signal.h>

/* Capacity of the event array handed to epoll_wait() in main(). */
#define SESSION_MAX 1024
/*
 * Logging helper: prints the formatted message to stdout, prefixed with the
 * name of the calling function in brackets and terminated by a newline.
 * `fmt` must be a string literal because it is concatenated with the
 * neighbouring literals; `##__VA_ARGS__` (a GNU extension) allows the macro
 * to be invoked with no arguments after the format.
 */
#define D(fmt, ...) \
  printf("[%s]: "fmt"\n", __func__, ##__VA_ARGS__)

/*
 * Shutdown flag: set to 1 by the SIGINT handler and polled by the event loop
 * in main().  volatile sig_atomic_t is the type the C standard designates for
 * objects that are written from an asynchronous signal handler.
 */
static volatile sig_atomic_t do_abort = 0;

/*
 * SIGINT handler: requests a graceful shutdown and restores the default
 * disposition so that a second Ctrl-C terminates the process immediately.
 *
 * Only async-signal-safe functions (write, signal) are called here; printf
 * (and therefore the D() macro) is not safe inside a handler.  The message
 * bytes are the same ones D("Stop process") would produce.
 */
static void
sigint_h(int sig)
{
    static const char msg[] = "[sigint_h]: Stop process\n";
    int saved_errno = errno;    /* write() may clobber the interrupted code's errno */

    (void)sig;  /* UNUSED */
    do_abort = 1;
    if (write(STDOUT_FILENO, msg, sizeof(msg) - 1) < 0) {
        /* Nothing useful can be done about a failed log write in a handler. */
    }
    signal(SIGINT, SIG_DFL);
    errno = saved_errno;
}

/* Print the command-line synopsis to stderr and terminate with exit status 1. */
static void
usage(void)
{
    static const char synopsis[] =
        "usage: this_program -p [port] -l [msglen]\n";

    fputs(synopsis, stderr);
    exit(1);
}

/*
 * Build a minimal HTTP/1.1 "200 OK" response in `buf`.
 *
 * The response consists of the status line, a Content-Length header, a
 * "Connection: keep-alive" header, the blank line that ends the header
 * section, and `content_length` bytes of body.
 *
 * content_length: number of body bytes; must be >= 0.
 * buf:            destination; the caller must provide room for the header
 *                 (well under 128 bytes) plus content_length bytes.  The
 *                 result is NOT NUL-terminated.
 * content:        body bytes to copy, or NULL to fill the body with 'A'.
 *
 * Returns the total number of bytes stored in buf (header + body).
 */
ssize_t
generate_httphdr(ssize_t content_length, char *buf, char *content)
{
    /* From nginx */
    static const char trailer[] = "Connection: keep-alive\r\n\r\n";
    char *p = buf;

    /*
     * %zd matches ssize_t on both 32-bit and 64-bit targets ("%lu" does not
     * wherever ssize_t is narrower than long or long is not its base type).
     */
    p += sprintf(p, "HTTP/1.1 200 OK\r\nContent-Length: %zd\r\n", content_length);

    /* memcpy overwrites sprintf's terminating NUL and appends none itself. */
    memcpy(p, trailer, sizeof(trailer) - 1);
    p += sizeof(trailer) - 1;

    if (content == NULL) {
        memset(p, 'A', (size_t)content_length);
    } else {
        memcpy(p, content, (size_t)content_length);
    }
    p += content_length;

    return p - buf;
}

/*
 * Drain the accept queue of the listening socket `fd` and register every
 * accepted connection with the epoll instance `epfd` for readability
 * notifications.  `fd` is expected to be non-blocking (main() sets FIONBIO);
 * the loop ends when accept() reports that no more connections are pending.
 *
 * Always returns 0: failures are reported on stderr and only the affected
 * connection is dropped.
 */
int accept_session(int fd, int epfd)
{
    for (;;) {
        struct sockaddr_in caddr_in;
        /*
         * addrlen is a value-result argument and must hold the size of the
         * address buffer on entry.  Left uninitialised, accept() can fail
         * with EINVAL or return a truncated peer address.
         */
        socklen_t addrlen = sizeof(caddr_in);
        struct epoll_event ev;
        int newfd;

        newfd = accept(fd, (struct sockaddr *)&caddr_in, &addrlen);
        if (newfd == -1) {
            /* Transient conditions: try the next pending connection. */
            if (errno == EINTR || errno == ECONNABORTED)
                continue;
            /* EAGAIN/EWOULDBLOCK simply means the queue is empty. */
            if (errno != EAGAIN && errno != EWOULDBLOCK)
                perror("accept");
            break;
        }

        memset(&ev, 0, sizeof(ev));
        ev.events = EPOLLIN;
        ev.data.fd = newfd;
        if (epoll_ctl(epfd, EPOLL_CTL_ADD, newfd, &ev) < 0) {
            /* An unregistered socket would never be serviced; do not leak it. */
            perror("epoll_ctl");
            close(newfd);
        }
    }
    return 0;
}

/*
 * Write exactly `len` bytes from `data` to `fd`, resuming after short writes
 * and EINTR.  Returns 0 on success, -1 on error (errno is left as set by
 * write()).
 */
static int
write_fully(int fd, const char *data, size_t len)
{
    while (len > 0) {
        ssize_t n = write(fd, data, len);

        if (n < 0) {
            if (errno == EINTR)
                continue;
            return -1;
        }
        data += n;
        len -= (size_t)n;
    }
    return 0;
}

/*
 * Service one readable connection.
 *
 * Reads whatever is available on `fd`.  If the data starts with "GET ", a
 * 200 OK response with `msglen` body bytes is sent; any other data is echoed
 * back unchanged.  End-of-file and hard read errors close the connection.
 *
 * Returns 0 on success (including EOF and "nothing to read yet"), -1 on
 * failure.
 */
int pkt_processing(int fd, ssize_t msglen)
{
    /* Space kept free in buf for the header written by generate_httphdr(). */
    enum { HTTP_HDR_RESERVE = 128 };
    static const char get_prefix[] = "GET ";
    static char buf[70000];
    ssize_t len;

    len = read(fd, buf, sizeof(buf));
    if (len == 0) {
        close(fd);
        return 0;
    }
    if (len < 0) {
        /* Not failures: there is simply nothing to process right now. */
        if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR)
            return 0;
        D("read fail, fd %d, ret %zd", fd, len);
        close(fd);  /* otherwise the descriptor would leak */
        return -1;
    }

    /*
     * buf is static, so bytes beyond `len` are left over from earlier
     * requests; only compare when the prefix was actually received.
     */
    if ((size_t)len >= sizeof(get_prefix) - 1 &&
        memcmp(buf, get_prefix, sizeof(get_prefix) - 1) == 0) {
        /* The response is assembled in buf, so it has to fit there. */
        if (msglen < 0 || (size_t)msglen > sizeof(buf) - HTTP_HDR_RESERVE) {
            D("msglen %zd does not fit the %zu-byte response buffer",
                msglen, sizeof(buf));
            close(fd);
            return -1;
        }
        len = generate_httphdr(msglen, buf, NULL);
    }
    if (write_fully(fd, buf, (size_t)len) < 0) {
        perror("write");
        return -1;
    }
    return 0;
}

/*
 * Parse `text` as a base-10 integer within [min, max].
 * Returns 0 and stores the value in *out on success; returns -1 if the text
 * is empty, has trailing characters, or is out of range.
 */
static int
parse_long_arg(const char *text, long min, long max, long *out)
{
    char *end;
    long v;

    errno = 0;
    v = strtol(text, &end, 10);
    if (end == text || *end != '\0' || errno == ERANGE || v < min || v > max)
        return -1;
    *out = v;
    return 0;
}

/*
 * Entry point of the simple epoll-based HTTP server.
 *
 * Options:
 *   -p port    TCP port to listen on (1-65535, required)
 *   -l msglen  number of body bytes in each response (default 1)
 *
 * Returns EXIT_SUCCESS after a SIGINT-triggered shutdown and EXIT_FAILURE
 * when set-up or the event loop fails.
 */
int
main(int argc, char **argv)
{
    /*
     * pkt_processing() assembles each response in a static 70000-byte
     * buffer; keep 128 bytes of it free for the HTTP header.
     */
    enum { MSGLEN_MAX = 70000 - 128 };
    struct epoll_event evts[SESSION_MAX];
    struct epoll_event ev;
    struct sockaddr_in saddr_in;
    ssize_t msglen = 1;
    long parsed = 0;
    int rc = EXIT_FAILURE;
    int tcp_sockfd;
    int epfd;
    int port = 0;
    int on = 1;
    int ch;

    fprintf(stderr, "%s built %s %s\n",
        argv[0], __DATE__, __TIME__);

    while ((ch = getopt(argc, argv, "p:l:")) != -1) {
        switch (ch) {
        case 'p':   /* server port */
            if (port != 0) {
                D("%s ignored, already have 1 interface",
                    optarg);
                break;
            }
            if (parse_long_arg(optarg, 1, 65535, &parsed) < 0) {
                D("invalid port %s", optarg);
                usage();
            }
            port = (int)parsed;
            break;
        case 'l':   /* response body length */
            if (parse_long_arg(optarg, 0, MSGLEN_MAX, &parsed) < 0) {
                D("invalid msglen %s (allowed: 0-%d)", optarg, (int)MSGLEN_MAX);
                usage();
            }
            msglen = (ssize_t)parsed;
            break;
        default:
            /* optarg is not set for rejected options; optopt holds the letter. */
            D("bad option -%c", optopt);
            usage();
            break;
        }
    }

    if (port == 0) {
        D("missing port number");
        usage();    /* does not return */
    }

    /*
     * Without this, a peer that disconnects while a response is being
     * written terminates the whole server with SIGPIPE; with it, write()
     * fails with EPIPE instead.
     */
    signal(SIGPIPE, SIG_IGN);

    epfd = epoll_create1(EPOLL_CLOEXEC);
    if (epfd == -1) {
        D("epoll create failed");
        goto out;
    }

    D("Linux TCP stack, msglen (%zd)", msglen);

    tcp_sockfd = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
    if (tcp_sockfd < 0) {
        perror("socket");
        goto out_epfd;
    }
    if (setsockopt(tcp_sockfd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) < 0) {
        perror("setsockopt");
        goto out_sock;
    }
    /* IPPROTO_TCP is the portable spelling of the Linux-only SOL_TCP. */
    if (setsockopt(tcp_sockfd, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on)) < 0) {
        perror("setsockopt");
        goto out_sock;
    }
    /* Non-blocking listener: accept_session() loops until the queue is empty. */
    if (ioctl(tcp_sockfd, FIONBIO, &on) < 0) {
        perror("ioctl");
        goto out_sock;
    }

    memset(&saddr_in, 0, sizeof(struct sockaddr_in));
    saddr_in.sin_family = AF_INET;
    saddr_in.sin_addr.s_addr = htonl(INADDR_ANY);
    saddr_in.sin_port = htons((uint16_t)port);
    if (bind(tcp_sockfd, (struct sockaddr *)&saddr_in, sizeof(saddr_in)) < 0) {
        perror("bind");
        goto out_sock;
    }

    if (listen(tcp_sockfd, SOMAXCONN) != 0) {
        perror("listen");
        goto out_sock;
    }

    memset(&ev, 0, sizeof(ev));
    ev.events = EPOLLIN;
    ev.data.fd = tcp_sockfd;
    if (epoll_ctl(epfd, EPOLL_CTL_ADD, tcp_sockfd, &ev) < 0) {
        perror("epoll_ctl");
        goto out_sock;
    }

    signal(SIGINT, sigint_h);

    rc = EXIT_SUCCESS;
    while (!do_abort) {
        int i, nfd;

        /* Timeout -1: block until at least one event (or a signal) arrives. */
        nfd = epoll_wait(epfd, evts, SESSION_MAX, -1);
        if (nfd < 0) {
            if (errno == EINTR)
                continue;   /* the loop condition re-checks do_abort */
            perror("epoll_wait");
            rc = EXIT_FAILURE;
            break;
        }
        for (i = 0; i < nfd; i++) {
            if (evts[i].data.fd == tcp_sockfd) {
                if (accept_session(tcp_sockfd, epfd) < 0) {
                    rc = EXIT_FAILURE;
                    goto out_sock;
                }
            } else {
                /* Per-connection failures are reported by the callee. */
                (void)pkt_processing(evts[i].data.fd, msglen);
            }
        }
    }

out_sock:
    close(tcp_sockfd);
out_epfd:
    close(epfd);
out:
    return rc;
}

I hope this works in your environment.

Thank you very much for your interest.

thank you for your reply. Your experimental description in Section 3.3 of the paper is as follows:

This section evaluates how zpoline affects the performance of application programs backed by user-space OS subsystems; 
we employ zpoline and the existing hook mechanisms described in § 3.1 to transparently apply a portable TCP/IP stack, lwIP [10], backed by Data Plane Development Kit (DPDK) [15], to a simple HTTP server and Redis [35]. 
Normally, kernel-bypassing lwIP achieves higher networking performance than the kernel TCP/IP stack of Linux [4,32]; for reference, we run the same benchmarks using the kernel TCP/IP stack of Linux and report its performance by nondotted horizontal lines in Figure 3. 
We note that the simple HTTP server and Redis are chosen for the experiments because LD_PRELOAD could apply hooks to them, and as explained in § 3.1.4, LD_PRELOAD can fail to hook system calls in other systems.

I would like to know the experimental details on the performance of the user space subsystem in 3.3 of your paper. Are you applying the hooking mechanism to simple HTTP server and Redis? Or use the hooking mechanism to implement lwIP. My understanding is to apply the hooking mechanism to simple HTTP server and Redis, because these programs are very simple. But I don't understand the role of lwIP here, can you explain this experiment more?

98hq commented 2 months ago

When I compile the server code into a 64-bit program in Ubuntu, everything works fine, but when I compile it into a 32-bit program, it always fails to work. Can you provide me with some solution ideas?

yasukata commented 1 month ago

Are you applying the hooking mechanism to simple HTTP server and Redis?

We hook system calls invoked by the simple HTTP server and Redis using the hooking mechanisms.

Or use the hooking mechanism to implement lwIP.

We do not implement lwIP. lwIP is a publicly available portable TCP/IP stack implementation.

can you explain this experiment more?

The background of this experiment is as follows:

For this experiment, we made a glue program that hooks several system calls, such as socket and read/write for socket file descriptors, and redirects them to lwIP-specific functions whose functionalities correspond to the hooked system calls; by using this glue program, we can let the simple HTTP server and Redis use lwIP and DPDK, instead of the kernel-space TCP/IP stack, without modifying the source code of them.

The glue program used for this experiment is found at https://github.com/yasukata/glue-lwip-dpdk-zpoline . Please carefully check the WARNING sections in README if you think of trying this.

Once the program in glue-lwip-dpdk-zpoline is compiled according to the instruction in its README and the simple HTTP server program shown in https://github.com/yasukata/zpoline/issues/17#issuecomment-2101727538 is compiled as glue-lwip-dpdk-zpoline/a.out, the following command performed in the same glue-lwip-dpdk-zpoline directory will launch the simple HTTP server while applying system call hooks to leverage lwIP on DPDK instead of the kernel-space TCP/IP stack.

sudo NET_ADDR=10.100.0.20 NET_MASK=255.255.255.0 NET_GATE=10.100.0.1 DPDK_ARGS="-l 0 --vdev=net_tap,iface=tap001 --no-pci" LD_LIBRARY_PATH=./dpdk/install/lib/x86_64-linux-gnu LIBZPHOOK=./libzphook_lwip.so LD_PRELOAD=./zpoline/libzpoline.so ./a.out -p 10000 -l 2

To check the behavior, please open another console/terminal, and type the following to assign an IP address to the virtual network interface tap001 made by DPDK,

sudo ifconfig tap001 10.100.0.1 netmask 255.255.255.0

and the following telnet command connects to the simple HTTP server.

telnet 10.100.0.20 10000

When you type the four characters `GET ` (the last one is a space) in `telnet`, you will get output like this.

Trying 10.100.0.20...
Connected to 10.100.0.20.
Escape character is '^]'.
GET 
HTTP/1.1 200 OK
Content-Length: 2
Connection: keep-alive

AA

Here, the payload of this HTTP OK message is made by the simple HTTP server, but the TCP/IP packet processing is performed by lwIP rather than the kernel-space TCP/IP stack.

When I compile the server code into a 64-bit program in Ubuntu, everything works fine, but when I compile it into a 32-bit program, it always fails to work. Can you provide me with some solution ideas?

I am sorry, but I have no idea about the solution to this issue.

Thank you very much for your questions.

98hq commented 2 weeks ago

Are you applying the hooking mechanism to simple HTTP server and Redis?

We hook system calls invoked by the simple HTTP server and Redis using the hooking mechanisms.

Or use the hooking mechanism to implement lwIP.

We do not implement lwIP. lwIP is a publicly available portable TCP/IP stack implementation.

can you explain this experiment more?

The background of this experiment is as follows:

  • TCP/IP stacks are software that is in charge of network protocol processing and are typically implemented in OS kernels.
  • The Linux kernel also implements its own TCP/IP stack, and user-space programs such as the simple HTTP server and Redis use the kernel-space TCP/IP stack by default.
  • On the other hand, it is known that lwIP running on DPDK, a fast packet I/O framework, can sometimes achieve higher networking performance compared to the kernel-space TCP/IP stack.
  • However, the issue is that when we wish to use lwIP rather than the kernel-space TCP/IP stack, normally we need to change the source code of existing programs such as the simple HTTP server and Redis to apply the lwIP-specific API.
  • We employ system call hook mechanisms to address this issue; system call hook mechanisms allow us to transparently glue lwIP on DPDK and the unmodified application binaries of the simple HTTP server and Redis.

For this experiment, we made a glue program that hooks several system calls, such as socket and read/write for socket file descriptors, and redirects them to lwIP-specific functions whose functionalities correspond to the hooked system calls; by using this glue program, we can let the simple HTTP server and Redis use lwIP and DPDK, instead of the kernel-space TCP/IP stack, without modifying the source code of them.

The glue program used for this experiment is found at https://github.com/yasukata/glue-lwip-dpdk-zpoline . Please carefully check the WARNING sections in README if you think of trying this.

Once the program in glue-lwip-dpdk-zpoline is compiled according to the instruction in its README and the simple HTTP server program shown in #17 (comment) is compiled as glue-lwip-dpdk-zpoline/a.out, the following command performed in the same glue-lwip-dpdk-zpoline directory will launch the simple HTTP server while applying system call hooks to leverage lwIP on DPDK instead of the kernel-space TCP/IP stack.

sudo NET_ADDR=10.100.0.20 NET_MASK=255.255.255.0 NET_GATE=10.100.0.1 DPDK_ARGS="-l 0 --vdev=net_tap,iface=tap001 --no-pci" LD_LIBRARY_PATH=./dpdk/install/lib/x86_64-linux-gnu LIBZPHOOK=./libzphook_lwip.so LD_PRELOAD=./zpoline/libzpoline.so ./a.out -p 10000 -l 2

To check the behavior, please open another console/terminal, and type the following to assign an IP address to the virtual network interface tap001 made by DPDK,

sudo ifconfig tap001 10.100.0.1 netmask 255.255.255.0

and the following telnet command connects to the simple HTTP server.

telnet 10.100.0.20 10000

When you type the four characters `GET ` (the last one is a space) in `telnet`, you will get output like this.

Trying 10.100.0.20...
Connected to 10.100.0.20.
Escape character is '^]'.
GET 
HTTP/1.1 200 OK
Content-Length: 2
Connection: keep-alive

AA

Here, the payload of this HTTP OK message is made by the simple HTTP server, but the TCP/IP packet processing is performed by lwIP rather than the kernel-space TCP/IP stack.

When I compile the server code into a 64-bit program in Ubuntu, everything works fine, but when I compile it into a 32-bit program, it always fails to work. Can you provide me with some solution ideas?

I am sorry, but I have no idea about the solution to this issue.

Thank you very much for your questions.

I solved the error encountered in compiling the 32-bit server program.

When I tried the glue program in https://github.com/yasukata/glue-lwip-dpdk-zpoline, it worked in a 64-bit environment. Now I want to use some hooking technology to make a 32-bit program use lwIP on DPDK.

But when I use the simple HTTP server program shown in #17 (comment), it doesn't work. Debugging it through GDB, I found that it keeps waiting when executing the epoll_wait function. I tested it through wrk in another terminal and nothing happened. I think I may need to change the code in https://github.com/yasukata/glue-lwip-dpdk-zpoline to make it work on 32-bit programs, but I don't know where to start. Can you give me some advice?

I executed the following command in a terminal:

sudo NET_ADDR=10.100.0.20 NET_MASK=255.255.255.0 NET_GATE=10.100.0.1 DPDK_ARGS="-l 0 --vdev=net_tap,iface=tap001 --no-pci --no-huge" LD_LIBRARY_PATH=./dpdk/install/lib/i386-linux-gnu LIBZPHOOK=./libzphook_lwip.so LD_PRELOAD=./sud/libzpoline.so ./server32 -p 10000 -l 1

Then, I executed the following command in another terminal

sudo ifconfig tap001 10.100.0.1 netmask 255.255.255.0
wrk http://10.100.0.20:10000/ -d 10 -t 1 -c 1 -L

wrk can't connect properly, but I can ping 10.100.0.20

yasukata commented 1 week ago

But when I use the simple HTTP server program shown in https://github.com/yasukata/zpoline/issues/17#issuecomment-2101727538, it doesn't work. Debugging it through GDB, I found that it keeps waiting when executing the epoll_wait function. I tested it through wrk in another terminal and nothing happened. I think I may need to change the code in https://github.com/yasukata/glue-lwip-dpdk-zpoline to make it work on 32-bit programs, but I don't know where to start. Can you give me some advice?

I am sorry, but I do not have an exact idea for this.

One thing in my mind is that the C program shown in this repository may not be able to run on 32-bit platforms because it contains assembly code using 64-bit registers such as rax; I am also not sure if this is the only issue, and there would be more.

98hq commented 1 week ago

But when I use the simple HTTP server program shown in #17 (comment), it doesn't work. Debugging it through GDB, I found that it keeps waiting when executing the epoll_wait function. I tested it through wrk in another terminal and nothing happened. I think I may need to change the code in https://github.com/yasukata/glue-lwip-dpdk-zpoline to make it work on 32-bit programs, but I don't know where to start. Can you give me some advice?

I am sorry, but I do not have an exact idea for this.

One thing in my mind is that the C program shown in this repository may not be able to run on 32-bit platforms because it contains assembly code using 64-bit registers such as rax; I am also not sure if this is the only issue, and there would be more.

Thanks for your reply. I fine-tuned the source code of main.c in the glue-lwip-dpdk-zpoline library, and then it worked in 32-bit programs. The reason for the problem before was the difference in system calls between 64-bit programs and 32-bit programs that I didn't take into account.