yasukata / zpoline

system call hook for Linux
Apache License 2.0
408 stars 32 forks source link

How to use ld_preload and ptrace to hook the program so that the program can use lwip API #19

Open 98hq opened 1 month ago

98hq commented 1 month ago

Hello, I used the code of glue-lwip-dpdk-zpoline and successfully used zpoline to improve the efficiency of a program. Then I want to test the efficiency improvement of the program after applying lwip API compared with other hook mechanisms. I can test SUD and int3 signaling successfully, but when I tested ld_preload, it crashed. Can I know the source code of your program using ld_preload to use lwip API?

This is the hook code I wrote for LD_PRELOAD; it is compiled into libpreload.so and then loaded via LD_PRELOAD.

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <assert.h>
#include <signal.h>
#include <sched.h>
#include <sys/syscall.h>
#include <sys/prctl.h>
#include <errno.h>
#include <dlfcn.h>
#include <elf.h>
#include <unistd.h>
#include <stdbool.h>
#include <string.h>
#include <sys/mman.h>
#include <inttypes.h>

#include <fcntl.h>
#include <syscall.h>
#include <sys/epoll.h>
#include <linux/net.h>
#include <arpa/inet.h>

#include <sys/types.h>
#include <sys/ioctl.h>
#include <stdarg.h>

/*
 * Shape of the entry point exported by the hook library: seven long
 * arguments, returning a long.
 */
typedef long (*syscall_fn_t)(long a1, long a2, long a3, long a4,
                             long a5, long a6, long a7);

/* Hook entry point; remains a null pointer until something assigns it. */
static syscall_fn_t hook_function;

int socket(int domain, int type, int protocol)
{
    return hook_function(__NR_socket,(long)domain, (long)type, (long)protocol,0,0,0);
}
int close(int fd)
{
    return hook_function(__NR_close,(long)fd, 0, 0,0,0,0);
}
int fcntl(int fd, int cmd, ...  )
{
    va_list args;
    va_start(args, cmd);
    switch (cmd) {
    case F_DUPFD:
    case F_DUPFD_CLOEXEC:
    case F_SETFD:
    case F_SETFL:
    case F_SETOWN:
    case F_SETSIG:
    case F_SETLEASE:
    case F_NOTIFY:
        return hook_function(__NR_fcntl,(long)fd, (long)cmd, (long)(va_arg(args, int)),0,0,0);  
    case F_GETLK:
    case F_SETLK:
    case F_SETLKW:
    case F_GETOWN_EX:
    case F_SETOWN_EX:
    case F_GETSIG:
    case F_GETLEASE:
        return hook_function(__NR_fcntl,(long)fd, (long)cmd, (long)(va_arg(args, void *)),0,0,0);

    default:
         return hook_function(__NR_fcntl,(long)fd, (long)cmd, 0,0,0,0); 
    }
    va_end(args);

}
ssize_t read(int fd, void *buf, size_t count)
{
    return  hook_function(__NR_read,(long)fd, (long)buf, (long)count,0,0,0);
}
ssize_t write(int fd, const void *buf, size_t count)
{
    return hook_function(__NR_write,(long)fd, (long)buf, (long)count,0,0,0);
}
int ioctl(int fd, unsigned long request, ...)
{
    va_list args;
    va_start(args, request);
    void *arg = va_arg(args, void *);
    va_end(args);
    return hook_function(__NR_ioctl,(long)fd, (long)request, (long)arg,0,0,0);
}
int bind(int sockfd, const struct sockaddr *addr,socklen_t addrlen)
{
    return hook_function(__NR_bind,(long)sockfd, (long)addr, (long)addrlen,0,0,0);
}
int listen(int sockfd, int backlog)
{   
    return hook_function(__NR_listen,(long)sockfd, (long)backlog, 0,0,0,0);
}
int accept(int sockfd, struct sockaddr *addr, socklen_t *addrlen)
{
    return hook_function(__NR_accept4,(long)sockfd, (long)addr, (long)addrlen,0,0,0);
}
int getsockopt(int sockfd, int level, int optname, void *optval, socklen_t *optlen)
{
    return hook_function(__NR_getsockopt,(long)sockfd, (long)level, (long)optname, (long)optval, (long)optlen,0);
}
int setsockopt(int sockfd, int level, int optname,const void *optval, socklen_t optlen)
{
    return hook_function(__NR_setsockopt,(long)sockfd, (long)level, (long)optname, (long)optval, (long)optlen,0);
}
int epoll_create(int size)
{
    return hook_function(__NR_epoll_create,(long)size, 0, 0, 0, 0,0);       
}
int epoll_create1(int flags)
{
    return hook_function(__NR_epoll_create1,(long)flags, 0, 0, 0, 0,0);
}
int epoll_wait(int epfd, struct epoll_event *events,int maxevents, int timeout)
{
    return hook_function(__NR_epoll_wait,(long)epfd, (long)events, (long)maxevents,(long)timeout, 0,0);
}
int epoll_ctl(int epfd, int op, int fd, struct epoll_event *event)
{
    return hook_function(__NR_epoll_ctl,(long)epfd, (long)op, (long)fd,(long)event, 0,0);
}
/*
 * Library constructor: loads the hook library named by the LIBZPHOOK
 * environment variable into a fresh link-map namespace, runs its
 * __hook_init entry point, and stores its __hook_function entry point in
 * hook_function (unless hook_function has already been set).
 *
 * If LIBZPHOOK is unset a notice is printed and nothing is loaded.  Any
 * failure to load the library, resolve a symbol, or initialize the hook
 * terminates the process with exit status 1.
 *
 * The calls with side effects are deliberately kept out of assert():
 * building with NDEBUG would otherwise remove the __hook_init call
 * altogether.
 */
__attribute__((constructor(0xffff))) static void __preload_init(void)
{
    const char *filename;
    void *handle;
    int (*hook_init)(void);

    filename = getenv("LIBZPHOOK");
    if (!filename) {
        printf("env LIBZPHOOK is empty, so skip to load a hook library\n");
        return;
    }

    handle = dlmopen(LM_ID_NEWLM, filename, RTLD_NOW | RTLD_LOCAL);
    if (!handle) {
        printf("\n");
        printf("dlmopen failed: %s\n", dlerror());
        printf("\n");
        printf("NOTE: this may occur when the compilation of your hook function library misses some specifications in LDFLAGS. or if you are using a C++ compiler, dlmopen may fail to find a symbol, and adding 'extern \"C\"' to the definition may resolve the issue.\n");
        exit(1);
    }

    hook_init = dlsym(handle, "__hook_init");
    if (!hook_init) {
        fprintf(stderr, "dlsym failed: __hook_init not found in %s\n", filename);
        exit(1);
    }
    if (hook_init() != 0) {
        fprintf(stderr, "__hook_init of %s reported failure\n", filename);
        exit(1);
    }

    if (!hook_function) {
        hook_function = dlsym(handle, "__hook_function");
        if (!hook_function) {
            fprintf(stderr, "dlsym failed: __hook_function not found in %s\n", filename);
            exit(1);
        }
    }
}

I left main.c in glue-lwip-dpdk-zpoline essentially unmodified, except that I replaced each occurrence of next_sys_call with the corresponding original system call name.

This is the source code of main.c after I modified it: main.c

This is the command I use to start the program:

sudo NET_ADDR=10.100.0.20 NET_MASK=255.255.255.0 NET_GATE=10.100.0.1 DPDK_ARGS="-l 0 --vdev=net_tap,iface=tap001 --no-pci" LD_LIBRARY_PATH=./dpdk/install/lib/x86_64-linux-gnu LIBZPHOOK=./libzphook_lwip.so LD_PRELOAD=./ld_preload/libpreload.so PATH_TO/redis-server --protected-mode no

When the hooked object is redis-server, the following error occurs: Accepting client connection: accept: Socket operation on non-socket

When the hooked object is a simple server program, the program crashes after connecting. After debugging it, I found that the accept return value is often 0xfffffff5. The source code of the simple server program is at server program

In addition, I would like to know how you use ptrace to let the program apply lwip's API. As we discussed before, ptrace cannot jump over the original system call #10(comment). How do you avoid this problem?

Looking forward to your reply.

yasukata commented 1 month ago

I can test SUD and int3 signaling successfully, but when I tested ld_preload, it crashed. Can I know the source code of your program using ld_preload to use lwip API?

To use LD_PRELOAD with https://github.com/yasukata/glue-lwip-dpdk-zpoline , please try the following by replacing the content of glue-lwip-dpdk-zpoline/zpoline/main.c.

glue-lwip-dpdk-zpoline/zpoline/main.c ```c #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include #include #include #include #include extern void syscall_addr(void); extern long enter_syscall(int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t); extern void asm_syscall_hook(void); void ____asm_impl(void) { /* * enter_syscall triggers a kernel-space system call */ asm volatile ( ".globl enter_syscall \n\t" "enter_syscall: \n\t" "movq %rdi, %rax \n\t" "movq %rsi, %rdi \n\t" "movq %rdx, %rsi \n\t" "movq %rcx, %rdx \n\t" "movq %r8, %r10 \n\t" "movq %r9, %r8 \n\t" "movq 8(%rsp),%r9 \n\t" ".globl syscall_addr \n\t" "syscall_addr: \n\t" "syscall \n\t" "ret \n\t" ); } static long (*hook_fn)(int64_t a1, int64_t a2, int64_t a3, int64_t a4, int64_t a5, int64_t a6, int64_t a7) = enter_syscall; long syscall_hook(int64_t rdi, int64_t rsi, int64_t rdx, int64_t __rcx __attribute__((unused)), int64_t r8, int64_t r9, int64_t r10_on_stack /* 4th arg for syscall */, int64_t rax_on_stack, int64_t retptr __attribute__((unused)) ) { return hook_fn(rax_on_stack, rdi, rsi, rdx, r10_on_stack, r8, r9); } static void load_hook_lib(void) { void *handle; { const char *filename; filename = getenv("LIBZPHOOK"); if (!filename) { printf("-- env LIBZPHOOK is empty, so skip to load a hook library\n"); return; } printf("-- load %s\n", filename); handle = dlmopen(LM_ID_NEWLM, filename, RTLD_NOW | RTLD_LOCAL); if (!handle) { printf("\n"); printf("dlmopen failed: %s\n", dlerror()); printf("\n"); printf("NOTE: this may occur when the compilation of your hook function library misses some specifications in LDFLAGS. 
or if you are using a C++ compiler, dlmopen may fail to find a symbol, and adding 'extern \"C\"' to the definition may resolve the issue.\n"); exit(1); } } { int (*hook_init)(long, ...); hook_init = dlsym(handle, "__hook_init"); assert(hook_init); printf("-- call hook init\n"); assert(hook_init(0, &hook_fn) == 0); } } #include #include #include #include #include #include #include #include ssize_t read(int fd, void *buf, size_t count) { long ret = syscall_hook(fd, (int64_t) buf, count, 0, 0, 0, 0, __NR_read, 0); if (ret < 0) { errno = ret; return -1; } else return ret; } ssize_t write(int fd, const void *buf, size_t count) { long ret = syscall_hook(fd, (int64_t) buf, count, 0, 0, 0, 0, __NR_write, 0); if (ret < 0) { errno = ret; return -1; } else return ret; } int close(int fd) { long ret = syscall_hook(fd, 0, 0, 0, 0, 0, 0, __NR_close, 0); if (ret < 0) { errno = ret; return -1; } else return ret; } int ioctl(int fd, unsigned long request, ...) { // FIXME long ret = syscall_hook(fd, request, 0, 0, 0, 0, 0, __NR_ioctl, 0); if (ret < 0) { errno = ret; return -1; } else return ret; } int accept(int sockfd, struct sockaddr *addr, socklen_t *addrlen) { long ret = syscall_hook(sockfd, (int64_t) addr, (int64_t) addrlen, 0, 0, 0, 0, __NR_accept, 0); if (ret < 0) { errno = -ret; return -1; } else return ret; } int accept4(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags) { long ret = syscall_hook(sockfd, (int64_t) addr, (int64_t) addrlen, flags, 0, 0, 0, __NR_accept4, 0); if (ret < 0) { errno = -ret; return -1; } else return ret; } int bind(int sockfd, const struct sockaddr *addr, socklen_t addrlen) { long ret = syscall_hook(sockfd, (int64_t) addr, (int64_t) addrlen, 0, 0, 0, 0, __NR_bind, 0); if (ret < 0) { errno = ret; return -1; } else return ret; } int listen(int sockfd, int backlog) { long ret = syscall_hook(sockfd, backlog, 0, 0, 0, 0, 0, __NR_listen, 0); if (ret < 0) { errno = ret; return -1; } else return ret; } int getsockopt(int sockfd, int level, 
int optname, void *optval, socklen_t *optlen) { long ret = syscall_hook(sockfd, level, optname, 0, (int64_t) optlen, 0, (int64_t) optval, __NR_getsockopt, 0); if (ret < 0) { errno = ret; return -1; } else return ret; } int setsockopt(int sockfd, int level, int optname, const void *optval, socklen_t optlen) { long ret = syscall_hook(sockfd, level, optname, 0, (int64_t) optlen, 0, (int64_t) optval, __NR_setsockopt, 0); if (ret < 0) { errno = ret; return -1; } else return ret; } int fcntl(int fd, int cmd, ... /* arg */ ) { // FIXME long ret = syscall_hook(fd, cmd, 0, 0, 0, 0, 0, __NR_fcntl, 0); if (ret < 0) { errno = ret; return -1; } else return ret; } int socket(int domain, int type, int protocol) { long ret = syscall_hook(domain, type, protocol, 0, 0, 0, 0, __NR_socket, 0); if (ret < 0) { errno = ret; return -1; } else return ret; } int epoll_create(int size) { long ret = syscall_hook(size, 0, 0, 0, 0, 0, 0, __NR_epoll_create, 0); if (ret < 0) { errno = ret; return -1; } else return ret; } int epoll_create1(int flags) { long ret = syscall_hook(flags, 0, 0, 0, 0, 0, 0, __NR_epoll_create1, 0); if (ret < 0) { errno = ret; return -1; } else return ret; } int epoll_wait(int epfd, struct epoll_event *events, int maxevents, int timeout) { long ret = syscall_hook(epfd, (int64_t) events, maxevents, 0, 0, 0, timeout, __NR_epoll_wait, 0); if (ret < 0) { errno = ret; return -1; } else return ret; } int epoll_ctl(int epfd, int op, int fd, struct epoll_event *event) { long ret = syscall_hook(epfd, op, fd, 0, 0, 0, (int64_t) event, __NR_epoll_ctl, 0); if (ret < 0) { errno = ret; return -1; } else return ret; } __attribute__((constructor(0xffff))) static void __do_hook_init(void) { load_hook_lib(); } ```

Supposedly, the program above can be compiled by make -C zpoline typed in glue-lwip-dpdk-zpoline, and the command shown in README of https://github.com/yasukata/glue-lwip-dpdk-zpoline can be used for running it.

In addition, I would like to know how you use ptrace to let the program apply lwip's API.

To use ptrace with https://github.com/yasukata/glue-lwip-dpdk-zpoline , please try the following by replacing the content of glue-lwip-dpdk-zpoline/main.c and glue-lwip-dpdk-zpoline/Makefile.

glue-lwip-dpdk-zpoline/main.c ```c #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* workaround to avoid conflicts between dpdk and lwip definitions */ #undef IP_DF #undef IP_MF #undef IP_RF #undef IP_OFFMASK #include #include #include #include #include #include #include #include #include #include static ssize_t copy_between_user(pid_t pid, void *uaddr, void *buf, size_t count, bool to) { ssize_t ret; const struct iovec local_iov = { .iov_base = buf, .iov_len = count, }; const struct iovec remote_iov = { .iov_base = uaddr, .iov_len = count, }; if (to) ret = process_vm_writev(pid, &local_iov, 1, &remote_iov, 1, 0); else ret = process_vm_readv(pid, &local_iov, 1, &remote_iov, 1, 0); assert(ret >= 0); return ret; } static pid_t pid; static ssize_t copy_to_user(void *uaddr, void *buf, size_t count) { return copy_between_user(pid, uaddr, buf, count, true); } static ssize_t copy_from_user(void *uaddr, void *buf, size_t count) { return copy_between_user(pid, uaddr, buf, count, false); } typedef long (*syscall_fn_t)(long, long, long, long, long, long, long); static syscall_fn_t next_sys_call = NULL; #define MAX_PKT_BURST (32) #define NUM_SLOT (256) #define MEMPOOL_CACHE_SIZE (256) #define PACKET_BUF_SIZE (1518) static struct rte_mempool *pktmbuf_pool = NULL; static int tx_idx = 0; static struct rte_mbuf *tx_mbufs[MAX_PKT_BURST] = { 0 }; static void tx_flush(void) { int xmit = tx_idx, xmitted = 0; while (xmitted != xmit) xmitted += rte_eth_tx_burst(0 /* port id */, 0 /* queue id */, &tx_mbufs[xmitted], xmit - xmitted); tx_idx = 0; } static err_t low_level_output(struct netif *netif __attribute__((unused)), struct pbuf *p) { char buf[PACKET_BUF_SIZE]; void *bufptr, 
*largebuf = NULL; if (sizeof(buf) < p->tot_len) { largebuf = (char *) malloc(p->tot_len); assert(largebuf); bufptr = largebuf; } else bufptr = buf; pbuf_copy_partial(p, bufptr, p->tot_len, 0); assert((tx_mbufs[tx_idx] = rte_pktmbuf_alloc(pktmbuf_pool)) != NULL); assert(p->tot_len <= RTE_MBUF_DEFAULT_BUF_SIZE); rte_memcpy(rte_pktmbuf_mtod(tx_mbufs[tx_idx], void *), bufptr, p->tot_len); rte_pktmbuf_pkt_len(tx_mbufs[tx_idx]) = rte_pktmbuf_data_len(tx_mbufs[tx_idx]) = p->tot_len; if (++tx_idx == MAX_PKT_BURST) tx_flush(); if (largebuf) free(largebuf); return ERR_OK; } #define MAX_ACCEPT_FD (512) #define MAX_RXPBUF (512) #define MAX_FD (1024) struct lwip_fd { char used; char close_posted; unsigned short num_accept_fd; int accept_fd[MAX_ACCEPT_FD]; size_t tmp_pbuf_off; unsigned short num_rxpbuf; struct pbuf *rxpbuf[MAX_RXPBUF]; struct tcp_pcb *tpcb; int epfd; }; struct lwip_fd lfd[MAX_FD] = { 0 }; #define MAX_EPOLL_FD (512) struct epoll_fd { char used; int num_fd; int fd[MAX_EPOLL_FD]; }; struct epoll_fd efd[MAX_FD] = { 0 }; static struct netif _netif = { 0 }; static int close_post_cnt = 0; static int close_post_queue[MAX_FD] = { 0 }; static void dpdk_poll(void) { struct rte_mbuf *rx_mbufs[MAX_PKT_BURST]; unsigned short i, nb_rx = rte_eth_rx_burst(0 /* port id */, 0 /* queue id */, rx_mbufs, MAX_PKT_BURST); for (i = 0; i < nb_rx; i++) { { struct pbuf *p; assert((p = pbuf_alloc(PBUF_RAW, rte_pktmbuf_pkt_len(rx_mbufs[i]), PBUF_POOL)) != NULL); pbuf_take(p, rte_pktmbuf_mtod(rx_mbufs[i], void *), rte_pktmbuf_pkt_len(rx_mbufs[i])); p->len = p->tot_len = rte_pktmbuf_pkt_len(rx_mbufs[i]); assert(_netif.input(p, &_netif) == ERR_OK); } rte_pktmbuf_free(rx_mbufs[i]); } tx_flush(); sys_check_timeouts(); } static int lwip_syscall_close(int fd); static void tcp_destroy_handeler(u8_t id __attribute__((unused)), void *data) { int fd = (int) ((uintptr_t) data); { unsigned short i; for (i = 0; i < lfd[fd].num_rxpbuf; i++) pbuf_free(lfd[fd].rxpbuf[i]); } { unsigned short i; for (i = 0; i 
< lfd[fd].num_accept_fd; i++) lwip_syscall_close(lfd[fd].accept_fd[i]); } memset(&lfd[fd], 0, sizeof(lfd[fd])); asm volatile ("" ::: "memory"); close(fd); } static const struct tcp_ext_arg_callbacks tcp_ext_arg_cbs = { .destroy = tcp_destroy_handeler, }; static void tcp_destroy_handeler_dummy(u8_t id __attribute__((unused)), void *data __attribute__((unused))) { } static const struct tcp_ext_arg_callbacks tcp_ext_arg_cbs_dummy = { .destroy = tcp_destroy_handeler_dummy, }; static err_t tcp_recv_handler(void *arg, struct tcp_pcb *tpcb, struct pbuf *p, err_t err) { if (err != ERR_OK) return err; if (!p) { tcp_close(tpcb); return ERR_OK; } lfd[(int)((uintptr_t) arg)].rxpbuf[lfd[(int)((uintptr_t) arg)].num_rxpbuf++] = p; return ERR_OK; } static err_t accept_handler(void *arg, struct tcp_pcb *tpcb, err_t err) { if (err != ERR_OK) return err; { int newfd; assert((newfd = open("/dev/null", O_RDONLY)) != -1); lfd[newfd].used = 1; lfd[newfd].tpcb = tpcb; tcp_arg(tpcb, (void *)((uintptr_t) newfd)); lfd[(int)((uintptr_t) arg)].accept_fd[lfd[(int)((uintptr_t) arg)].num_accept_fd++] = newfd; } tcp_recv(tpcb, tcp_recv_handler); tcp_setprio(tpcb, TCP_PRIO_MAX); tpcb->so_options |= SOF_KEEPALIVE; tpcb->keep_intvl = (60 * 1000); tpcb->keep_idle = (60 * 1000); tpcb->keep_cnt = 1; return err; } static err_t if_init(struct netif *netif) { { struct rte_ether_addr ports_eth_addr; assert(rte_eth_macaddr_get(0 /* port id */, &ports_eth_addr) >= 0); memcpy(netif->hwaddr, ports_eth_addr.addr_bytes, 6); } assert(rte_eth_dev_get_mtu(0 /* port id */, &netif->mtu) >= 0); assert(netif->mtu <= PACKET_BUF_SIZE); netif->output = etharp_output; netif->linkoutput = low_level_output; netif->hwaddr_len = 6; netif->flags = NETIF_FLAG_BROADCAST | NETIF_FLAG_ETHARP; return ERR_OK; } static ssize_t lwip_syscall_read(int fd, char *buf, size_t count) { char _buf[0x2000]; if (count > sizeof(_buf)) count = sizeof(_buf); if (!lfd[fd].num_rxpbuf) { dpdk_poll(); if (!lfd[fd].num_rxpbuf) return -EAGAIN; } { 
unsigned short i; size_t c; for (i = 0, c = 0; i < lfd[fd].num_rxpbuf && c < count; i++) { struct pbuf *p = lfd[fd].rxpbuf[i]; size_t l = ((count - c) < (p->tot_len - lfd[fd].tmp_pbuf_off) ? (count - c) : (p->tot_len - lfd[fd].tmp_pbuf_off)); pbuf_copy_partial(p, &_buf[c], l, lfd[fd].tmp_pbuf_off); c += l; if (p->tot_len != l) { assert(c == count); lfd[fd].tmp_pbuf_off = l; } else { tcp_recved(lfd[fd].tpcb, p->tot_len); pbuf_free(p); } } memmove(&lfd[fd].rxpbuf[0], &lfd[fd].rxpbuf[i - (lfd[fd].tmp_pbuf_off ? 1 : 0)], (i - (lfd[fd].tmp_pbuf_off ? 1 : 0)) * sizeof(struct pbuf *)); lfd[fd].num_rxpbuf -= (i - (lfd[fd].tmp_pbuf_off ? 1 : 0)); copy_to_user(buf, _buf, c); return c; } } static ssize_t lwip_syscall_write(int fd, const char *buf, size_t count) { assert(tcp_sndbuf(lfd[fd].tpcb) >= count); char _buf[0x2000]; assert(count < sizeof(_buf)); copy_from_user((void *) buf, _buf, count); assert(tcp_write(lfd[fd].tpcb, _buf, count, TCP_WRITE_FLAG_COPY) == ERR_OK); assert(tcp_output(lfd[fd].tpcb) == ERR_OK); return count; } static int lwip_syscall_epoll_ctl(int epfd, int op, int fd, struct epoll_event *event); static int lwip_syscall_close(int fd) { if (!lfd[fd].close_posted) { assert(!lwip_syscall_epoll_ctl(lfd[fd].epfd, EPOLL_CTL_DEL, fd, NULL)); close_post_queue[close_post_cnt++] = fd; lfd[fd].close_posted = 1; } return 0; } static int lwip_syscall_socket(int domain, int type, int protocol) { if (domain == AF_INET && type == SOCK_STREAM && (protocol == 0 || protocol == IPPROTO_TCP)) { int fd; assert((fd = open("/dev/null", O_RDONLY)) != -1); lfd[fd].used = 1; assert((lfd[fd].tpcb = tcp_new()) != NULL); tcp_arg(lfd[fd].tpcb, (void *)((uintptr_t) fd)); tcp_ext_arg_set_callbacks(lfd[fd].tpcb, 0, &tcp_ext_arg_cbs); tcp_ext_arg_set(lfd[fd].tpcb, 0, (void *) ((uintptr_t) fd)); return fd; } else return next_sys_call(__NR_socket, domain, type, protocol, 0, 0, 0); } static int lwip_syscall_accept(int sockfd, struct sockaddr *addr, socklen_t *addrlen) { (void) addr; (void) 
addrlen; if (!lfd[sockfd].num_accept_fd) { dpdk_poll(); if (!lfd[sockfd].num_accept_fd) return -EAGAIN; } return lfd[sockfd].accept_fd[--lfd[sockfd].num_accept_fd]; } static int lwip_syscall_bind(int sockfd, const struct sockaddr *addr, socklen_t addrlen) { (void) addr; (void) addrlen; struct sockaddr_storage _addr; copy_from_user((void *) addr, &_addr, addrlen); assert(tcp_bind(lfd[sockfd].tpcb, (const ip_addr_t *) &((const struct sockaddr_in *) &_addr)->sin_addr.s_addr, ntohs(((const struct sockaddr_in *) &_addr)->sin_port)) == ERR_OK); return 0; } static int lwip_syscall_listen(int sockfd, int backlog __attribute__((unused))) { tcp_ext_arg_set_callbacks(lfd[sockfd].tpcb, 0, &tcp_ext_arg_cbs_dummy); assert((lfd[sockfd].tpcb = tcp_listen(lfd[sockfd].tpcb)) != NULL); tcp_arg(lfd[sockfd].tpcb, (void *)((uintptr_t) sockfd)); tcp_ext_arg_set_callbacks(lfd[sockfd].tpcb, 0, &tcp_ext_arg_cbs); tcp_accept(lfd[sockfd].tpcb, accept_handler); return 0; } static int lwip_syscall_epoll_ctl(int epfd, int op, int fd, struct epoll_event *event __attribute__((unused))) { switch (op) { case EPOLL_CTL_ADD: efd[epfd].fd[efd[epfd].num_fd++] = fd; lfd[fd].epfd = epfd; break; case EPOLL_CTL_DEL: { int i; for (i = 0; i < efd[epfd].num_fd; i++) { if (efd[epfd].fd[i] == fd) { efd[epfd].fd[i] = efd[epfd].fd[--efd[epfd].num_fd]; lfd[fd].epfd = 0; break; } } } break; default: assert(0); break; } return 0; } static int lwip_syscall_epoll_wait(int epfd, struct epoll_event *events, int maxevents, int timeout) { struct epoll_event _events[2048]; if (maxevents > 2048) maxevents = 2048; int e = 0; { struct timespec start; assert(!clock_gettime(CLOCK_REALTIME, &start)); while (1) { { int i; for (i = 0; i < efd[epfd].num_fd && !e && e < maxevents; i++) { if (lfd[efd[epfd].fd[i]].num_accept_fd || lfd[efd[epfd].fd[i]].num_rxpbuf) { struct epoll_event *ev = &_events[e++]; ev->data.fd = efd[epfd].fd[i]; ev->events = EPOLLIN; } } if (e) break; } if (timeout >= 0) { struct timespec now; 
assert(!clock_gettime(CLOCK_REALTIME, &now)); if (((unsigned long) timeout * 1000) < ((now.tv_sec * 1000000000UL + now.tv_nsec) - (start.tv_sec * 1000000000UL + start.tv_nsec))) { break; } } dpdk_poll(); } } if (e) copy_to_user(events, _events, sizeof(struct epoll_event) * e); return e; } static long lwip_syscall(long a1, long a2, long a3, long a4, long a5 __attribute__((unused)), long a6 __attribute__((unused)), long a7 __attribute__((unused))) { long ret = 0; switch (a1) { case __NR_read: // 0 ret = lwip_syscall_read((int) a2, (void *) a3, (size_t) a4); break; case __NR_write: // 1 ret = lwip_syscall_write((int) a2, (const void *) a3, (size_t) a4); break; case __NR_close: // 3 ret = lwip_syscall_close((int) a2); break; case __NR_ioctl: // 16 ret = 0; break; case __NR_socket: // 41 ret = lwip_syscall_socket((int) a2, (int) a3, (int) a4); break; case __NR_accept: // 43 case __NR_accept4: // 288 ret = lwip_syscall_accept((int) a2, (struct sockaddr *) a3, (socklen_t *) a4); break; case __NR_bind: // 49 ret = lwip_syscall_bind((int) a2, (const struct sockaddr *) a3, (socklen_t) a4); break; case __NR_listen: // 50 ret = lwip_syscall_listen((int) a2, (int) a3); break; case __NR_setsockopt: // 54 ret = 0; break; case __NR_getsockopt: // 55 ret = 0; break; case __NR_fcntl: // 72 ret = 0; break; default: printf("unhandled %lu\n", a1); assert(0); break; } return ret; } static int lwip_syscall_epoll_close(int fd) { memset(&efd[fd], 0, sizeof(efd[fd])); asm volatile ("" ::: "memory"); close(fd); return 0; } static int lwip_syscall_epoll_create(int size __attribute__((unused))) { int fd; assert((fd = open("/dev/null", O_RDONLY)) != -1); efd[fd].used = 1; return fd; } static long epoll_syscall(long a1, long a2, long a3, long a4, long a5, long a6 __attribute__((unused)), long a7 __attribute__((unused))) { long ret = 0; switch (a1) { case __NR_close: // 3 ret = lwip_syscall_epoll_close((int) a2); break; case __NR_fcntl: // 72 ret = 0; break; case __NR_epoll_create: // 213 ret = 
lwip_syscall_epoll_create((int) a2); break; case __NR_epoll_ctl_old: // 214 ret = lwip_syscall_epoll_ctl((int) a2, (int) a3, (int) a4, (struct epoll_event *) a5); break; case __NR_epoll_wait_old: // 215 ret = lwip_syscall_epoll_wait((int) a2, (struct epoll_event *) a3, (int) a4, (int) a5); break; case __NR_epoll_wait: // 232 ret = lwip_syscall_epoll_wait((int) a2, (struct epoll_event *) a3, (int) a4, (int) a5); break; case __NR_epoll_ctl: // 233 ret = lwip_syscall_epoll_ctl((int) a2, (int) a3, (int) a4, (struct epoll_event *) a5); break; case __NR_epoll_create1: // 291 ret = lwip_syscall_epoll_create((int) a2); break; default: printf("unhandled %lu\n", a1); assert(0); break; } return ret; } static long hook_function(long a1, long a2, long a3, long a4, long a5, long a6, long a7) { switch (a1) { case __NR_socket: // 41 return lwip_syscall(a1, a2, a3, a4, a5, a6, a7); case __NR_close: // 3 case __NR_fcntl: // 72 if (lfd[a2].used) return lwip_syscall(a1, a2, a3, a4, a5, a6, a7); if (efd[a2].used) return epoll_syscall(a1, a2, a3, a4, a5, a6, a7); return next_sys_call(a1, a2, a3, a4, a5, a6, a7); case __NR_read: // 0 case __NR_write: // 1 case __NR_ioctl: // 16 case __NR_accept: // 43 case __NR_bind: // 49 case __NR_listen: // 50 case __NR_setsockopt: // 54 case __NR_getsockopt: // 55 case __NR_accept4: //288 if (lfd[a2].used) return lwip_syscall(a1, a2, a3, a4, a5, a6, a7); return next_sys_call(a1, a2, a3, a4, a5, a6, a7); case __NR_epoll_create: // 213 case __NR_epoll_ctl_old: // 214 case __NR_epoll_wait_old: // 215 case __NR_epoll_wait: // 232 case __NR_epoll_ctl: // 233 case __NR_epoll_create1: // 291 return epoll_syscall(a1, a2, a3, a4, a5, a6, a7); default: return next_sys_call(a1, a2, a3, a4, a5, a6, a7); } } int __hook_init(long placeholder __attribute__((unused)), void *sys_call_hook_ptr) { if (!getenv("NET_ADDR")) return -1; if (!getenv("NET_MASK")) return -1; if (!getenv("NET_GATE")) return -1; if (!getenv("DPDK_ARGS")) return -1; /* setting up dpdk */ { { int 
argc = 0; char **argv = NULL; char *argstr; assert((argstr = strdup(getenv("DPDK_ARGS"))) != NULL); { size_t l = strlen(argstr); int argvlen = 8; assert((argv = realloc(argv, sizeof(*argv) * argvlen)) != NULL); argv[argc++] = "app"; { bool prev_space = true; { size_t i; for (i = 0; i < l; i++) { if (prev_space) { if (argstr[i] != ' ') { if (argvlen < argc + 2) { argvlen += 16; assert((argv = realloc(argv, sizeof(*argv) * argvlen)) != NULL); } argv[argc++] = &argstr[i]; prev_space = false; } else argstr[i] = '\0'; } else if (argstr[i] == ' ') { argstr[i] = '\0'; prev_space = true; } } } argv[argc] = NULL; } } assert(rte_eal_init(argc, argv) >= 0); free(argv); free(argstr); } { uint16_t nb_rxd = NUM_SLOT; uint16_t nb_txd = NUM_SLOT; assert(rte_eth_dev_count_avail() == 1); assert((pktmbuf_pool = rte_pktmbuf_pool_create("mbuf_pool", RTE_MAX(1 /* nb_ports */ * (nb_rxd + nb_txd + MAX_PKT_BURST + 1 * MEMPOOL_CACHE_SIZE), 8192), MEMPOOL_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id())) != NULL); { struct rte_eth_dev_info dev_info; struct rte_eth_conf local_port_conf = { 0 }; assert(rte_eth_dev_info_get(0 /* port id */, &dev_info) >= 0); assert(rte_eth_dev_configure(0 /* port id */, 1 /* num queues */, 1 /* num queues */, &local_port_conf) >= 0); assert(rte_eth_dev_adjust_nb_rx_tx_desc(0 /* port id */, &nb_rxd, &nb_txd) >= 0); assert(rte_eth_rx_queue_setup(0 /* port id */, 0 /* queue */, nb_rxd, rte_eth_dev_socket_id(0 /* port id */), &dev_info.default_rxconf, pktmbuf_pool) >= 0); assert(rte_eth_tx_queue_setup(0 /* port id */, 0 /* queue */, nb_txd, rte_eth_dev_socket_id(0 /* port id */), &dev_info.default_txconf) >= 0); assert(rte_eth_dev_start(0 /* port id */) >= 0); assert(rte_eth_promiscuous_enable(0 /* port id */) >= 0); } } } /* setting up lwip */ { lwip_init(); { ip4_addr_t _addr, _mask, _gate; inet_pton(AF_INET, getenv("NET_ADDR"), &_addr); inet_pton(AF_INET, getenv("NET_MASK"), &_mask); inet_pton(AF_INET, getenv("NET_GATE"), &_gate); 
assert(netif_add(&_netif, &_addr, &_mask, &_gate, NULL, if_init, ethernet_input) != NULL); } netif_set_default(&_netif); netif_set_link_up(&_netif); netif_set_up(&_netif); } next_sys_call = *((syscall_fn_t *) sys_call_hook_ptr); *((syscall_fn_t *) sys_call_hook_ptr) = hook_function; return 0; } static bool should_skip(long rax, long rdi) { switch (rax) { case __NR_socket: // 41 if (rdi == AF_INET) return true; else return false; case __NR_close: // 3 case __NR_fcntl: // 72 if (lfd[rdi].used) return true; if (efd[rdi].used) return true; return false; case __NR_read: // 0 case __NR_write: // 1 case __NR_ioctl: // 16 case __NR_accept: // 43 case __NR_bind: // 49 case __NR_listen: // 50 case __NR_setsockopt: // 54 case __NR_getsockopt: // 55 case __NR_accept4: //288 if (lfd[rdi].used) return true; return false; case __NR_epoll_create: // 213 case __NR_epoll_ctl_old: // 214 case __NR_epoll_wait_old: // 215 case __NR_epoll_wait: // 232 case __NR_epoll_ctl: // 233 case __NR_epoll_create1: // 291 return true; default: return false; } } extern long enter_syscall(int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t); void ____asm_impl(void) { /* * enter_syscall triggers a kernel-space system call */ asm volatile ( ".globl enter_syscall \n\t" "enter_syscall: \n\t" "movq %rdi, %rax \n\t" "movq %rsi, %rdi \n\t" "movq %rdx, %rsi \n\t" "movq %rcx, %rdx \n\t" "movq %r8, %r10 \n\t" "movq %r9, %r8 \n\t" "movq 8(%rsp),%r9 \n\t" ".globl syscall_addr \n\t" "syscall_addr: \n\t" "syscall \n\t" "ret \n\t" ); } static long (*hook_fn)(int64_t a1, int64_t a2, int64_t a3, int64_t a4, int64_t a5, int64_t a6, int64_t a7) = enter_syscall; int main(int argc, char* const* argv) { assert(argc > 1); pid = fork(); assert(pid >= 0); if (pid == 0) { assert(!ptrace(PTRACE_TRACEME, 0L, 0L, 0L)); execvp(argv[1], &argv[1]); } else { assert(!__hook_init(0, &hook_fn)); { int status; pid = wait(&status); assert(!ptrace(PTRACE_SETOPTIONS, pid, 0, PTRACE_O_EXITKILL)); assert(!ptrace(PTRACE_SYSCALL, 
pid, 0, 0)); while (1) { bool skipped = false; struct user_regs_struct regs; pid = wait(&status); if (WIFEXITED(status)) break; assert(!ptrace(PTRACE_GETREGS, pid, 0, &regs)); if (should_skip(regs.orig_rax, regs.rdi)) { assert(!ptrace(PTRACE_POKEUSER, pid, offsetof(struct user_regs_struct, orig_rax), __NR_getpid)); skipped = true; } assert(!ptrace(PTRACE_SYSCALL, pid, 0, 0)); pid = wait(&status); if (WIFEXITED(status)) break; if (skipped) { regs.rax = hook_function( regs.orig_rax, regs.rdi, regs.rsi, regs.rdx, regs.r10, regs.r8, regs.r9); assert(!ptrace(PTRACE_SETREGS, pid, 0, &regs)); } assert(!ptrace(PTRACE_SYSCALL, pid, 0, 0)); } } } return 0; } ```
glue-lwip-dpdk-zpoline/Makefile ```Makefile PROGS = ptracenet CC = gcc PKGCONF = pkg-config DPDK_VER=22.11.1 LWIP_VER=2.1.3 CONTRIB_VER=2.1.0 CLEANFILES = $(PROGS) *.o *.d SRCDIR ?= ./ NO_MAN= CFLAGS = -O3 -pipe CFLAGS += -Wall -Wunused-function CFLAGS += -Wextra LDFLAGS += C_SRCS = main.c OBJS = $(C_SRCS:.c=.o) # for dpdk DPDK_DIR = $(dir $(abspath $(lastword $(MAKEFILE_LIST))))/dpdk DPDK_SRC_DIR = $(DPDK_DIR)/dpdk-$(DPDK_VER) DPDK_INSTALL_DIR = $(DPDK_DIR)/install DPDK_PKG_CONFIG_PATH=$(DPDK_INSTALL_DIR)/lib/x86_64-linux-gnu/pkgconfig DPDK_PKG_CONFIG_FILE=$(DPDK_PKG_CONFIG_PATH)/libdpdk.pc CFLAGS += $(shell PKG_CONFIG_PATH=$(DPDK_PKG_CONFIG_PATH) $(PKGCONF) --cflags libdpdk) LDFLAGS += $(shell PKG_CONFIG_PATH=$(DPDK_PKG_CONFIG_PATH) $(PKGCONF) --libs libdpdk) # for lwip LWIP_DIR = $(dir $(abspath $(lastword $(MAKEFILE_LIST))))/lwip LWIP_SRC_DIR = $(LWIP_DIR)/lwip-$(LWIP_VER) CONTRIB_SRC_DIR = $(LWIP_DIR)/contrib-$(CONTRIB_VER) CFLAGS += -I$(LWIP_SRC_DIR)/src/include -I$(CONTRIB_SRC_DIR) -I$(CONTRIB_SRC_DIR)/ports/unix/port/include LWIP_OBJS = $(LWIP_SRC_DIR)/src/api/api_lib.o \ $(LWIP_SRC_DIR)/src/api/api_msg.o \ $(LWIP_SRC_DIR)/src/api/err.o \ $(LWIP_SRC_DIR)/src/api/if_api.o \ $(LWIP_SRC_DIR)/src/api/netbuf.o \ $(LWIP_SRC_DIR)/src/api/netdb.o \ $(LWIP_SRC_DIR)/src/api/netifapi.o \ $(LWIP_SRC_DIR)/src/api/sockets.o \ $(LWIP_SRC_DIR)/src/api/tcpip.o \ $(LWIP_SRC_DIR)/src/core/altcp_alloc.o \ $(LWIP_SRC_DIR)/src/core/altcp.o \ $(LWIP_SRC_DIR)/src/core/altcp_tcp.o \ $(LWIP_SRC_DIR)/src/core/def.o \ $(LWIP_SRC_DIR)/src/core/dns.o \ $(LWIP_SRC_DIR)/src/core/inet_chksum.o \ $(LWIP_SRC_DIR)/src/core/init.o \ $(LWIP_SRC_DIR)/src/core/ip.o \ $(LWIP_SRC_DIR)/src/core/ipv4/autoip.o \ $(LWIP_SRC_DIR)/src/core/ipv4/dhcp.o \ $(LWIP_SRC_DIR)/src/core/ipv4/etharp.o \ $(LWIP_SRC_DIR)/src/core/ipv4/icmp.o \ $(LWIP_SRC_DIR)/src/core/ipv4/igmp.o \ $(LWIP_SRC_DIR)/src/core/ipv4/ip4_addr.o \ $(LWIP_SRC_DIR)/src/core/ipv4/ip4.o \ $(LWIP_SRC_DIR)/src/core/ipv4/ip4_frag.o \ 
$(LWIP_SRC_DIR)/src/core/ipv6/dhcp6.o \ $(LWIP_SRC_DIR)/src/core/ipv6/ethip6.o \ $(LWIP_SRC_DIR)/src/core/ipv6/icmp6.o \ $(LWIP_SRC_DIR)/src/core/ipv6/inet6.o \ $(LWIP_SRC_DIR)/src/core/ipv6/ip6_addr.o \ $(LWIP_SRC_DIR)/src/core/ipv6/ip6.o \ $(LWIP_SRC_DIR)/src/core/ipv6/ip6_frag.o \ $(LWIP_SRC_DIR)/src/core/ipv6/mld6.o \ $(LWIP_SRC_DIR)/src/core/ipv6/nd6.o \ $(LWIP_SRC_DIR)/src/core/mem.o \ $(LWIP_SRC_DIR)/src/core/memp.o \ $(LWIP_SRC_DIR)/src/core/netif.o \ $(LWIP_SRC_DIR)/src/core/pbuf.o \ $(LWIP_SRC_DIR)/src/core/raw.o \ $(LWIP_SRC_DIR)/src/core/stats.o \ $(LWIP_SRC_DIR)/src/core/sys.o \ $(LWIP_SRC_DIR)/src/core/tcp.o \ $(LWIP_SRC_DIR)/src/core/tcp_in.o \ $(LWIP_SRC_DIR)/src/core/tcp_out.o \ $(LWIP_SRC_DIR)/src/core/timeouts.o \ $(LWIP_SRC_DIR)/src/core/udp.o \ $(LWIP_SRC_DIR)/src/netif/ethernet.o \ $(CONTRIB_SRC_DIR)/ports/unix/port/sys_arch.o OBJS += $(LWIP_OBJS) CLEANFILES += $(LWIP_OBJS) .PHONY: all all: $(PROGS) $(DPDK_SRC_DIR).tar.xz: wget -P $(DPDK_DIR) https://fast.dpdk.org/rel/dpdk-$(DPDK_VER).tar.xz $(CONTRIB_SRC_DIR).zip: wget -P $(LWIP_DIR) http://download.savannah.nongnu.org/releases/lwip/contrib-$(CONTRIB_VER).zip $(LWIP_SRC_DIR).zip: wget -P $(LWIP_DIR) http://download.savannah.nongnu.org/releases/lwip/lwip-$(LWIP_VER).zip $(DPDK_SRC_DIR): $(DPDK_SRC_DIR).tar.xz mkdir -p $(DPDK_SRC_DIR) tar xvf $< -C $(DPDK_SRC_DIR) --strip-components 1 $(CONTRIB_SRC_DIR): $(CONTRIB_SRC_DIR).zip unzip -n $< -d $(LWIP_DIR) $(LWIP_SRC_DIR): $(LWIP_SRC_DIR).zip unzip -n $< -d $(LWIP_DIR) $(DPDK_PKG_CONFIG_FILE): $(DPDK_SRC_DIR) meson --prefix=$(DPDK_INSTALL_DIR) --libdir=lib/x86_64-linux-gnu $(DPDK_SRC_DIR)/build $(DPDK_SRC_DIR) ninja -C $(DPDK_SRC_DIR)/build ninja -C $(DPDK_SRC_DIR)/build install $(OBJS): $(CONTRIB_SRC_DIR) $(LWIP_SRC_DIR) $(DPDK_PKG_CONFIG_FILE) $(PROGS): $(OBJS) $(CC) -Werror $(CFLAGS) -o $@ $^ $(LDFLAGS) clean: -@rm -rf $(CLEANFILES) ```

Supposedly, the program above can be compiled by make typed in glue-lwip-dpdk-zpoline, and the following command will launch the Redis server while applying hooks through ptrace.

sudo NET_ADDR=10.100.0.20 NET_MASK=255.255.255.0 NET_GATE=10.100.0.1 DPDK_ARGS="-l 0 --vdev=net_tap,iface=tap001 --no-pci" LD_LIBRARY_PATH=./dpdk/install/lib/x86_64-linux-gnu ./ptracenet ~/code/redis-stable/src/redis-server --protected-mode no

Thank you very much for your message.