iovisor / bcc

BCC - Tools for BPF-based Linux IO analysis, networking, monitoring, and more
Apache License 2.0
20.61k stars 3.89k forks source link

How to parse struct data from bpf_probe_read function #2534

Open ankitnayan opened 5 years ago

ankitnayan commented 5 years ago

I have been trying to attach uprobe to ngx_http_process_request function of nginx and note time taken by this function. The eBPF code looks like this:

#include <uapi/linux/ptrace.h>
#include <linux/sched.h>

struct val_t {
    u32 pid;
    char comm[TASK_COMM_LEN];
    char url[80];
    u64 ts;
};

struct data_t {
    u32 pid;
    u64 delta;
    char comm[TASK_COMM_LEN];
    char url[80];
};

BPF_HASH(start, u32, struct val_t);
BPF_PERF_OUTPUT(events);

int do_entry(struct pt_regs *ctx) {
    if (!PT_REGS_PARM1(ctx))
        return 0;
    struct val_t val = {};
    u32 pid = bpf_get_current_pid_tgid();
    if (bpf_get_current_comm(&val.comm, sizeof(val.comm)) == 0) {
        bpf_probe_read(&val.url, sizeof(val.url),
                       (void *)PT_REGS_PARM1(ctx));
        val.pid = bpf_get_current_pid_tgid();
        val.ts = bpf_ktime_get_ns();
        start.update(&pid, &val);
    }
    return 0;
}

int do_return(struct pt_regs *ctx) {
    struct val_t *valp;
    struct data_t data = {};
    u64 delta;
    u32 pid = bpf_get_current_pid_tgid();
    u64 tsp = bpf_ktime_get_ns();
    valp = start.lookup(&pid);
    if (valp == 0)
        return 0;       // missed start
    bpf_probe_read(&data.comm, sizeof(data.comm), valp->comm);
    bpf_probe_read(&data.url, sizeof(data.url), (void *)valp->url);

    data.pid = valp->pid;
    data.delta = tsp - valp->ts;
    events.perf_submit(ctx, &data, sizeof(data));
    start.delete(&pid);
    return 0;
}
bpf_probe_read(&val.url, sizeof(val.url), (void *)PT_REGS_PARM1(ctx));

The bpf_probe_read command tries to read the 1st argument of the function. I am trying to store this into a char[80] variable. This makes sense when the param is a string but as we can see in the source code of nginx, this function takes in ngx_http_request_t type as input argument which itself is defined as typedef struct ngx_http_request_s ngx_http_request_t; and the ngx_http_request_s is defined at Link and this struct has uri, method, etc as fields. I want to access those fields in my code and print them. It currently prints HTTP as the 1st argument of function which is defined as signature int field at Line 372 of code.

Most of the examples of eBPF show only string or int as function argument. I want to understand how to parse struct data as function argument.

palmtenor commented 5 years ago

bpf_probe_read is just like memcpy, it just read memory of specified size into the specified place.

You will need to manually include the struct definition from nginx in your BPF code, if that's what you were asking~

ankitnayan commented 5 years ago

Instead I got the offset of uri field from struct ngx_http_request_s which is 904. Given the uri field is of type

typedef struct {
     size_t len;
     u_char *data;
} ngx_str_t;

Then I got the sizeof(size_t) which is 8 and offset of len field in ngx_str_t is 0. Hence the below code should work.

from bcc import BPF
from time import strftime

path = "/usr/sbin/nginx"
python_func_name = "ngx_http_process_request"

program = """

#include <uapi/linux/ptrace.h>
#include <linux/sched.h>

struct val_t {
    u32 pid;
    char comm[TASK_COMM_LEN];
    //char host[200];
    unsigned long url_len;
    char url_str[128];
    u64 ts;
};

struct data_t {
    u32 pid;
    u64 delta;
    char comm[TASK_COMM_LEN];
    //char host[200];
    unsigned long url_len;
    char url_str[128];
};

BPF_HASH(start, u32, struct val_t);
BPF_PERF_OUTPUT(events);

int do_entry(struct pt_regs *ctx) {
    if (!PT_REGS_PARM1(ctx))
        return 0;
    struct val_t val = {};
    u32 pid = bpf_get_current_pid_tgid();
    void *uri_addr = (void *) ((void *)PT_REGS_PARM1(ctx) + 904);
    if (bpf_get_current_comm(&val.comm, sizeof(val.comm)) == 0) {
        bpf_probe_read(&val.url_len, 8, uri_addr);
        val.pid = bpf_get_current_pid_tgid();
        val.ts = bpf_ktime_get_ns();
        start.update(&pid, &val);
    }
    return 0;
}

int do_return(struct pt_regs *ctx) {
    struct val_t *valp;
    struct data_t data = {};
    u64 delta;
    u32 pid = bpf_get_current_pid_tgid();
    u64 tsp = bpf_ktime_get_ns();
    valp = start.lookup(&pid);
    if (valp == 0)
        return 0;       // missed start
    bpf_probe_read(&data.comm, sizeof(data.comm), valp->comm);
    //bpf_probe_read(&data.url_str, sizeof(data.url_str), valp->url_str);
    bpf_probe_read(&data.url_len, 8, (void *)valp->url_len);

    data.pid = valp->pid;
    data.delta = tsp - valp->ts;
    events.perf_submit(ctx, &data, sizeof(data));
    start.delete(&pid);
    return 0;
}
"""

bpf = BPF(text=program)
bpf.attach_uprobe(name=path, sym=python_func_name, fn_name="do_entry")
bpf.attach_uretprobe(name=path, sym=python_func_name, fn_name="do_return")

# header
print("%-9s %-6s %-16s %10s %s" % ("TIME", "PID", "COMM", "LATms", "URL_len"))

def print_event(cpu, data, size):
    event = bpf["events"].event(data)
    print("%-9s %-6d %-16s %10.2f %lu" % (strftime("%H:%M:%S"), event.pid,
        event.comm.decode('utf-8', 'replace'), (float(event.delta) / 1000000),
        event.url_len))

# loop with callback to print_event
bpf["events"].open_perf_buffer(print_event)
# format output

while True:
    try:
        bpf.perf_buffer_poll()
    except KeyboardInterrupt:
        exit()

This is the complete code that I am using. The URL_len prints value 0 always. Any thoughts on debugging this?

sandip4n commented 5 years ago

While having this conversation with @ankitnayan about reading members of arguments which have nested structures, I came up with this sample code for which I wanted to trace the do_work() function:

/*
 * Compile with:
 *  gcc -o xyz -Wall -g -O0 xyz.c
 */

#include <stdio.h>
#include <stdlib.h>
#include <inttypes.h>

struct string {
    size_t len;
    char  *buf;
};

struct load {
    unsigned long val;
    struct string str;
};

void do_work(struct load *l)
{
    if (!l || !l->str.len || !l->str.buf)
        return;
    printf("address of l = %016lx\n", (unsigned long) l);
    printf("address of l->str = %016lx\n", (unsigned long) &(l->str));
    printf("address of l->str.buf = %016lx\n", (unsigned long) (l->str).buf);
    printf("val = %lu, len = %lu, buf = %s",
           l->val, l->str.len, l->str.buf);
}

int main(int argc, char **argv)
{
    struct load l = {
        1000, { 12, "Hello world!" }
    };

    do_work(&l);

    return EXIT_SUCCESS;
}

Here's the bcc script to attach the uprobe handler to trace the function and read members of the nested structure that is passed as an argument to it.

#!/usr/bin/python

from __future__ import print_function
from bcc import BPF
from bcc.utils import printb
from time import sleep

# load BPF program
b = BPF(text="""
#include <uapi/linux/ptrace.h>

struct string {
    size_t len;
    char  *buf;
};

int trace__do_work(struct pt_regs *ctx)
{
    struct string str;
    char buf[80];
    int ret;

    ret = bpf_probe_read(&str, sizeof(str), (void *) PT_REGS_PARM1(ctx) + sizeof(unsigned long));
    bpf_trace_printk("bpf_probe_read() returned %d\\n", ret);

    ret = bpf_probe_read_str(buf, sizeof(buf), (void *) str.buf);
    bpf_trace_printk("bpf_probe_read() returned %d\\n", ret);

    bpf_trace_printk("buf = %s, len = %u\\n", buf, str.len);

    return 0;
};
""")

b.attach_uprobe(name="./xyz", sym="do_work", fn_name="trace__do_work")

# sleep until Ctrl-C
try:
    sleep(99999999)
except KeyboardInterrupt:
    pass

On x86_64, the bpf_probe_read_str() fails and returns -14 i.e. EFAULT.

             xyz-26862 [000] .... 81987.272198: 0: bpf_probe_read() returned 0
             xyz-26862 [000] .... 81987.272217: 0: bpf_probe_read() returned -14
             xyz-26862 [000] .... 81987.272219: 0: buf = , len = 12

On powerpc64, it succeeds and correctly returns 13 as the no. of characters read.

           <...>-93101 [072] .... 1232098.195696: 0: bpf_probe_read() returned 0
           <...>-93101 [072] .... 1232098.195703: 0: bpf_probe_read() returned 13
           <...>-93101 [072] .... 1232098.195705: 0: buf = Hello world!, len = 12

Not sure if this is a kernel bug though. Anything to do with the recent kernel lockdown changes? @palmtenor @yonghong-song Any thoughts on this?

sandip4n commented 5 years ago

Kernel running on the x86_64 system is 5.2.16 and on the powerpc64 system is 5.2.14. Both are using upstream bcc.

yonghong-song commented 5 years ago

For

  xyz-26862 [000] .... 81987.272217: 0: bpf_probe_read() returned -14

the reason is due to page fault when reading the content. bpf_probe_read() helper disables page_fault during reading. If page fault indeed happens, it will return an error and the result buffer will be memset to 0.

One possible workaround is to use kretprobe. In kprobe, you record all necessary info and in kretprobe, you do actual reading.

sandip4n commented 5 years ago

@yonghong-song Thanks for the quick explanation and workaround. Do you know if the following patch series is going to address this issue? http://lkml.kernel.org/r/155789866428.26965.8344923934342528416.stgit@devnote2

yonghong-song commented 5 years ago

@sandip4n this won't help. See the implementation at https://lore.kernel.org/lkml/155789869802.26965.4940338412595759063.stgit@devnote2/

+static __always_inline long
+probe_read_common(void *dst, const void __user *src, size_t size)
+{
+       long ret;
+
+       pagefault_disable();
+       ret = __copy_from_user_inatomic(dst, src, size);
+       pagefault_enable();
+
+       return ret ? -EFAULT : 0;
+}

For both user and kernel read, pagefault is disabled.