Open ankitnayan opened 5 years ago
bpf_probe_read
is just like memcpy
, it just read memory of specified size into the specified place.
You will need to manually include the struct definition from nginx in your BPF code, if that's what you were asking~
Instead I got the offset of uri
field from struct ngx_http_request_s
which is 904. Given the uri
field is of type
typedef struct {
size_t len;
u_char *data;
} ngx_str_t;
Then I got the sizeof(size_t)
which is 8 and offset of len
field in ngx_str_t
is 0. Hence the below code should work.
from bcc import BPF
from time import strftime
path = "/usr/sbin/nginx"
python_func_name = "ngx_http_process_request"
program = """
#include <uapi/linux/ptrace.h>
#include <linux/sched.h>
struct val_t {
u32 pid;
char comm[TASK_COMM_LEN];
//char host[200];
unsigned long url_len;
char url_str[128];
u64 ts;
};
struct data_t {
u32 pid;
u64 delta;
char comm[TASK_COMM_LEN];
//char host[200];
unsigned long url_len;
char url_str[128];
};
BPF_HASH(start, u32, struct val_t);
BPF_PERF_OUTPUT(events);
int do_entry(struct pt_regs *ctx) {
if (!PT_REGS_PARM1(ctx))
return 0;
struct val_t val = {};
u32 pid = bpf_get_current_pid_tgid();
void *uri_addr = (void *) ((void *)PT_REGS_PARM1(ctx) + 904);
if (bpf_get_current_comm(&val.comm, sizeof(val.comm)) == 0) {
bpf_probe_read(&val.url_len, 8, uri_addr);
val.pid = bpf_get_current_pid_tgid();
val.ts = bpf_ktime_get_ns();
start.update(&pid, &val);
}
return 0;
}
int do_return(struct pt_regs *ctx) {
struct val_t *valp;
struct data_t data = {};
u64 delta;
u32 pid = bpf_get_current_pid_tgid();
u64 tsp = bpf_ktime_get_ns();
valp = start.lookup(&pid);
if (valp == 0)
return 0; // missed start
bpf_probe_read(&data.comm, sizeof(data.comm), valp->comm);
//bpf_probe_read(&data.url_str, sizeof(data.url_str), valp->url_str);
bpf_probe_read(&data.url_len, 8, (void *)valp->url_len);
data.pid = valp->pid;
data.delta = tsp - valp->ts;
events.perf_submit(ctx, &data, sizeof(data));
start.delete(&pid);
return 0;
}
"""
bpf = BPF(text=program)
bpf.attach_uprobe(name=path, sym=python_func_name, fn_name="do_entry")
bpf.attach_uretprobe(name=path, sym=python_func_name, fn_name="do_return")
# header
print("%-9s %-6s %-16s %10s %s" % ("TIME", "PID", "COMM", "LATms", "URL_len"))
def print_event(cpu, data, size):
event = bpf["events"].event(data)
print("%-9s %-6d %-16s %10.2f %lu" % (strftime("%H:%M:%S"), event.pid,
event.comm.decode('utf-8', 'replace'), (float(event.delta) / 1000000),
event.url_len))
# loop with callback to print_event
bpf["events"].open_perf_buffer(print_event)
# format output
while True:
try:
bpf.perf_buffer_poll()
except KeyboardInterrupt:
exit()
This is the complete code that I am using. The URL_len
prints value 0 always. Any thoughts on debugging this?
While having this conversation with @ankitnayan about reading members of arguments which have nested structures, I came up with this sample code for which I wanted to trace the do_work()
function:
/*
* Compile with:
* gcc -o xyz -Wall -g -O0 xyz.c
*/
#include <stdio.h>
#include <stdlib.h>
#include <inttypes.h>
struct string {
size_t len;
char *buf;
};
struct load {
unsigned long val;
struct string str;
};
void do_work(struct load *l)
{
if (!l || !l->str.len || !l->str.buf)
return;
printf("address of l = %016lx\n", (unsigned long) l);
printf("address of l->str = %016lx\n", (unsigned long) &(l->str));
printf("address of l->str.buf = %016lx\n", (unsigned long) (l->str).buf);
printf("val = %lu, len = %lu, buf = %s",
l->val, l->str.len, l->str.buf);
}
int main(int argc, char **argv)
{
struct load l = {
1000, { 12, "Hello world!" }
};
do_work(&l);
return EXIT_SUCCESS;
}
Here's the bcc
script to attach the uprobe
handler to trace the function and read members of the nested structure that is passed as an argument to it.
#!/usr/bin/python
from __future__ import print_function
from bcc import BPF
from bcc.utils import printb
from time import sleep
# load BPF program
b = BPF(text="""
#include <uapi/linux/ptrace.h>
struct string {
size_t len;
char *buf;
};
int trace__do_work(struct pt_regs *ctx)
{
struct string str;
char buf[80];
int ret;
ret = bpf_probe_read(&str, sizeof(str), (void *) PT_REGS_PARM1(ctx) + sizeof(unsigned long));
bpf_trace_printk("bpf_probe_read() returned %d\\n", ret);
ret = bpf_probe_read_str(buf, sizeof(buf), (void *) str.buf);
bpf_trace_printk("bpf_probe_read() returned %d\\n", ret);
bpf_trace_printk("buf = %s, len = %u\\n", buf, str.len);
return 0;
};
""")
b.attach_uprobe(name="./xyz", sym="do_work", fn_name="trace__do_work")
# sleep until Ctrl-C
try:
sleep(99999999)
except KeyboardInterrupt:
pass
On x86_64
, the bpf_probe_read_str()
fails and returns -14 i.e. EFAULT
.
xyz-26862 [000] .... 81987.272198: 0: bpf_probe_read() returned 0
xyz-26862 [000] .... 81987.272217: 0: bpf_probe_read() returned -14
xyz-26862 [000] .... 81987.272219: 0: buf = , len = 12
On powerpc64
, it succeeds and correctly returns 13 as the no. of characters read.
<...>-93101 [072] .... 1232098.195696: 0: bpf_probe_read() returned 0
<...>-93101 [072] .... 1232098.195703: 0: bpf_probe_read() returned 13
<...>-93101 [072] .... 1232098.195705: 0: buf = Hello world!, len = 12
Not sure if this is a kernel bug though. Anything to do with the recent kernel lockdown changes? @palmtenor @yonghong-song Any thoughts on this?
Kernel running on the x86_64
system is 5.2.16 and on the powerpc64
system is 5.2.14. Both are using upstream bcc
.
For
xyz-26862 [000] .... 81987.272217: 0: bpf_probe_read() returned -14
the reason is due to page fault when reading the content. bpf_probe_read() helper disables page_fault during reading. If page fault indeed happens, it will return an error and the result buffer will be memset to 0.
One possible workaround is to use kretprobe. In kprobe, you record all necessary info and in kretprobe, you do actual reading.
@yonghong-song Thanks for the quick explanation and workaround. Do you know if the following patch series is going to address this issue? http://lkml.kernel.org/r/155789866428.26965.8344923934342528416.stgit@devnote2
@sandip4n this won't help. See the implementation at https://lore.kernel.org/lkml/155789869802.26965.4940338412595759063.stgit@devnote2/
+static __always_inline long
+probe_read_common(void *dst, const void __user *src, size_t size)
+{
+ long ret;
+
+ pagefault_disable();
+ ret = __copy_from_user_inatomic(dst, src, size);
+ pagefault_enable();
+
+ return ret ? -EFAULT : 0;
+}
For both user and kernel read, pagefault is disabled.
I have been trying to attach uprobe to ngx_http_process_request function of nginx and note time taken by this function. The eBPF code looks like this:
The bpf_probe_read command tries to read the 1st argument of the function. I am trying to store this into a char[80] variable. This makes sense when the param is a string but as we can see in the source code of nginx, this function takes in
ngx_http_request_t
type as input argument which itself is defined astypedef struct ngx_http_request_s ngx_http_request_t;
and thengx_http_request_s
is defined at Link and this struct has uri, method, etc as fields. I want to access those fields in my code and print them. It currently prints HTTP as the 1st argument of function which is defined as signature int field at Line 372 of code.Most of the examples of eBPF show only string or int as function argument. I want to understand how to parse struct data as function argument.