Open williamtu opened 7 years ago
This one won't be easy to fix. But 528 < 512 + 32 + 8. I wonder why we use so much stack. The space we need is for:
Table keys are never all live at the same time, so we should only need the largest of them on the stack.
Can you please add xdp6 and 7 to the repo?
thanks, I've pushed xdp6 and xdp7
All headers in xdp7.c require a bit more than 74 bytes, a little more than on the wire, since they are unpacked (I didn't count padding, maybe that adds up to something as well).
There are a few more local variables which add up to about 24 bytes.
The key requires 2 bytes.
So we should be pretty far from 528.
there are two MAX_BPF_STACK, previous one is at arch/x86/net/bpf_jit.S I think we are using the definition at "include/linux/filter.h"
#define MAX_BPF_STACK 512
Even that should be large enough. I wonder why we use 528 bytes of stack.
right... that's weird... let me dig deeper
have some discussion in iovisor https://lists.iovisor.org/pipermail/iovisor-dev/2017-February/000644.html
this part looks not right. the generated code machine code is using 8 byte every time to save into hd.ethernet.source[n], which should be 1 byte see 72, 80, 88, 96 ...
so I guess the ethernet header takes 8 * (6+6) + 2 byte?
18: r3 = *(u8 *)(r1 + 6)
; hd.ethernet.source[5] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 5) >> 0));
19: *(u64 *)(r10 - 72) = r3
20: r3 = *(u8 *)(r1 + 5)
; hd.ethernet.source[4] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 4) >> 0));
21: *(u64 *)(r10 - 80) = r3
22: r3 = *(u8 *)(r1 + 4)
; hd.ethernet.source[3] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 3) >> 0));
23: *(u64 *)(r10 - 88) = r3
24: r3 = *(u8 *)(r1 + 3)
; hd.ethernet.source[2] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 2) >> 0));
25: *(u64 *)(r10 - 96) = r3
26: r3 = *(u8 *)(r1 + 2)
; hd.ethernet.source[1] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 1) >> 0));
27: *(u64 *)(r10 - 104) = r3
28: r3 = *(u8 *)(r1 + 1)
; hd.ethernet.source[0] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 0) >> 0));
29: *(u64 *)(r10 - 112) = r3
30: r3 = *(u8 *)(r1 + 0)
Can you try to replace char source[6] with u8 source[6]? This does not make any sense.
The bug is in the load_byte macro - the cast should be applied to data before adding
still the same, with the following 2 changes.
#define load_byte(data, b) (*((u8 *)data + (b)))
struct Ethernet {
u8 source[6]; /* bit<48> */
u8 destination[6]; /* bit<48> */
u16 protocol; /* bit<16> */
u8 ebpf_valid;
};
can you just print sizeof(headers) and sizeof(headers.ethernet)?
another solution is to avoid using local stack, but put the struct Headers in a BPF MAP. I tried it a bit and seems ok. It's s.t lik
--- a/tmp/xdp1.c
+++ b/tmp/xdp1.c.usemap
@@ -1,5 +1,6 @@
#define KBUILD_MODNAME "xdptest"
#include <linux/bpf.h>
#include "bpf_helpers.h"
#define load_byte(data, b) (*(u8 *)(data + (b)))
@@ -36,7 +37,7 @@ struct Ethernet {
char destination[6]; /* bit<48> */
u16 protocol; /* bit<16> */
u8 ebpf_valid;
};
struct IPv4 {
u8 version; /* bit<4> */
@@ -67,16 +68,18 @@ struct bpf_map_def SEC("maps") ebpf_outTable = {
.max_entries = 1 /* No multicast support */
};
+struct bpf_map_def SEC("maps") Headers = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(struct Headers),
+ .pinning = 2, /* PIN_GLOBAL_NS */
+ .max_entries = 1 /* No multicast support */
+};
+
+
SEC("prog")
int ebpf_filter(struct xdp_md* skb){
- struct Headers hd = {
- .ethernet = {
- .ebpf_valid = 0
- },
- .ipv4 = {
- .ebpf_valid = 0
- },
- };
+ struct Headers hd_zero, *hd;
unsigned ebpf_packetOffsetInBits = 0;
enum ebpf_errorCodes ebpf_errorCode = NoError;
void* ebpf_packetStart = ((void*)(long)skb->data);
@@ -88,82 +91,88 @@ int ebpf_filter(struct xdp_md* skb){
/* TODO: this should be initialized by the environment. HOW? */
struct xdp_input xin;
+ memset(&hd_zero, 0, sizeof(hd_zero));
+ bpf_map_update_elem(&Headers, &ebpf_zero, &hd_zero, BPF_ANY);
+ hd = bpf_map_lookup_elem(&Headers, &ebpf_zero);
+ if (!hd)
+ return XDP_ABORTED;
+
goto start;
start: {
- /* extract(hd.ethernet)*/
+ /* extract(hd->ethernet)*/
This should be much slower. We should be able to figure out what the problem with the stack is.
adding printk
printk("headers %d ethernet %d\n", sizeof(struct Headers), sizeof(hd.ethernet));
goto start;
start: {
it prints out
<idle>-0 [001] ..s. 27808.060250: : headers 44 ethernet 16
here I'm using xdp1, headers has
struct Ethernet {
u8 source[6]; /* bit<48> */
u8 destination[6]; /* bit<48> */
u16 protocol; /* bit<16> */
u8 ebpf_valid;
};
struct IPv4 {
u8 version; /* bit<4> */
u8 ihl; /* bit<4> */
u8 diffserv; /* bit<8> */
u16 totalLen; /* bit<16> */
u16 identification; /* bit<16> */
u8 flags; /* bit<3> */
u16 fragOffset; /* bit<13> */
u8 ttl; /* bit<8> */
u8 protocol; /* bit<8> */
u16 hdrChecksum; /* bit<16> */
u32 srcAddr; /* bit<32> */
u32 dstAddr; /* bit<32> */
u8 ebpf_valid;
};
struct Headers {
struct Ethernet ethernet; /* Ethernet */
struct IPv4 ipv4; /* IPv4 */
};
So we know that stack usage is not the problem.
How about you just run the preprocessor and look at the generated C?
need to go out now, will work on it in the afternoon.
On Fri, Feb 10, 2017 at 9:37 AM, Mihai Budiu notifications@github.com wrote:
How about you just run the preprocessor and look at the generated C?
— You are receiving this because you authored the thread. Reply to this email directly, view it on GitHub https://github.com/williamtu/p4c-xdp/issues/22#issuecomment-279010739, or mute the thread https://github.com/notifications/unsubscribe-auth/AE7w-4KKqMwTv7Q_i00kWq3GXFzuTYAWks5rbKB0gaJpZM4L7AyM .
Actually it can't be the load_byte - that's the load. The store is the problem.
I did another experiment using kernel's sample code https://lists.iovisor.org/pipermail/iovisor-dev/2017-February/000652.html
This looks like a bug in the bpf llvm back-end bug. If you look at the assembly, they always write a double-word (64 bits) no matter what the output type is; in this case, a byte.
For example, I changed the source to be just an u8, and the code generated is this:
.loc 2 89 37 # xdp7.c:89:37
std -464(r10), r1
ldb r1, 0(r2)
As far as I can tell, ldb is load byte, and std is store double.
I found a workaround this problem: if you take the address of the headers struct the compiler has to put it on the stack. So I just added this:
printk("%p", &hd);
I will add this to the generated code.
yes, it will put it on stack, but still uses 8 bytes to store an u8 for example using xdp1.p4, and adding printk("%p", &hd);
root@osboxes:~/p4c/extensions/p4c-xdp/tests# llvm-objdump -S -no-show-raw-insn xdp1.o
xdp1.o: file format ELF64-BPF
Disassembly of section prog:
ebpf_filter:
; int ebpf_filter(struct xdp_md* skb){
0: r6 = r1
1: r7 = 0
; struct Headers hd = {
2: *(u32 *)(r10 - 8) = r7
3: *(u64 *)(r10 - 16) = r7
4: *(u64 *)(r10 - 24) = r7
5: *(u64 *)(r10 - 32) = r7
6: *(u64 *)(r10 - 40) = r7
7: *(u64 *)(r10 - 48) = r7
hd still uses 8 bytes for each u8
from Alexei's reply https://lists.iovisor.org/pipermail/iovisor-dev/2017-February/000653.html I think it's a limitation in llvm which it always spills 8byte on the stack, since BPF register size is 8byte.
I force llvm to allocate struct Headers on stack using alloca first, from bitcode to LLVM IR
clang -D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign -Wno-compare-distinct-pointer-types -Wno-gnu-variable-sized-type-not-at-end -Wno-tautological-compare -O2 -emit-llvm -g -c xdp9.c
llvm-dis xdp9.bc -o /tmp/xdp9.ll
make the change in LLVM IR, xdp9.ll
%ebpf_zero = alloca i32, align 4
%xout = alloca %struct.xdp_output, align 4
%key = alloca %struct.dstmactable_key, align 2
+ %hd = alloca %struct.Headers, align 4
+ %hd.ethernet = alloca %struct.Ethernet, align 2
+ %hd.ipv4 = alloca %struct.IPv4, align 4
assembly back from LLVM IR to bitcode
llvm-as /tmp/xdp9.ll -o xdp9.bc
llc -march=bpf -filetype=obj -o xdp9.o xdp9.bc
llvm-objdump -S -no-show-raw-insn xdp9.o
result of objdump
Disassembly of section prog:
ebpf_filter:
; int ebpf_filter(struct xdp_md* skb){
0: r6 = r1
; void* ebpf_packetEnd = ((void*)(long)skb->data_end);
1: r3 = *(u32 *)(r6 + 4)
; void* ebpf_packetStart = ((void*)(long)skb->data);
2: r2 = *(u32 *)(r6 + 0)
3: r8 = 0
; u32 ebpf_zero = 0;
4: *(u32 *)(r10 - 92) = r8
; if (ebpf_packetEnd < ebpf_packetStart + BYTES(ebpf_packetOffsetInBits + 112)) {
5: r1 = r2
6: r1 += 14
7: if r1 > r3 goto 891
8: r1 = 14
9: *(u64 *)(r10 - 136) = r1
10: r5 = 0
; hd.ethernet.destination[5] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 5) >> 0));
11: r1 = *(u8 *)(r2 + 11)
; hd.ethernet.destination[4] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 4) >> 0));
12: *(u64 *)(r10 - 544) = r1
13: r1 = *(u8 *)(r2 + 10)
; hd.ethernet.destination[3] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 3) >> 0));
14: *(u64 *)(r10 - 552) = r1
15: r1 = *(u8 *)(r2 + 9)
; hd.ethernet.destination[2] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 2) >> 0));
16: *(u64 *)(r10 - 560) = r1
17: r1 = *(u8 *)(r2 + 8)
; hd.ethernet.destination[1] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 1) >> 0));
18: *(u64 *)(r10 - 568) = r1
19: r1 = *(u8 *)(r2 + 7)
; hd.ethernet.destination[0] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 0) >> 0));
20: *(u64 *)(r10 - 576) = r1
21: r1 = *(u8 *)(r2 + 6)
; hd.ethernet.source[5] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 5) >> 0));
22: *(u64 *)(r10 - 584) = r1
23: r1 = *(u8 *)(r2 + 5)
; hd.ethernet.source[4] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 4) >> 0));
24: *(u64 *)(r10 - 600) = r1
25: r1 = *(u8 *)(r2 + 4)
; hd.ethernet.source[3] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 3) >> 0));
26: *(u64 *)(r10 - 608) = r1
27: r1 = *(u8 *)(r2 + 3)
unfortunately it's still the same, the hd is using r10 - off, where off is 8 byte offset
I don't understand why a 'switch' statement need to clear the stack see ; switch (hd.ethernet.protocol)
; int ebpf_filter(struct xdp_md* skb){
0: r6 = r1
; void* ebpf_packetEnd = ((void*)(long)skb->data_end);
1: r4 = *(u32 *)(r6 + 4)
; void* ebpf_packetStart = ((void*)(long)skb->data);
2: r3 = *(u32 *)(r6 + 0)
3: r7 = 0
; u32 ebpf_zero = 0;
4: *(u32 *)(r10 - 4) = r7
; if (ebpf_packetEnd < ebpf_packetStart + BYTES(ebpf_packetOffsetInBits + 112)) {
5: r1 = r3
6: r1 += 14
7: if r1 > r4 goto 245
8: r2 = 14
9: r1 = 0
; hd.ethernet.destination[5] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 5) >> 0));
10: *(u64 *)(r10 - 120) = r1
11: r1 = *(u8 *)(r3 + 11)
; hd.ethernet.destination[4] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 4) >> 0));
12: *(u64 *)(r10 - 136) = r1
13: r1 = *(u8 *)(r3 + 10)
; hd.ethernet.destination[3] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 3) >> 0));
14: *(u64 *)(r10 - 144) = r1
15: r1 = *(u8 *)(r3 + 9)
; hd.ethernet.destination[2] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 2) >> 0));
16: *(u64 *)(r10 - 152) = r1
17: r1 = *(u8 *)(r3 + 8)
; hd.ethernet.destination[1] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 1) >> 0));
18: *(u64 *)(r10 - 160) = r1
19: r1 = *(u8 *)(r3 + 7)
; hd.ethernet.destination[0] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 0) >> 0));
20: *(u64 *)(r10 - 168) = r1
21: r1 = *(u8 *)(r3 + 6)
; hd.ethernet.source[5] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 5) >> 0));
22: *(u64 *)(r10 - 176) = r1
23: r1 = *(u8 *)(r3 + 5)
; hd.ethernet.source[4] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 4) >> 0));
24: *(u64 *)(r10 - 184) = r1
25: r1 = *(u8 *)(r3 + 4)
; hd.ethernet.source[3] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 3) >> 0));
26: *(u64 *)(r10 - 192) = r1
27: r1 = *(u8 *)(r3 + 3)
; hd.ethernet.source[2] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 2) >> 0));
28: *(u64 *)(r10 - 200) = r1
29: r1 = *(u8 *)(r3 + 2)
; hd.ethernet.source[1] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 1) >> 0));
30: *(u64 *)(r10 - 208) = r1
31: r1 = *(u8 *)(r3 + 1)
; hd.ethernet.source[0] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 0) >> 0));
32: *(u64 *)(r10 - 216) = r1
33: r1 = *(u8 *)(r3 + 0)
; hd.ethernet.protocol = (u16)((load_half(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits))));
34: *(u64 *)(r10 - 224) = r1
35: r9 = *(u16 *)(r3 + 12)
; switch (hd.ethernet.protocol) {
36: r5 = r9
37: r5 <<= 8
; hd.ethernet.protocol = (u16)((load_half(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits))));
38: r8 = r9
39: r8 >>= 8
; switch (hd.ethernet.protocol) {
40: r1 = r8
41: r1 |= r5
42: r1 &= 65535
43: r5 = 0
44: *(u64 *)(r10 - 296) = r5
45: r5 = 0
46: *(u64 *)(r10 - 232) = r5
47: r5 = 0
48: *(u64 *)(r10 - 240) = r5
49: r5 = 0
50: *(u64 *)(r10 - 280) = r5
51: r5 = 0
52: *(u64 *)(r10 - 272) = r5
53: r5 = 0
54: r7 = 0
55: r0 = 0
56: *(u64 *)(r10 - 256) = r0
57: r0 = 0
58: *(u64 *)(r10 - 248) = r0
59: r0 = 0
60: *(u64 *)(r10 - 128) = r0
61: r0 = 0
62: *(u64 *)(r10 - 264) = r0
63: r0 = 0
64: *(u64 *)(r10 - 288) = r0
65: r0 = 0
66: *(u64 *)(r10 - 312) = r0
67: r0 = 0
68: *(u64 *)(r10 - 328) = r0
69: r0 = 0
70: *(u64 *)(r10 - 320) = r0
71: r0 = 0
72: *(u64 *)(r10 - 304) = r0
73: r0 = 0
74: *(u64 *)(r10 - 376) = r0
75: r0 = 0
76: *(u64 *)(r10 - 368) = r0
77: r0 = 0
78: *(u64 *)(r10 - 336) = r0
79: r0 = 0
80: *(u64 *)(r10 - 344) = r0
81: r0 = 0
82: *(u64 *)(r10 - 352) = r0
83: r0 = 0
84: *(u64 *)(r10 - 360) = r0
85: if r1 != 2048 goto 60
86: r7 = 0
; if (ebpf_packetEnd < ebpf_packetStart + BYTES(ebpf_packetOffsetInBits + 160)) {
87: r2 = r3
88: r2 += 34
89: if r2 > r4 goto 163
90: r2 = 34
xdp7.p4 show the error message
looking at the source code
we only allow stack size of 512 + 32 + 8
need to discuss with kernel bpf maintainer...