vmware-archive / p4c-xdp

Backend for the P4 compiler targeting XDP
Apache License 2.0
171 stars 26 forks source link

verifier: xdp7.p4 hit the kernel max BPF stack size #22

Open williamtu opened 7 years ago

williamtu commented 7 years ago

xdp7.p4 show the error message

394: (05) goto pc+56
451: (7b) *(u64 *)(r10 -528) = r0
invalid stack off=-528 size=8

looking at the source code

    } else if (reg->type == FRAME_PTR || reg->type == PTR_TO_STACK) {
        if (off >= 0 || off < -MAX_BPF_STACK) {
            verbose("invalid stack off=%d size=%d\n", off, size);
            return -EACCES;
        }

we only allow stack size of 512 + 32 + 8

#define MAX_BPF_STACK (512 /* from filter.h */ + \
    32 /* space for rbx,r13,r14,r15 */ + \
    8 /* space for skb_copy_bits */)

need to discuss with kernel bpf maintainer...

mihaibudiu commented 7 years ago

This one won't be easy to fix. But 528 < 512 + 32 + 8. I wonder why we use so much stack. The space we need is for:

Table keys are never all live at the same time, so we should only need the largest of them on the stack.

Can you please add xdp6 and 7 to the repo?

williamtu commented 7 years ago

thanks, I've pushed xdp6 and xdp7

mihaibudiu commented 7 years ago

All headers in xdp7.c require a bit more than 74 bytes, a little more than on the wire, since they are unpacked (I didn't count padding, maybe that adds up to something as well).

There are a few more local variables which add up to about 24 bytes.

The key requires 2 bytes.

So we should be pretty far from 528.

williamtu commented 7 years ago

there are two MAX_BPF_STACK, previous one is at arch/x86/net/bpf_jit.S I think we are using the definition at "include/linux/filter.h"

#define MAX_BPF_STACK   512
mihaibudiu commented 7 years ago

Even that should be large enough. I wonder why we use 528 bytes of stack.

williamtu commented 7 years ago

right... that's weird... let me dig deeper

williamtu commented 7 years ago

have some discussion in iovisor https://lists.iovisor.org/pipermail/iovisor-dev/2017-February/000644.html

williamtu commented 7 years ago

this part looks not right. the generated code machine code is using 8 byte every time to save into hd.ethernet.source[n], which should be 1 byte see 72, 80, 88, 96 ...

so I guess the ethernet header takes 8 * (6+6) + 2 byte?

      18:   r3 = *(u8 *)(r1 + 6)
; hd.ethernet.source[5] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 5) >> 0));
      19:   *(u64 *)(r10 - 72) = r3
      20:   r3 = *(u8 *)(r1 + 5)
; hd.ethernet.source[4] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 4) >> 0));
      21:   *(u64 *)(r10 - 80) = r3
      22:   r3 = *(u8 *)(r1 + 4)
; hd.ethernet.source[3] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 3) >> 0));
      23:   *(u64 *)(r10 - 88) = r3
      24:   r3 = *(u8 *)(r1 + 3)
; hd.ethernet.source[2] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 2) >> 0));
      25:   *(u64 *)(r10 - 96) = r3
      26:   r3 = *(u8 *)(r1 + 2)
; hd.ethernet.source[1] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 1) >> 0));
      27:   *(u64 *)(r10 - 104) = r3
      28:   r3 = *(u8 *)(r1 + 1)
; hd.ethernet.source[0] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 0) >> 0));
      29:   *(u64 *)(r10 - 112) = r3
      30:   r3 = *(u8 *)(r1 + 0)
mihaibudiu commented 7 years ago

Can you try to replace char source[6] with u8 source[6]? This does not make any sense.

mihaibudiu commented 7 years ago

The bug is in the load_byte macro - the cast should be applied to data before adding

williamtu commented 7 years ago

still the same, with the following 2 changes.

 #define load_byte(data, b)  (*((u8 *)data + (b)))
struct Ethernet {
    u8 source[6]; /* bit<48> */
    u8 destination[6]; /* bit<48> */
    u16 protocol; /* bit<16> */
    u8 ebpf_valid;
};
mihaibudiu commented 7 years ago

can you just print sizeof(headers) and sizeof(headers.ethernet)?

williamtu commented 7 years ago

another solution is to avoid using local stack, but put the struct Headers in a BPF MAP. I tried it a bit and seems ok. It's s.t lik

--- a/tmp/xdp1.c
+++ b/tmp/xdp1.c.usemap
@@ -1,5 +1,6 @@
 #define KBUILD_MODNAME "xdptest"
 #include <linux/bpf.h>
 #include "bpf_helpers.h"

 #define load_byte(data, b)  (*(u8 *)(data + (b)))
@@ -36,7 +37,7 @@ struct Ethernet {
     char destination[6]; /* bit<48> */
     u16 protocol; /* bit<16> */
     u8 ebpf_valid;
};

 struct IPv4 {
     u8 version; /* bit<4> */
@@ -67,16 +68,18 @@ struct bpf_map_def SEC("maps") ebpf_outTable = {
     .max_entries = 1 /* No multicast support */
 };

+struct bpf_map_def SEC("maps") Headers = {
+    .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+    .key_size = sizeof(u32),
+    .value_size = sizeof(struct Headers),
+    .pinning = 2, /* PIN_GLOBAL_NS */
+    .max_entries = 1 /* No multicast support */
+};
+
+
 SEC("prog")
 int ebpf_filter(struct xdp_md* skb){
-    struct Headers hd = {
-        .ethernet = {
-            .ebpf_valid = 0
-        },
-        .ipv4 = {
-            .ebpf_valid = 0
-        },
-    };
+    struct Headers hd_zero, *hd;
     unsigned ebpf_packetOffsetInBits = 0;
     enum ebpf_errorCodes ebpf_errorCode = NoError;
     void* ebpf_packetStart = ((void*)(long)skb->data);
@@ -88,82 +91,88 @@ int ebpf_filter(struct xdp_md* skb){
     /* TODO: this should be initialized by the environment. HOW? */
     struct xdp_input xin;

+       memset(&hd_zero, 0, sizeof(hd_zero));
+       bpf_map_update_elem(&Headers, &ebpf_zero, &hd_zero, BPF_ANY);
+       hd = bpf_map_lookup_elem(&Headers, &ebpf_zero);
+       if (!hd)
+               return XDP_ABORTED;
+
     goto start;
     start: {
-        /* extract(hd.ethernet)*/
+        /* extract(hd->ethernet)*/
mihaibudiu commented 7 years ago

This should be much slower. We should be able to figure out what the problem with the stack is.

williamtu commented 7 years ago

adding printk

    printk("headers %d ethernet %d\n", sizeof(struct Headers), sizeof(hd.ethernet));
    goto start;
    start: {

it prints out

          <idle>-0     [001] ..s. 27808.060250: : headers 44 ethernet 16

here I'm using xdp1, headers has


struct Ethernet {
    u8 source[6]; /* bit<48> */
    u8 destination[6]; /* bit<48> */
    u16 protocol; /* bit<16> */
    u8 ebpf_valid;
};

struct IPv4 {
    u8 version; /* bit<4> */
    u8 ihl; /* bit<4> */
    u8 diffserv; /* bit<8> */
    u16 totalLen; /* bit<16> */
    u16 identification; /* bit<16> */
    u8 flags; /* bit<3> */
    u16 fragOffset; /* bit<13> */
    u8 ttl; /* bit<8> */
    u8 protocol; /* bit<8> */
    u16 hdrChecksum; /* bit<16> */
    u32 srcAddr; /* bit<32> */
    u32 dstAddr; /* bit<32> */
    u8 ebpf_valid;
};

struct Headers {
    struct Ethernet ethernet; /* Ethernet */
    struct IPv4 ipv4; /* IPv4 */
};
mihaibudiu commented 7 years ago

So we know that stack usage is not the problem.

mihaibudiu commented 7 years ago

How about you just run the preprocessor and look at the generated C?

williamtu commented 7 years ago

need to go out now, will work on it in the afternoon.

On Fri, Feb 10, 2017 at 9:37 AM, Mihai Budiu notifications@github.com wrote:

How about you just run the preprocessor and look at the generated C?

— You are receiving this because you authored the thread. Reply to this email directly, view it on GitHub https://github.com/williamtu/p4c-xdp/issues/22#issuecomment-279010739, or mute the thread https://github.com/notifications/unsubscribe-auth/AE7w-4KKqMwTv7Q_i00kWq3GXFzuTYAWks5rbKB0gaJpZM4L7AyM .

mihaibudiu commented 7 years ago

Actually it can't be the load_byte - that's the load. The store is the problem.

williamtu commented 7 years ago

I did another experiment using kernel's sample code https://lists.iovisor.org/pipermail/iovisor-dev/2017-February/000652.html

mihaibudiu commented 7 years ago

This looks like a bug in the bpf llvm back-end bug. If you look at the assembly, they always write a double-word (64 bits) no matter what the output type is; in this case, a byte.

mihaibudiu commented 7 years ago

For example, I changed the source to be just an u8, and the code generated is this:

.loc    2 89 37                 # xdp7.c:89:37
std -464(r10), r1
ldb r1, 0(r2)

As far as I can tell, ldb is load byte, and std is store double.

mihaibudiu commented 7 years ago

I found a workaround this problem: if you take the address of the headers struct the compiler has to put it on the stack. So I just added this:

printk("%p", &hd);

I will add this to the generated code.

williamtu commented 7 years ago

yes, it will put it on stack, but still uses 8 bytes to store an u8 for example using xdp1.p4, and adding printk("%p", &hd);

root@osboxes:~/p4c/extensions/p4c-xdp/tests# llvm-objdump -S -no-show-raw-insn xdp1.o

xdp1.o: file format ELF64-BPF

Disassembly of section prog:
ebpf_filter:
; int ebpf_filter(struct xdp_md* skb){
       0:   r6 = r1
       1:   r7 = 0
; struct Headers hd = {
       2:   *(u32 *)(r10 - 8) = r7
       3:   *(u64 *)(r10 - 16) = r7
       4:   *(u64 *)(r10 - 24) = r7
       5:   *(u64 *)(r10 - 32) = r7
       6:   *(u64 *)(r10 - 40) = r7
       7:   *(u64 *)(r10 - 48) = r7

hd still uses 8 bytes for each u8

williamtu commented 7 years ago

from Alexei's reply https://lists.iovisor.org/pipermail/iovisor-dev/2017-February/000653.html I think it's a limitation in llvm which it always spills 8byte on the stack, since BPF register size is 8byte.

williamtu commented 7 years ago

I force llvm to allocate struct Headers on stack using alloca first, from bitcode to LLVM IR

clang -D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign -Wno-compare-distinct-pointer-types -Wno-gnu-variable-sized-type-not-at-end -Wno-tautological-compare -O2 -emit-llvm -g -c xdp9.c
llvm-dis xdp9.bc -o /tmp/xdp9.ll

make the change in LLVM IR, xdp9.ll

  %ebpf_zero = alloca i32, align 4
  %xout = alloca %struct.xdp_output, align 4
  %key = alloca %struct.dstmactable_key, align 2
+  %hd = alloca %struct.Headers, align 4
+  %hd.ethernet = alloca %struct.Ethernet, align 2
+  %hd.ipv4 = alloca %struct.IPv4, align 4

assembly back from LLVM IR to bitcode

llvm-as /tmp/xdp9.ll -o xdp9.bc
llc -march=bpf -filetype=obj -o xdp9.o xdp9.bc
llvm-objdump -S -no-show-raw-insn xdp9.o

result of objdump

Disassembly of section prog:
ebpf_filter:
; int ebpf_filter(struct xdp_md* skb){
       0:   r6 = r1
; void* ebpf_packetEnd = ((void*)(long)skb->data_end);
       1:   r3 = *(u32 *)(r6 + 4)
; void* ebpf_packetStart = ((void*)(long)skb->data);
       2:   r2 = *(u32 *)(r6 + 0)
       3:   r8 = 0
; u32 ebpf_zero = 0;
       4:   *(u32 *)(r10 - 92) = r8
; if (ebpf_packetEnd < ebpf_packetStart + BYTES(ebpf_packetOffsetInBits + 112)) {
       5:   r1 = r2
       6:   r1 += 14
       7:   if r1 > r3 goto 891
       8:   r1 = 14
       9:   *(u64 *)(r10 - 136) = r1
      10:   r5 = 0
; hd.ethernet.destination[5] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 5) >> 0));
      11:   r1 = *(u8 *)(r2 + 11)
; hd.ethernet.destination[4] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 4) >> 0));
      12:   *(u64 *)(r10 - 544) = r1
      13:   r1 = *(u8 *)(r2 + 10)
; hd.ethernet.destination[3] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 3) >> 0));
      14:   *(u64 *)(r10 - 552) = r1
      15:   r1 = *(u8 *)(r2 + 9)
; hd.ethernet.destination[2] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 2) >> 0));
      16:   *(u64 *)(r10 - 560) = r1
      17:   r1 = *(u8 *)(r2 + 8)
; hd.ethernet.destination[1] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 1) >> 0));
      18:   *(u64 *)(r10 - 568) = r1
      19:   r1 = *(u8 *)(r2 + 7)
; hd.ethernet.destination[0] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 0) >> 0));
      20:   *(u64 *)(r10 - 576) = r1
      21:   r1 = *(u8 *)(r2 + 6)
; hd.ethernet.source[5] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 5) >> 0));
      22:   *(u64 *)(r10 - 584) = r1
      23:   r1 = *(u8 *)(r2 + 5)
; hd.ethernet.source[4] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 4) >> 0));
      24:   *(u64 *)(r10 - 600) = r1
      25:   r1 = *(u8 *)(r2 + 4)
; hd.ethernet.source[3] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 3) >> 0));
      26:   *(u64 *)(r10 - 608) = r1
      27:   r1 = *(u8 *)(r2 + 3)

unfortunately it's still the same, the hd is using r10 - off, where off is 8 byte offset

williamtu commented 7 years ago

I don't understand why a 'switch' statement need to clear the stack see ; switch (hd.ethernet.protocol)

; int ebpf_filter(struct xdp_md* skb){
       0:   r6 = r1
; void* ebpf_packetEnd = ((void*)(long)skb->data_end);
       1:   r4 = *(u32 *)(r6 + 4)
; void* ebpf_packetStart = ((void*)(long)skb->data);
       2:   r3 = *(u32 *)(r6 + 0)
       3:   r7 = 0
; u32 ebpf_zero = 0;
       4:   *(u32 *)(r10 - 4) = r7
; if (ebpf_packetEnd < ebpf_packetStart + BYTES(ebpf_packetOffsetInBits + 112)) {
       5:   r1 = r3
       6:   r1 += 14
       7:   if r1 > r4 goto 245
       8:   r2 = 14
       9:   r1 = 0
; hd.ethernet.destination[5] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 5) >> 0));
      10:   *(u64 *)(r10 - 120) = r1
      11:   r1 = *(u8 *)(r3 + 11)
; hd.ethernet.destination[4] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 4) >> 0));
      12:   *(u64 *)(r10 - 136) = r1
      13:   r1 = *(u8 *)(r3 + 10)
; hd.ethernet.destination[3] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 3) >> 0));
      14:   *(u64 *)(r10 - 144) = r1
      15:   r1 = *(u8 *)(r3 + 9)
; hd.ethernet.destination[2] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 2) >> 0));
      16:   *(u64 *)(r10 - 152) = r1
      17:   r1 = *(u8 *)(r3 + 8)
; hd.ethernet.destination[1] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 1) >> 0));
      18:   *(u64 *)(r10 - 160) = r1
      19:   r1 = *(u8 *)(r3 + 7)
; hd.ethernet.destination[0] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 0) >> 0));
      20:   *(u64 *)(r10 - 168) = r1
      21:   r1 = *(u8 *)(r3 + 6)
; hd.ethernet.source[5] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 5) >> 0));
      22:   *(u64 *)(r10 - 176) = r1
      23:   r1 = *(u8 *)(r3 + 5)
; hd.ethernet.source[4] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 4) >> 0));
      24:   *(u64 *)(r10 - 184) = r1
      25:   r1 = *(u8 *)(r3 + 4)
; hd.ethernet.source[3] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 3) >> 0));
      26:   *(u64 *)(r10 - 192) = r1
      27:   r1 = *(u8 *)(r3 + 3)
; hd.ethernet.source[2] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 2) >> 0));
      28:   *(u64 *)(r10 - 200) = r1
      29:   r1 = *(u8 *)(r3 + 2)
; hd.ethernet.source[1] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 1) >> 0));
      30:   *(u64 *)(r10 - 208) = r1
      31:   r1 = *(u8 *)(r3 + 1)
; hd.ethernet.source[0] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 0) >> 0));
      32:   *(u64 *)(r10 - 216) = r1
      33:   r1 = *(u8 *)(r3 + 0)
; hd.ethernet.protocol = (u16)((load_half(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits))));
      34:   *(u64 *)(r10 - 224) = r1
      35:   r9 = *(u16 *)(r3 + 12)
; switch (hd.ethernet.protocol) {
      36:   r5 = r9
      37:   r5 <<= 8
; hd.ethernet.protocol = (u16)((load_half(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits))));
      38:   r8 = r9
      39:   r8 >>= 8
; switch (hd.ethernet.protocol) {
      40:   r1 = r8
      41:   r1 |= r5
      42:   r1 &= 65535
      43:   r5 = 0
      44:   *(u64 *)(r10 - 296) = r5
      45:   r5 = 0
      46:   *(u64 *)(r10 - 232) = r5
      47:   r5 = 0
      48:   *(u64 *)(r10 - 240) = r5
      49:   r5 = 0
      50:   *(u64 *)(r10 - 280) = r5
      51:   r5 = 0
      52:   *(u64 *)(r10 - 272) = r5
      53:   r5 = 0
      54:   r7 = 0
      55:   r0 = 0
      56:   *(u64 *)(r10 - 256) = r0
      57:   r0 = 0
      58:   *(u64 *)(r10 - 248) = r0
      59:   r0 = 0
      60:   *(u64 *)(r10 - 128) = r0
      61:   r0 = 0
      62:   *(u64 *)(r10 - 264) = r0
      63:   r0 = 0
      64:   *(u64 *)(r10 - 288) = r0
      65:   r0 = 0
      66:   *(u64 *)(r10 - 312) = r0
      67:   r0 = 0
      68:   *(u64 *)(r10 - 328) = r0
      69:   r0 = 0
      70:   *(u64 *)(r10 - 320) = r0
      71:   r0 = 0
      72:   *(u64 *)(r10 - 304) = r0
      73:   r0 = 0
      74:   *(u64 *)(r10 - 376) = r0
      75:   r0 = 0
      76:   *(u64 *)(r10 - 368) = r0
      77:   r0 = 0
      78:   *(u64 *)(r10 - 336) = r0
      79:   r0 = 0
      80:   *(u64 *)(r10 - 344) = r0
      81:   r0 = 0
      82:   *(u64 *)(r10 - 352) = r0
      83:   r0 = 0
      84:   *(u64 *)(r10 - 360) = r0
      85:   if r1 != 2048 goto 60
      86:   r7 = 0
; if (ebpf_packetEnd < ebpf_packetStart + BYTES(ebpf_packetOffsetInBits + 160)) {
      87:   r2 = r3
      88:   r2 += 34
      89:   if r2 > r4 goto 163
      90:   r2 = 34