iovisor / bcc

BCC - Tools for BPF-based Linux IO analysis, networking, monitoring, and more
Apache License 2.0
20.47k stars 3.86k forks source link

verifier failure with a particular dp program #10

Open yonghong-song opened 9 years ago

yonghong-song commented 9 years ago

For the following B program:

plumgrid@yhs-plumgrid:~/iovisor/bcc/tests/jit$ cat bpfdev1.b
#packed "false"

// hash
// fwd_map: keyed by a 32-bit destination IP (dip); the leaf carries the
// 32-bit value that main() returns from the udp state on a lookup hit.
struct FwdKey {
  u32 dip:32;
};
struct FwdLeaf {
  u32 fwd_idx:32;
};
// NOTE(review): the constructor argument (1) is presumably the table
// capacity -- confirm against the B language definition.
Table<FwdKey, FwdLeaf, FIXED_MATCH, NONE> fwd_map(1);

// array
// config_map: main() only ever reads entry index 0, which supplies the
// bpfdev IP and the slave IP used on both the tx and the udp paths.
struct ConfigKey {
  u32 index:32;
};
struct ConfigLeaf {
  u32 bpfdev_ip:32;
  u32 slave_ip:32;
};
Table<ConfigKey, ConfigLeaf, INDEXED, AUTO> config_map(1);

// hash
// macaddr_map: maps a 32-bit IP to a 48-bit MAC address. main() fills it in
// the arp state and reads it in the udp state to rewrite the ethernet header.
struct MacaddrKey {
  u32 ip:32;
};
struct MacaddrLeaf {
  u64 mac:48;
};
Table<MacaddrKey, MacaddrLeaf, FIXED_MATCH, AUTO> macaddr_map(11);

// hash
// slave_map: maps a slave IP to the slave's interface index; main() consults
// it when skb.pkt_type == 0 to choose its return value.
struct SlaveKey {
  u32 slave_ip:32;
};
struct SlaveLeaf {
  u32 slave_ifindex:32;
};
Table<SlaveKey, SlaveLeaf, FIXED_MATCH, NONE> slave_map(10);

// Entry point of the bpfdev program.
//
// When skb.pkt_type == 0 the program resolves the configured slave through
// config_map and slave_map and remembers the slave's ifindex in `ret`; if
// either lookup is not valid it returns 0xffffffff immediately. For any other
// pkt_type `ret` starts at 0. Parsing then begins at proto::ethernet; the arp
// and udp states below may update macaddr_map, rewrite the packet, or return
// a forwarding index directly. Every remaining path ends in the EOP state,
// which returns `ret`.
u32 main(struct proto::skbuff *skb) {
  // Value returned from the EOP state.
  u32 ret:32;

  if skb.pkt_type == 0 {
    // tx
    // make sure configured
    u32 slave_ip:32;

    // Entry 0 of config_map holds the configuration read by this program.
    struct ConfigKey cfg_key = {.index = 0};
    struct ConfigLeaf *cfg_leaf;
    config_map.lookup(cfg_key, cfg_leaf) {};
    on_valid(cfg_leaf) {
      // Copy the field out so the next lookup does not depend on cfg_leaf.
      slave_ip = cfg_leaf->slave_ip;
    } else {
      return 0xffffffff;
    }

    // make sure slave configured
    // tx, default to the single slave
    struct SlaveKey slave_key = {.slave_ip = slave_ip};
    struct SlaveLeaf *slave_leaf;
    slave_map.lookup(slave_key, slave_leaf);
    on_valid(slave_leaf) {
      ret = slave_leaf->slave_ifindex;
    } else {
      return 0xffffffff;
    }
  } else {
    // rx, default to stack
    ret = 0;
  }

  goto proto::ethernet;

  // No program logic is attached to the ethernet header.
  state proto::ethernet {
  }

  // No program logic is attached to the dot1q header.
  state proto::dot1q {
  }

  state proto::arp {
    // Only for non-zero pkt_type (the rx case per the branch above).
    if (skb.pkt_type) {
      // oper == 1 is presumably an ARP request -- confirm against proto.b.
      // Record the sender's protocol address -> hardware address mapping.
      if $arp.oper == 1 {
        struct MacaddrKey mac_key = {.ip = $arp.spa};
        struct MacaddrLeaf mac_leaf = {.mac = $arp.sha};
        macaddr_map.update(mac_key, mac_leaf);
      }
      goto EOP;
    }
  }

  // No program logic is attached to the ip header.
  state proto::ip {
  }
  state proto::udp {
    // Only UDP packets with destination port 5000 get special handling.
    if $udp.dport != 5000 {
       goto EOP;
    }
    if (skb.pkt_type) {
      // lookup and then forward
      struct FwdKey fwd_key = {.dip = $ip.dst};
      struct FwdLeaf *fwd_val;
      fwd_map.lookup(fwd_key, fwd_val) {};
      on_valid(fwd_val) {
         // NOTE(review): every other leaf access in this program uses `->`;
         // confirm that `.` is intended here.
         return fwd_val.fwd_idx;
      } else {
         goto EOP;
      }
    } else {
      // rewrite the packet and send to a pre-configured index if needed
      // new_ip and old_ip are declared but never referenced below.
      u32 new_ip:32;
      u32 old_ip:32;
      u64 src_mac:48;
      u64 dst_mac:48;

      struct ConfigKey cfg_key = {.index = 0};
      struct ConfigLeaf *cfg_leaf;
      // NOTE(review): per the verifier log quoted later in this report, the
      // rejected load appears to be the read through cfg_leaf that follows
      // this lookup (map value size 8 matches ConfigLeaf) -- confirm.
      config_map.lookup(cfg_key, cfg_leaf) {};
      on_valid(cfg_leaf) {
        // cfg_leaf stays live across both macaddr_map lookups and all of the
        // rewrite/checksum calls below.
        struct MacaddrKey mac_key = {.ip = cfg_leaf->bpfdev_ip};
        struct MacaddrLeaf *mac_leaf;

        // Repeats the value already set by the initializer above.
        mac_key.ip = cfg_leaf->bpfdev_ip;
        macaddr_map.lookup(mac_key, mac_leaf) {};
        on_valid (mac_leaf) {
          src_mac = mac_leaf->mac;
        } else {
      goto EOP;
        }

        // Reuse the same key and leaf pointer for the slave's MAC address.
        mac_key.ip = cfg_leaf->slave_ip;
        macaddr_map.lookup(mac_key, mac_leaf) {};
        on_valid (mac_leaf) {
          dst_mac = mac_leaf->mac;
        } else {
      goto EOP;
        }

        // rewrite ethernet header
        pkt.rewrite_field($ethernet.dst, dst_mac);
        pkt.rewrite_field($ethernet.src, src_mac);

        // ip & udp checksum
        // The checksums are adjusted before the ip fields are rewritten below.
        incr_cksum(@ip.hchecksum, $ip.src, cfg_leaf->bpfdev_ip);
        incr_cksum(@ip.hchecksum, $ip.dst, cfg_leaf->slave_ip);
        incr_cksum(@udp.crc, $ip.src, cfg_leaf->bpfdev_ip, 1);
        incr_cksum(@udp.crc, $ip.dst, cfg_leaf->slave_ip, 1);

        // rewrite ip src/dst fields
        pkt.rewrite_field($ip.src, cfg_leaf->bpfdev_ip);
        pkt.rewrite_field($ip.dst, cfg_leaf->slave_ip);

        goto EOP;

      } else {
        goto EOP;
      }
    }
  }

  // Common exit: return the value chosen at the top of main().
  state EOP {
    return ret;
  }
}

plumgrid@yhs-plumgrid:~/iovisor/bcc/tests/jit$

The control plane app looks like:

plumgrid@yhs-plumgrid:~/iovisor/bcc/tests/jit$ cat bpfdev1.py

#!/usr/bin/env python

# test program for a simple bpfdev

import sys
import commands
from ctypes import c_uint, c_ulong, Structure
from netaddr import IPAddress, EUI
from bpf import BPF
from subprocess import check_call
from unittest import main, TestCase

# map structures
class FwdKey(Structure):
    """Key of the fwd_map table: one unsigned int."""
    _fields_ = [
        ("dip", c_uint),
    ]


class FwdLeaf(Structure):
    """Leaf of the fwd_map table: one unsigned int."""
    _fields_ = [
        ("ifc_idx", c_uint),
    ]


class ConfigKey(Structure):
    """Key of the config_map table: one unsigned int index."""
    _fields_ = [
        ("idx", c_uint),
    ]


class ConfigLeaf(Structure):
    """Leaf of the config_map table: bpfdev IP followed by slave IP."""
    _fields_ = [
        ("bpfdev_ip", c_uint),
        ("slave_ip", c_uint),
    ]


class MacaddrKey(Structure):
    """Key of the macaddr_map table: one unsigned int IP."""
    _fields_ = [
        ("ip", c_uint),
    ]


class MacaddrLeaf(Structure):
    """Leaf of the macaddr_map table: the MAC stored in an unsigned long."""
    _fields_ = [
        ("mac", c_ulong),
    ]


class SlaveKey(Structure):
    """Key of the slave_map table: one unsigned int slave IP."""
    _fields_ = [
        ("slave_ip", c_uint),
    ]


class SlaveLeaf(Structure):
    """Leaf of the slave_map table: one unsigned int."""
    _fields_ = [
        ("ifc_idx", c_uint),
    ]

class TestBPFDev(TestCase):
    def config(self, bpfdev, ns, bpfdev_ip, bpfdev_mac, slave_ip):
        # ifup bpfdev
        check_call(["ip", "link", "set", "dev", bpfdev, "up"])
        check_call(["ifconfig", bpfdev, bpfdev_ip])
        check_call(["ifconfig", bpfdev, "hw", "ether", bpfdev_mac])

        # setup a namespace for the VM
        if_se = ns + ".eth0.se"
        if_vm = ns + ".eth0.vm"
        check_call(["ip", "netns", "add", ns])
        check_call(["ip", "link", "add", "name", if_se, "type", "veth", "peer", "name", if_vm])
        check_call(["ip", "link", "set", if_vm, "netns", ns])
        check_call(["ip", "netns", "exec", ns, "ip", "link", "set", if_vm, "name", "eth0"])
        check_call(["ip", "link", "set", if_se, "up"])
        check_call(["ip", "netns", "exec", ns, "ip", "link", "set", "eth0", "up"])
        check_call(["ip", "link", "set", "dev", if_se, "promisc", "on"])
        check_call(["ip", "netns", "exec", ns, "ifconfig", "eth0", slave_ip])

        # establish the master-slave relationships
        check_call(["ip", "link", "set", "dev", if_se, "master", bpfdev])

    def setUp(self):
        sys.stderr.write("build bpfdev programs for br22 and br33\n")
        self.prog1 = BPF("main", "bpfdev1.b", "proto.b", prog_type=BPF.BPF_PROG_TYPE_BPFDEV, debug=0)
        self.prog2 = BPF("main", "bpfdev1.b", "proto.b", prog_type=BPF.BPF_PROG_TYPE_BPFDEV, debug=0)

    # create two bpf devices
        sys.stderr.write("creating bpfdev br22 and br33\n")
        self.prog1.create_bpfdev("br22")
        self.prog2.create_bpfdev("br33")

        # configure bpfdev
        sys.stderr.write("configuring bpfdev br22 and br33\n")
        self.config("br22", "ns0", "10.0.0.4", "02:02:02:02:02:02", "10.1.1.3")
        self.config("br33", "ns1", "20.0.0.4", "04:04:04:04:04:04", "20.1.1.3")

        # prog1 table configuration
        sys.stderr.write("configuring bpfdev br22 table\n")
        fwd_if = int(commands.getoutput('ip -o link show dev br33 | awk -F\': \' \'{print $1}\''))
        sys.stderr.write("br22 special rx packet forward to %d\n" % fwd_if)
        fwd_map = self.prog1.table("fwd_map", FwdKey, FwdLeaf)
        key = FwdKey(IPAddress("10.0.0.4").value)
        leaf = FwdLeaf(fwd_if)
        fwd_map.put(key, leaf)

        config_map = self.prog1.table("config_map", ConfigKey, ConfigLeaf)
        key = ConfigKey(0)
        leaf = ConfigLeaf(IPAddress("10.0.0.4").value, IPAddress("10.1.1.3").value)
        config_map.put(key, leaf)

        macaddr_map = self.prog1.table("macaddr_map", MacaddrKey, MacaddrLeaf)
        key = MacaddrKey(IPAddress("10.0.0.4").value)
        leaf = MacaddrLeaf(EUI("02-02-02-02-02-02").value)    # 02:02:02:02:02:02
        macaddr_map.put(key, leaf)

        slave_map = self.prog1.table("slave_map", SlaveKey, SlaveLeaf)
        fwd_if = int(commands.getoutput('ip -o link show dev ns0.eth0.se | awk -F\': \' \'{print $1}\''))
        sys.stderr.write("br22 special tx packet forward to %d\n" % fwd_if)
        key = SlaveKey(IPAddress("10.1.1.3").value)
        leaf = SlaveLeaf(fwd_if)
        slave_map.put(key, leaf)

        # prog2 table configuratioin
        sys.stderr.write("configuring bpfdev br33 table\n")
        fwd_if = int(commands.getoutput('ip -o link show dev br22 | awk -F\': \' \'{print $1}\''))
        sys.stderr.write("br33 special rx packet forward to %d\n" % fwd_if)
        fwd_map = self.prog2.table("fwd_map", FwdKey, FwdLeaf)
        key = FwdKey(IPAddress("20.0.0.4").value)
        leaf = FwdLeaf(fwd_if)
        fwd_map.put(key, leaf)

        config_map = self.prog2.table("config_map", ConfigKey, ConfigLeaf)
        key = ConfigKey(0)
        leaf = ConfigLeaf(IPAddress("20.0.0.4").value, IPAddress("20.1.1.3").value)
        config_map.put(key, leaf)

        macaddr_map = self.prog2.table("macaddr_map", MacaddrKey, MacaddrLeaf)
        key = MacaddrKey(IPAddress("20.0.0.4").value)
        leaf = MacaddrLeaf(EUI("04-04-04-04-04-04").value)    # 04:04:04:04:04:04
        macaddr_map.put(key, leaf)

        slave_map = self.prog2.table("slave_map", SlaveKey, SlaveLeaf)
        fwd_if = int(commands.getoutput('ip -o link show dev ns1.eth0.se | awk -F\': \' \'{print $1}\''))
        sys.stderr.write("br33 special tx packet forward to %d\n" % fwd_if)
        key = SlaveKey(IPAddress("20.1.1.3").value)
        leaf = SlaveLeaf(fwd_if)
        slave_map.put(key, leaf)

    sys.stderr.write("prog1 config_map")
        config_map = self.prog1.table("config_map", ConfigKey, ConfigLeaf)
        key = ConfigKey(0)
    leaf = config_map.get(key)
        print "config1", leaf.bpfdev_ip, leaf.slave_ip

    sys.stderr.write("prog2 config_map")
        config_map = self.prog2.table("config_map", ConfigKey, ConfigLeaf)
        key = ConfigKey(0)
    leaf = config_map.get(key)
        print "config2", leaf.bpfdev_ip, leaf.slave_ip

    def test_ping(self):
        sys.stderr.write("testing ping between master and slave\n")
        check_call(["ip", "netns", "exec", "ns0", "ping", "-c4", "10.0.0.4"])
        check_call(["ip", "netns", "exec", "ns1", "ping", "-c4", "20.0.0.4"])

        # sys.stderr.write("testing forwarding from br22 to br33\n")
        # check_call(["ip", "netns", "exec", "ns1", "/usr/bin/python", "/home/plumgrid/bpf/recv_udp.py", "&"])
        # check_call(["ip", "netns", "exec", "ns0", "/usr/bin/python", "/home/plumgrid/bpf/send_udp.py"])

# Run the unittest runner (unittest.main) when executed as a script.
if __name__ == "__main__":
    main()

plumgrid@yhs-plumgrid:~/iovisor/bcc/tests/jit$

To run the complete test, other changes in bcc are needed to support the new bpfdev device; those changes are not included here.

The test failed with the following symptom:

5: from 39 to 89: R0=imm5000 R1=imm0 R6=ctx R7=imm0 R8=imm14 R9=inv R10=fp
5: 89: (b7) r9 = 0
5: 90: (63) *(u32 *)(r10 -56) = r9
5: 91: (18) r1 = 0x587280
5: 93: (bf) r2 = r10
5: 94: (07) r2 += -56
5: 95: (85) call 1
5: 96: (bf) r1 = r0
5: 97: (15) if r0 == 0x0 goto pc-11
5:  R0=map_value(ks=4,vs=8) R1=map_value_or_null(ks=4,vs=8) R6=ctx R7=imm0 R8=imm14 R9=imm0 R10=fp
5: 98: (05) goto pc+0
5: 99: (63) *(u32 *)(r10 -64) = r9
5: 100: (bf) r2 = r1
5: 101: (7b) *(u64 *)(r10 -72) = r2
5: 102: (61) r1 = *(u32 *)(r2 +0)
5: R2 invalid mem access 'map_value_or_null'
5: 
5: ERROR: test_ping (__main__.TestBPFDev)
5: ----------------------------------------------------------------------
5: Traceback (most recent call last):
5:   File "/home/plumgrid/iovisor/bcc/tests/jit/bpfdev1.py", line 59, in setUp
5:     self.prog1 = BPF("main", "bpfdev1.b", "proto.b", prog_type=BPF.BPF_PROG_TYPE_BPFDEV, debug=0)
5:   File "/home/plumgrid/iovisor/bcc/src/bpf.py", line 64, in __init__
5:     self.load(self.name)
5:   File "/home/plumgrid/iovisor/bcc/src/bpf.py", line 78, in load
5:     raise Exception("Failed to load BPF program %s" % self.name)
5: Exception: Failed to load BPF program main
5: 
5: ----------------------------------------------------------------------

Instruction "96" copies r0 into r1, and the verifier still treats r1 as map_value_or_null even though instruction "97" checks r0 for null.

The compiler ought to generate better code. The instruction "r1 = r0" is not necessary.

I dumped the IR (change debug=0 to debug=1 in the Python program) and fed it to llc: `llc -march=bpf -filetype=asm -O3 b.ll`

llc also generates similar code:

LBB8_16:                                # %if.else20
        mov     r9, 0
        stw     -56(r10), r9
        ld_pseudo       r1, 1, 6
        mov     r2, r10
        addi    r2, -56
        call    1
        mov     r1, r0
        jeqi    r0, 0 goto LBB8_12
        jmp     LBB8_18
LBB8_18:                                # %onvalid.then25
        stw     -64(r10), r9
        mov     r2, r1
        std     -72(r10), r2
        ldw     r1, 0(r2)
        stw     -64(r10), r1

-O2 generates similar code.

Studying the LLVM optimization passes, there is a pass in LLVM called "Virtual Register Rewriter" that does remove SOME of the above redundant copies, but not all of them, hence the issue.

FYI, I changed LLVM to print out the passes applied during bcc compiler optimization; the result is below:

5: Target Transform Information
5: Target Pass Configuration
5: No Alias Analysis (always returns 'may' alias)
5: Type-Based Alias Analysis
5: Scoped NoAlias Alias Analysis
5: Assumption Cache Tracker
5: Target Library Information
5: Basic Alias Analysis (stateless AA impl)
5: Create Garbage Collector Module Metadata
5: Machine Module Information
5: Machine Branch Probability Analysis
5:   ModulePass Manager
5:     FunctionPass Manager
5:       Dominator Tree Construction
5:       Natural Loop Information
5:       Canonicalize natural loops
5:       Scalar Evolution Analysis
5:       Loop Pass Manager
5:         Induction Variable Users
5:         Loop Strength Reduction
5:       Lower Garbage Collection Instructions
5:       Shadow Stack GC Lowering
5:       Remove unreachable blocks from the CFG
5:       Dominator Tree Construction
5:       Constant Hoisting
5:       Partially inline calls to library functions
5:       CodeGen Prepare
5:     Rewrite Symbols
5:     FunctionPass Manager
5:       Lower invoke and unwind, for unwindless code generators
5:       Remove unreachable blocks from the CFG
5:       Insert stack protectors
5:       Machine Function Analysis
5:       Dominator Tree Construction
5:       Natural Loop Information
5:       Branch Probability Analysis
5:       BPF DAG->DAG Pattern Instruction Selection
5:       Expand ISel Pseudo-instructions
5:       Tail Duplication
5:       Optimize machine instruction PHIs
5:       MachineDominator Tree Construction
5:       Slot index numbering
5:       Merge disjoint stack slots
5:       Local Stack Slot Allocation
5:       Remove dead machine instructions
5:       MachineDominator Tree Construction
5:       Machine Natural Loop Construction
5:       Machine Loop Invariant Code Motion
5:       Machine Common Subexpression Elimination
5:       MachinePostDominator Tree Construction
5:       Machine Block Frequency Analysis
5:       Machine code sinking
5:       Peephole Optimizations
5:       Remove dead machine instructions
5:       Process Implicit Definitions
5:       Remove unreachable machine basic blocks
5:       Live Variable Analysis
5:       MachineDominator Tree Construction
5:       Machine Natural Loop Construction
5:       Eliminate PHI nodes for register allocation
5:       Two-Address instruction pass
5:       Slot index numbering
5:       Live Interval Analysis
5:       Simple Register Coalescing
5:       Machine Instruction Scheduler
5:       Machine Block Frequency Analysis
5:       Debug Variable Analysis
5:       Live Stack Slot Analysis
5:       Virtual Register Map
5:       Live Register Matrix
5:       Bundle Machine CFG Edges
5:       Spill Code Placement Analysis
5:       Greedy Register Allocator
5:       Virtual Register Rewriter
5:       Stack Slot Coloring
5:       Machine Loop Invariant Code Motion
5:       Prologue/Epilogue Insertion & Frame Finalization
5:       Machine Block Frequency Analysis
5:       Control Flow Optimizer
5:       Tail Duplication
5:       Machine Copy Propagation Pass
5:       Post-RA pseudo instruction expansion pass
5:       MachineDominator Tree Construction
5:       Machine Natural Loop Construction
5:       Post RA top-down list latency scheduler
5:       Analyze Machine Code For Garbage Collection
5:       Machine Block Frequency Analysis
5:       Branch Probability Basic Block Placement
5:       StackMap Liveness Analysis
5:       BPF Assembly Printer

Checking the llc compiler passes, the function passes are very similar to the above (I did not compare them one-to-one).

In summary, this is an LLVM issue and we may have to fix it there.

yonghong-song commented 9 years ago

The following diff fixed the problem:

plumgrid@yhs-plumgrid:~/iovisor/bcc/tests/jit$ git diff

diff --git a/tests/jit/bpfdev1.b b/tests/jit/bpfdev1.b
index c2ecddd..bc04002 100644
--- a/tests/jit/bpfdev1.b
+++ b/tests/jit/bpfdev1.b
@@ -110,49 +110,53 @@ u32 main(struct proto::skbuff *skb) {
       u32 old_ip:32;
       u64 src_mac:48;
       u64 dst_mac:48;
+      u32 bpfdev_ip:32;
+      u32 slave_ip:32;

       struct ConfigKey cfg_key = {.index = 0};
       struct ConfigLeaf *cfg_leaf;
       config_map.lookup(cfg_key, cfg_leaf) {};
       on_valid(cfg_leaf) {
-        struct MacaddrKey mac_key = {.ip = cfg_leaf->bpfdev_ip};
-        struct MacaddrLeaf *mac_leaf;
-
-        mac_key.ip = cfg_leaf->bpfdev_ip;
-        macaddr_map.lookup(mac_key, mac_leaf) {};
-        on_valid (mac_leaf) {
-          src_mac = mac_leaf->mac;
-        } else {
-         goto EOP;
-        }
-
-        mac_key.ip = cfg_leaf->slave_ip;
-        macaddr_map.lookup(mac_key, mac_leaf) {};
-        on_valid (mac_leaf) {
-          dst_mac = mac_leaf->mac;
-        } else {
-         goto EOP;
-        }
-
-        // rewrite ethernet header
-        pkt.rewrite_field($ethernet.dst, dst_mac);
-        pkt.rewrite_field($ethernet.src, src_mac);
-
-        // ip & udp checksum
-        incr_cksum(@ip.hchecksum, $ip.src, cfg_leaf->bpfdev_ip);
-        incr_cksum(@ip.hchecksum, $ip.dst, cfg_leaf->slave_ip);
-        incr_cksum(@udp.crc, $ip.src, cfg_leaf->bpfdev_ip, 1);
-        incr_cksum(@udp.crc, $ip.dst, cfg_leaf->slave_ip, 1);
-
-        // rewrite ip src/dst fields
-        pkt.rewrite_field($ip.src, cfg_leaf->bpfdev_ip);
-        pkt.rewrite_field($ip.dst, cfg_leaf->slave_ip);
+        bpfdev_ip = cfg_leaf->bpfdev_ip;
+        slave_ip = cfg_leaf->slave_ip;
+      } else {
+        goto EOP;
+      }

+      struct MacaddrKey mac_key = {.ip = bpfdev_ip};
+      struct MacaddrLeaf *mac_leaf;
+
+      mac_key.ip = bpfdev_ip;
+      macaddr_map.lookup(mac_key, mac_leaf) {};
+      on_valid (mac_leaf) {
+        src_mac = mac_leaf->mac;
+      } else {
         goto EOP;
+      }

+      mac_key.ip = slave_ip;
+      macaddr_map.lookup(mac_key, mac_leaf) {};
+      on_valid (mac_leaf) {
+        dst_mac = mac_leaf->mac;
       } else {
         goto EOP;
       }
+
+      // rewrite ethernet header
+      pkt.rewrite_field($ethernet.dst, dst_mac);
+      pkt.rewrite_field($ethernet.src, src_mac);
+
+      // ip & udp checksum
+      incr_cksum(@ip.hchecksum, $ip.src, bpfdev_ip);
+      incr_cksum(@ip.hchecksum, $ip.dst, slave_ip);
+      incr_cksum(@udp.crc, $ip.src, bpfdev_ip, 1);
+      incr_cksum(@udp.crc, $ip.dst, slave_ip, 1);
+
+      // rewrite ip src/dst fields
+      pkt.rewrite_field($ip.src, bpfdev_ip);
+      pkt.rewrite_field($ip.dst, slave_ip);
+
+      goto EOP;
     }
   }

plumgrid@yhs-plumgrid:~/iovisor/bcc/tests/jit$

Basically, the map lookup result should be given a short live range by copying its fields into local variables.

4ast commented 9 years ago

The diff seems to be mangled. Is it a workaround that relies on inside knowledge of the front-end codegen? I think we should try to fix it in the backend. Clearly the r2=r1 assignments are redundant. I also don't see the 'remat' pass being run. It's important; otherwise constant values will be copied between registers instead of using 'mov Rx, imm', which is faster, reduces register pressure, and is easier on the verifier.

drzaeus77 commented 9 years ago

I've updated the diff with proper github markdown syntax. Alexei, do you have any advice on how to fix it in the backend, or are you considering that your own AI?

4ast commented 9 years ago

I don't have a concrete plan. I suspect there is still something we are missing about pass_manager. We probably need to enable several more passes.

On Mon, May 11, 2015 at 6:34 AM, Brenden notifications@github.com wrote:

I've updated the diff with proper github markdown syntax. Alexei, do you have any advice on how to fix it in the backend, or are you considering that your own AI?

— Reply to this email directly or view it on GitHub https://github.com/plumgrid/bcc/issues/10#issuecomment-100908929.