multipath-tcp / mptcp_net-next

Development version of the Upstream MultiPath TCP Linux kernel 🐧
https://mptcp.dev
Other
281 stars 42 forks source link

BPF: path manager #74

Open matttbe opened 4 years ago

matttbe commented 4 years ago

Similar to #75, please see the description there.

geliangtang commented 3 years ago

Fullmesh path manager support #193

geliangtang commented 3 weeks ago

Recently, I have finally made some progress on the "BPF path manager". I have implemented the "subflow_create" and "subflow_destroy" interfaces in a BPF program, along with selftests for them. They work very well.

Referring to the implementation of BPF packet scheduler, BPF path manager is also implemented through BPF struct ops.

There are currently no plans to implement a BPF path manager of the MPTCP_PM_TYPE_KERNEL type; only the userspace type of BPF path manager is supported. In other words, only the userspace path manager interface is currently extended through BPF struct_ops.

By refactoring the userspace pm code, a userspace path manager can be defined through such a set of interfaces:

/*
 * Set of callbacks through which a userspace-type MPTCP path manager is
 * defined. Each callback receives the MPTCP socket it operates on.
 */
struct mptcp_pm_ops {
	int (*address_announce)(struct mptcp_sock *msk,
				struct mptcp_pm_addr_entry *addr);
	int (*address_remove)(struct mptcp_sock *msk, u8 id);
	/*
	 * Called from mptcp_pm_nl_subflow_create_doit() with the parsed local
	 * entry and remote address; a non-zero return is reported as failure.
	 */
	int (*subflow_create)(struct mptcp_sock *msk,
			      struct mptcp_pm_addr_entry *entry_l,
			      struct mptcp_addr_info *addr_r);
	/*
	 * Called from mptcp_pm_nl_subflow_destroy_doit() with the parsed local
	 * entry and remote address; a non-zero return is reported as failure.
	 */
	int (*subflow_destroy)(struct mptcp_sock *msk,
			       struct mptcp_pm_addr_entry *entry_l,
			       struct mptcp_addr_info *addr_r);
	int (*get_local_id)(struct mptcp_sock *msk,
			    struct mptcp_addr_info *skc);
	bool (*is_backup)(struct mptcp_sock *msk,
			  struct mptcp_addr_info *skc);
	int (*get_addr)(struct mptcp_sock *msk, struct genl_info *info);
	int (*dump_addr)(struct mptcp_sock *msk, struct sk_buff *msg,
			 struct netlink_callback *cb);
	int (*set_flags)(struct mptcp_sock *msk, struct genl_info *info);

	/* Path manager type; mptcp_pm_find() takes a value of this kind. */
	u8 type;
	struct module *owner;
	/* NOTE(review): presumably links registered path managers - confirm. */
	struct list_head list;

	void (*init)(struct mptcp_sock *msk);
	void (*release)(struct mptcp_sock *msk);
} ____cacheline_aligned_in_smp;

The "subflow_create" interface is invoked in mptcp_pm_nl_subflow_create_doit:

/*
 * Excerpt of the netlink "subflow create" handler: parse the local entry and
 * the remote address from the request, then hand both to the path manager's
 * subflow_create callback. Declarations and cleanup are elided.
 */
int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info)
{
	sk = (struct sock *)msk;

	err = mptcp_pm_parse_entry(laddr, info, true, &entry);
	if (err < 0) {
		NL_SET_ERR_MSG_ATTR(info->extack, laddr, "error parsing local addr");
		goto create_err;
	}

	err = mptcp_pm_parse_addr(raddr, info, &addr_r);
	if (err < 0) {
		NL_SET_ERR_MSG_ATTR(info->extack, raddr, "error parsing remote addr");
		goto create_err;
	}

	/* The callback is optional; it is only invoked when the ops set it. */
	if (msk->pm.ops->subflow_create) {
		err = msk->pm.ops->subflow_create(msk, &entry, &addr_r);
		/* Any non-zero return is surfaced to the netlink requester. */
		if (err)
			GENL_SET_ERR_MSG(info, "subflow_create failed");
	}
	... ...
}

And "subflow_destroy" interface is invoked in mptcp_pm_nl_subflow_destroy_doit:

/*
 * Excerpt of the netlink "subflow destroy" handler: parse the local entry and
 * the remote address from the request, then hand both to the path manager's
 * subflow_destroy callback. Declarations and cleanup are elided.
 */
int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info)
{
	... ...
	sk = (struct sock *)msk;

	err = mptcp_pm_parse_entry(laddr, info, true, &entry_l);
	if (err < 0) {
		NL_SET_ERR_MSG_ATTR(info->extack, laddr, "error parsing local addr");
		goto destroy_err;
	}

	err = mptcp_pm_parse_addr(raddr, info, &addr_r);
	if (err < 0) {
		NL_SET_ERR_MSG_ATTR(info->extack, raddr, "error parsing remote addr");
		goto destroy_err;
	}

	/* The callback is optional; it is only invoked when the ops set it. */
	if (msk->pm.ops->subflow_destroy) {
		err = msk->pm.ops->subflow_destroy(msk, &entry_l, &addr_r);
		/* Any non-zero return is surfaced to the netlink requester. */
		if (err)
			GENL_SET_ERR_MSG(info, "subflow_destroy failed");
	}
	... ...
}

Implemented the registration, unregistration and find functions of mptcp_pm_ops:

/* Find a path manager ops set by its type. */
struct mptcp_pm_ops *mptcp_pm_find(u8 type);
/* Register a path manager ops set. */
int mptcp_register_path_manager(struct mptcp_pm_ops *pm);
/* Unregister a path manager ops set. */
void mptcp_unregister_path_manager(struct mptcp_pm_ops *pm);

Added a pointer of mptcp_pm_ops in struct mptcp_pm_data:

/* Per-socket path manager state (excerpt; remaining members elided). */
struct mptcp_pm_data {
	struct mptcp_addr_info local;
	struct mptcp_addr_info remote;
	struct list_head anno_list;
	struct list_head userspace_pm_local_addr_list;
	/* Path manager callbacks in use; reached as msk->pm.ops by the netlink handlers. */
	struct mptcp_pm_ops *ops;
	... ...
};

Added two functions to set and release this pointer "msk->pm.ops":

/* Set msk->pm.ops from @pm. */
int mptcp_init_pm(struct mptcp_sock *msk, struct mptcp_pm_ops *pm);
/* Release msk->pm.ops. */
void mptcp_release_pm(struct mptcp_sock *msk);

mptcp_init_pm() is invoked in mptcp_pm_data_reset():

/* Look up the ops for pm_type and pass them to mptcp_init_pm() for this msk. */
mptcp_init_pm(msk, mptcp_pm_find(pm_type))

In the BPF part, bpf_mptcp_pm_ops of type struct bpf_struct_ops is also implemented:

static struct bpf_struct_ops bpf_mptcp_pm_ops = { .verifier_ops = &bpf_mptcp_pm_verifier_ops, .reg = bpf_mptcp_pm_reg, .unreg = bpf_mptcp_pm_unreg, .check_member = bpf_mptcp_pm_check_member, .init_member = bpf_mptcp_pm_init_member, .init = bpf_mptcp_pm_init, .name = "mptcp_pm_ops", .cfi_stubs = &__bpf_mptcp_pm_ops, };

Its implementation is very similar to bpf_mptcp_sched_ops.

Register it alongside bpf_mptcp_sched_ops:

/*
 * Chained registration: "ret ?: x" keeps a non-zero ret, so each call only
 * runs while ret is still 0 and the first failure is preserved.
 */
ret = ret ?: register_bpf_struct_ops(&bpf_mptcp_sched_ops, mptcp_sched_ops);
ret = ret ?: register_bpf_struct_ops(&bpf_mptcp_pm_ops, mptcp_pm_ops);

The Path manager BPF program is implemented like this:

/*
 * BPF path manager instance of type MPTCP_PM_TYPE_BPF. Only the init/release
 * and subflow create/destroy callbacks are provided.
 */
SEC(".struct_ops")
struct mptcp_pm_ops userspace_pm = {
	.type			= MPTCP_PM_TYPE_BPF,
	.init			= (void *)mptcp_pm_init,
	.release		= (void *)mptcp_pm_release,
	.subflow_create		= (void *)mptcp_pm_subflow_create,
	.subflow_destroy	= (void *)mptcp_pm_subflow_destroy,
};

Currently only the mptcp_pm_subflow_create and mptcp_pm_subflow_destroy functions are implemented:

/*
 * struct_ops callback: create a subflow on @msk from local entry @entry_l to
 * remote address @addr_r.
 *
 * Returns 0 on success; -1 if @entry_l carries MPTCP_PM_ADDR_FLAG_SIGNAL or
 * the address families do not match; otherwise the error returned by
 * mptcp_userspace_pm_append_new_local_addr() or bpf_mptcp_subflow_connect().
 */
SEC("struct_ops")
int BPF_PROG(mptcp_pm_subflow_create, struct mptcp_sock *msk,
	     struct mptcp_pm_addr_entry *entry_l, struct mptcp_addr_info *addr_r)
{
	struct sock *sk = (struct sock *)msk;
	int err;

	/* An entry flagged as "signal" is rejected for subflow creation. */
	if (entry_l->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)
		return -1;
	entry_l->flags |= MPTCP_PM_ADDR_FLAG_SUBFLOW;

	if (!mptcp_pm_addr_families_match(sk, &entry_l->addr, addr_r))
		return -1;

	err = mptcp_userspace_pm_append_new_local_addr(msk, entry_l, false);
	if (err < 0)
		return err;

	err = bpf_mptcp_subflow_connect(msk, entry_l, addr_r);

	/* On connect failure undo the local address added above; else count it. */
	bpf_spin_lock_bh(&msk->pm.lock);
	if (err)
		mptcp_userspace_pm_delete_local_addr(msk, entry_l);
	else
		msk->pm.subflows++;
	bpf_spin_unlock_bh(&msk->pm.lock);

	/*
	 * Propagate the connect result: the netlink handler only reports
	 * "subflow_create failed" when this callback returns non-zero.
	 */
	return err;
}

bpf_mptcp_subflow_connect helper is implemented like this:

__bpf_kfunc int bpf_mptcp_subflow_connect(struct mptcp_sock *msk, struct mptcp_pm_addr_entry *entry, struct mptcp_addr_info *addr) { struct sock *sk = (struct sock *)msk; struct mptcp_pm_local local; int err;

local.addr = entry->addr; local.flags = entry->flags; local.ifindex = entry->ifindex;

rcu_read_unlock(); lock_sock(sk); err = __mptcp_subflow_connect(sk, &local, addr); release_sock(sk); rcu_read_lock();

return err; }

mptcp_pm_subflow_destroy:

/*
 * struct_ops callback: destroy the subflow of @msk that matches local entry
 * @entry_l and remote address @addr_r.
 *
 * Returns -1 if the address families differ or either port is zero,
 * otherwise 0.
 */
SEC("struct_ops")
int BPF_PROG(mptcp_pm_subflow_destroy, struct mptcp_sock *msk,
	     struct mptcp_pm_addr_entry *entry_l, struct mptcp_addr_info *addr_r)
{
	struct mptcp_addr_info *local = &entry_l->addr;
	struct sock *subflow_sk;

	if (local->family != addr_r->family)
		return -1;

	/* Both endpoints must carry a port. */
	if (!(local->port && addr_r->port))
		return -1;

	subflow_sk = bpf_mptcp_nl_find_ssk(msk, local, addr_r);
	/* NOTE(review): a missing subflow is reported as success - confirm intended. */
	if (!subflow_sk)
		return 0;

	bpf_spin_lock_bh(&msk->pm.lock);
	mptcp_userspace_pm_delete_local_addr(msk, entry_l);
	bpf_spin_unlock_bh(&msk->pm.lock);

	bpf_mptcp_subflow_close(msk, subflow_sk);

	return 0;
}

bpf_mptcp_subflow_close helper is implemented like this:

/*
 * kfunc: shut down subflow @ssk in both directions, close it on @msk and
 * bump the MPTCP_MIB_RMSUBFLOW counter.
 */
__bpf_kfunc void bpf_mptcp_subflow_close(struct mptcp_sock *msk,
					 struct sock *ssk)
{
	struct sock *msk_sk = (struct sock *)msk;

	/*
	 * The RCU read-side section is left while the socket lock is held and
	 * re-entered afterwards.
	 * NOTE(review): presumably because lock_sock() may sleep - confirm.
	 */
	rcu_read_unlock();
	lock_sock(msk_sk);

	mptcp_subflow_shutdown(msk_sk, ssk, RCV_SHUTDOWN | SEND_SHUTDOWN);
	mptcp_close_ssk(msk_sk, ssk, mptcp_subflow_ctx(ssk));
	MPTCP_INC_STATS(sock_net(msk_sk), MPTCP_MIB_RMSUBFLOW);

	release_sock(msk_sk);
	rcu_read_lock();
}

Other interfaces are still being implemented. I would like to hear your opinions in advance.