F-Stack / f-stack

F-Stack is a user-space network development kit with high performance, based on DPDK, the FreeBSD TCP/IP stack and a coroutine API.
http://www.f-stack.org

F-Stack with NLB #816

Open dvo-plv opened 7 months ago

dvo-plv commented 7 months ago

Hello, F-Stack team,

I'm trying to run your solution with a load balancer on an Intel NIC. I have two servers that are connected to each other. My goal is to run 2 web servers and 1 NLB on the server with IP 10.20.22.23 and to curl data from the server 10.20.22.72. I've bound my device to the vfio-pci driver:

[root@localhost nginx_fstack]# /root/dpdk/usertools/dpdk-devbind.py -s
Network devices using DPDK-compatible driver
============================================
0000:48:00.2 'I350 Gigabit Network Connection 1521' drv=vfio-pci unused=igb
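
For completeness, the bind itself was done with dpdk-devbind.py, essentially like this (same tool path and PCI address as above):

[root@localhost nginx_fstack]# /root/dpdk/usertools/dpdk-devbind.py --bind=vfio-pci 0000:48:00.2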

Then I ran F-Stack with the following config:

[dpdk]
# Hexadecimal bitmask of cores to run on.
lcore_mask=1c

# Number of memory channels.
channel=4

# Specify base virtual address to map.
#base_virtaddr=0x7f0000000000

# Promiscuous mode of NIC, default: enabled.
promiscuous=1
numa_on=1

# TX checksum offload skip, default: disabled.
# We need this switch enabled in the following cases:
# -> The application wants to enforce a wrong checksum for testing purposes.
# -> Some cards advertise the offload capability but do not actually calculate the checksum.
tx_csum_offoad_skip=0

# TCP segment offload, default: disabled.
tso=0

# HW vlan strip, default: enabled.
vlan_strip=1

# sleep when no pkts are incoming
# unit: microseconds
idle_sleep=0

# packet send delay time (0-100) while fewer than 32 pkts are pending.
# default: 100 us.
# if set to 0, pkts are sent immediately.
# if set > 100, the delay is capped at 100 us.
# unit: microseconds
pkt_tx_delay=100

# use symmetric Receive-side Scaling (RSS) key, default: disabled.
symmetric_rss=0

# enabled port list
#
# EBNF grammar:
#
#    exp      ::= num_list {"," num_list}
#    num_list ::= <num> | <range>
#    range    ::= <num>"-"<num>
#    num      ::= '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9'
#
# examples
#    0-3       ports 0, 1,2,3 are enabled
#    1-3,4,7   ports 1,2,3,4,7 are enabled
#
# If bonding is used, configure the bonding port id in port_list
# and do not configure the slave port ids in port_list.
# For example, if port 0 and port 1 are trunked into a bonding port 2,
# set `port_list=2` and configure a `[port2]` section.

port_list=0

# Number of vdev.
nb_vdev=0

# Number of bond.
nb_bond=0

# Each core writes to its own pcap file, which is opened once and closed once it is full.
# Supports dumping the first snaplen bytes of each packet.
# If a pcap file grows larger than savelen bytes, it is closed and dumping continues into the next file.
[pcap]
enable = 0
snaplen= 96
savelen= 16777216

# Port config section
# Corresponds to dpdk.port_list's index: port0, port1...
[port0]
addr=192.168.1.2
netmask=255.255.255.0
broadcast=192.168.1.255
gateway=192.168.1.1
# IPv6 net addr
# Optional parameters
#addr6=ff::02
#prefix_len=64
#gateway6=ff::01

# lcore list used to handle this port
# the format is the same as port_list
#lcore_list=0

# bonding slave port list used to handle this port
# must be configured when this port is a bonding port
# the format is the same as port_list
#slave_port_list=0,1

# Packet capture path, this will hurt performance
#pcap=./a.pcap

# Vdev config section
# Corresponds to dpdk.nb_vdev's index: vdev0, vdev1...
#    iface : Usually does not need to be set.
#    path : The vhost-user device path in the container. Required.
#    queues : The max queues of the vhost-user device. Optional, default 1; must be greater than or equal to the number of processes.
#    queue_size : Queue size. Optional, default 256.
#    mac : The MAC address of the vhost-user device. Optional, default random; if vhost uses a physical NIC, it should be set to the physical NIC's MAC.
#    cq : Optional; if queues = 1, default 0; if queues > 1, default 1.
#[vdev0]
##iface=/usr/local/var/run/openvswitch/vhost-user0
#path=/var/run/openvswitch/vhost-user0
#queues=1
#queue_size=256
#mac=00:00:00:00:00:01
#cq=0

# bond config section
# See http://doc.dpdk.org/guides/prog_guide/link_bonding_poll_mode_drv_lib.html
#[bond0]
#mode=4
#slave=0000:0a:00.0,slave=0000:0a:00.1
#primary=0000:0a:00.0
#mac=f0:98:38:xx:xx:xx
## opt argument
#socket_id=0
#xmit_policy=l23
#lsc_poll_period_ms=100
#up_delay=10
#down_delay=50

# KNI config: if enabled and method=reject,
# all packets that do not belong to the following tcp_port and udp_port
# will be transmitted to the kernel; if method=accept, all packets that belong to
# the following tcp_port and udp_port will be transmitted to the kernel.
#[kni]
#enable=1
#method=reject
# The format is the same as port_list
#tcp_port=80,443
#udp_port=53

# FreeBSD network performance tuning configurations.
# Most native FreeBSD configurations are supported.
[freebsd.boot]
hz=100

# Block out a range of descriptors to avoid overlap
# with the kernel's descriptor space.
# You can increase this value according to your app.
fd_reserve=1024

kern.ipc.maxsockets=262144

net.inet.tcp.syncache.hashsize=4096
net.inet.tcp.syncache.bucketlimit=100

net.inet.tcp.tcbhashsize=65536

kern.ncallout=262144

kern.features.inet6=1
net.inet6.ip6.auto_linklocal=1
net.inet6.ip6.accept_rtadv=2
net.inet6.icmp6.rediraccept=1
net.inet6.ip6.forwarding=0

[freebsd.sysctl]
kern.ipc.somaxconn=32768
kern.ipc.maxsockbuf=16777216

net.link.ether.inet.maxhold=5

net.inet.tcp.fast_finwait2_recycle=1
net.inet.tcp.sendspace=16384
net.inet.tcp.recvspace=8192
#net.inet.tcp.nolocaltimewait=1
net.inet.tcp.cc.algorithm=cubic
net.inet.tcp.sendbuf_max=16777216
net.inet.tcp.recvbuf_max=16777216
net.inet.tcp.sendbuf_auto=1
net.inet.tcp.recvbuf_auto=1
net.inet.tcp.sendbuf_inc=16384
net.inet.tcp.recvbuf_inc=524288
net.inet.tcp.sack.enable=1
net.inet.tcp.blackhole=1
net.inet.tcp.msl=2000
net.inet.tcp.delayed_ack=0

net.inet.udp.blackhole=1
net.inet.ip.redirect=0
net.inet.ip.forwarding=0
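
A quick note on the core layout before the nginx config: lcore_mask=1c is binary 11100, i.e. lcores 2, 3 and 4, so three F-Stack processes are expected, which is why worker_processes is set to 3 below. A minimal shell check of that count (the mask value is just pasted in by hand):

# count the set bits in lcore_mask to get the expected worker count
mask=$((0x1c)); n=0
while [ "$mask" -gt 0 ]; do n=$((n + (mask & 1))); mask=$((mask >> 1)); done
echo "$n"   # prints 3 -> worker_processes should be 3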

nginx config:

# root account is necessary.
user  root;
# should be equal to the lcore count of `dpdk.lcore_mask` in f-stack.conf.
worker_processes  3;

# path of f-stack configuration file, default: $NGX_PREFIX/conf/f-stack.conf.
fstack_conf f-stack.conf;
error_log /usr/local/nginx_fstack/logs/combined.log;

events {
    worker_connections  102400;
    use kqueue;
}

http {
    #include mime.types;
    default_type application/octet-stream;
    access_log /usr/local/nginx_fstack/logs/combined.log;
    read_ahead 0;
    tcp_nopush on;
    tcp_nodelay off;
    sendfile off;

    # Cache Configurations
    proxy_cache_path /tmp/content-cache levels=2 keys_zone=nginx-cache:300m max_size=300g inactive=4d use_temp_path=off;

    # Map drives
    split_clients $request_uri $ng_cache {
        100% "nginx-cache";
    }

    # Load Balancer Upstream
    upstream backends {
        server localhost:18080;
        server localhost:18090;
        keepalive 500;
    }

    # Web Server Upstream
    upstream web_backends {
        server localhost:8888;
        keepalive 500;
    }

    # Network Load Balancer Server Block
    server {
        listen 8080 reuseport;
        listen [::]:8080 reuseport;

        #server_name _;
        server_name 192.168.1.2;

        location / {
            proxy_pass http://backends;
        }
    }

    # Web Server Server Block
    server {
        listen 18080 reuseport;
        listen [::]:18080 reuseport;

        server_name _;
        #server_name 192.168.1.2;

        location / {
            #root   html;
            #index  index.html index.htm;
            return 200 "<title>Welcome to F-Stack Nginx 1!";
        }

        #error_page  404              /404.html;

        # redirect server error pages to the static page /50x.html
        #
        error_page   500 502 503 504  /50x.html;
        location = /50x.html {
            root   html;
        }
    }

    # Web Server Server Block
    server {
        listen 18090 reuseport;
        listen [::]:18090 reuseport;

        server_name _;
        #server_name 192.168.1.2;

        location / {
            #root   html;
            #index  index.html index.htm;
            return 200 "<title>Welcome to F-Stack Nginx 2!";
        }

        #error_page  404              /404.html;

        # redirect server error pages to the static page /50x.html
        #
        error_page   500 502 503 504  /50x.html;
        location = /50x.html {
            root   html;
        }
    }
}

When I curl from the server 10.20.22.72, I get the following error:
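
(The request was essentially the following, run from 10.20.22.72; the address and port match the host field in the log below:)

curl http://192.168.1.2:8080/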

EAL: Detected CPU lcores: 112
EAL: Detected NUMA nodes: 2
EAL: Detected static linkage of DPDK
EAL: Multi-process socket /var/run/dpdk/rte/mp_socket
EAL: Selected IOVA mode 'VA'
EAL: VFIO support initialized
EAL: Using IOMMU type 1 (Type 1)
EAL: Ignore mapping IO port bar(2)
EAL: Probe PCI driver: net_e1000_igb (8086:1521) device: 0000:48:00.2 (socket 0)
TELEMETRY: No legacy callbacks, legacy socket not created
EAL: Detected CPU lcores: 112
EAL: Detected NUMA nodes: 2
EAL: Detected static linkage of DPDK
EAL: Detected CPU lcores: 112
EAL: Detected NUMA nodes: 2
EAL: Detected static linkage of DPDK
EAL: Multi-process socket /var/run/dpdk/rte/mp_socket_90362_7fd8b09669c5b
EAL: Multi-process socket /var/run/dpdk/rte/mp_socket_90363_7fd8b0970f803
EAL: Selected IOVA mode 'VA'
EAL: VFIO support initialized
EAL: Selected IOVA mode 'VA'
EAL: VFIO support initialized
EAL: Using IOMMU type 1 (Type 1)
EAL: Probe PCI driver: net_e1000_igb (8086:1521) device: 0000:48:00.2 (socket 0)
EAL: Using IOMMU type 1 (Type 1)
EAL: Probe PCI driver: net_e1000_igb (8086:1521) device: 0000:48:00.2 (socket 0)
2024/04/19 12:32:38 [error] 90358#0: *1 upstream timed out (110: Connection timed out) while connecting to upstream, client: 192.168.1.72, server: 192.168.1.2, request: "GET / HTTP/1.1", upstream: "http://[::1]:18080/", host: "192.168.1.2:8080"
2024/04/19 12:32:38 [alert] 90357#0: worker process 90358 exited on signal 11 (core dumped)
EAL: Detected CPU lcores: 112
EAL: Detected NUMA nodes: 2
EAL: Detected static linkage of DPDK
EAL: Multi-process socket /var/run/dpdk/rte/mp_socket
EAL: Selected IOVA mode 'VA'
EAL: VFIO support initialized
EAL: Cannot allocate memzone list
EAL: FATAL: Cannot init memzone
EAL: Cannot init memzone
EAL: Error - exiting with code: 1
  Cause: Error with EAL initialization
EAL: Detected CPU lcores: 112
EAL: Detected NUMA nodes: 2
EAL: Detected static linkage of DPDK
EAL: Multi-process socket /var/run/dpdk/rte/mp_socket
EAL: Selected IOVA mode 'VA'
EAL: VFIO support initialized
EAL: Cannot allocate memzone list
EAL: FATAL: Cannot init memzone
EAL: Cannot init memzone
EAL: Error - exiting with code: 1
  Cause: Error with EAL initialization

The basic scenario, where I ran your web server (/usr/local/nginx_fstack/conf/nginx.conf) on the server .23 and curled data from the server .72, worked properly:

[root@localhost nginx_fstack]# curl 192.168.1.2:1180
<title>Welcome to F-Stack Nginx!</title>
 pad data:0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789[root@localhost nginx_fstack]# 

My system is:

[root@localhost nginx_fstack]# cat /etc/os-release 
NAME="CentOS Stream"
VERSION="8"
ID="centos"
ID_LIKE="rhel fedora"
VERSION_ID="8"
PLATFORM_ID="platform:el8"
PRETTY_NAME="CentOS Stream 8"
ANSI_COLOR="0;31"
CPE_NAME="cpe:/o:centos:centos:8"
HOME_URL="https://centos.org/"
BUG_REPORT_URL="https://bugzilla.redhat.com/"
REDHAT_SUPPORT_PRODUCT="Red Hat Enterprise Linux 8"
REDHAT_SUPPORT_PRODUCT_VERSION="CentOS Stream"
[root@localhost nginx_fstack]# uname -r
4.18.0-552.el8.x86_64

Could you please advise me on this issue?