F-Stack / f-stack

F-Stack is a high-performance user-space network development kit based on DPDK, the FreeBSD TCP/IP stack, and a coroutine API.
http://www.f-stack.org
Other
3.81k stars 888 forks source link

求助:节点间报文echo测试,比普通socket慢好多好多。 #664

Open KylixC opened 2 years ago

KylixC commented 2 years ago

A,B两个机器,10G网卡直连。 1 A的IP规划为192.168.1.2作为Server,B的IP规划为192.168.1.3作为Client。 2 Server分别采用F-Stack来实现和普通socket来实现,Client只采用普通Socket来实现。 3 协议使用TCP。 4 简单echo(pingpong)模式。Client发送后等待接收,有接收后(可能有没收满情况),即发送下一个。 5 报文长度为4K。 6 Client发送接收1000万次后结束。统计时间。 7 测试结果: 普通Socket的Server 38秒 F-Stack的Server 99秒 在B节点采用sar观察,也证实数据倍数关系无误。 8 Server的配置文件中,和默认的有少量改动,因为原来的需要300多秒,另外还有个建链慢的问题。

Client代码如下

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <time.h>

#include <sys/socket.h>
#include <arpa/inet.h>

#include "comm_def.h"

/*
 * Echo benchmark client.
 *
 * Connects to SERVER_IP:SERVER_PORT over TCP, then performs up to TEST_TIMES
 * rounds of "write one payload, read one reply". Each round issues exactly
 * one read(), so a reply may be shorter than what was written; the byte
 * totals printed at the end reflect what was actually transferred.
 * Elapsed wall-clock time is measured with time(), i.e. in whole seconds.
 */
int main(int argc, char *argv[])
{
    int fd = socket(AF_INET, SOCK_STREAM, 0);
    if (fd < 0)
    {
        printf("ERROR! socket failed! sockfd=%d errno=%d %s\n", fd, errno, strerror(errno));
        exit(1);
    }
    printf("INFO! socket init ok! fd=%d\n", fd);

    /* Build the server address from the shared compile-time settings. */
    struct sockaddr_in peer = {0};
    peer.sin_family = AF_INET;
    peer.sin_port = htons(SERVER_PORT);
    int pton_rc = inet_pton(AF_INET, SERVER_IP, &peer.sin_addr);
    if (pton_rc <= 0)
    {
        printf("ERROR! inet_pton failed! fd=%d\n", fd);
        exit(1);
    }

    printf("INFO! prepare connect server...\n");
    int conn_rc = connect(fd, (struct sockaddr *)&peer, sizeof(peer));
    if (conn_rc < 0)
    {
        printf("ERROR! connect failed! fd=%d\n", fd);
        exit(1);
    }
    printf("INFO! connect ok! fd=%d\n", fd);

    char rxbuf[MAX_DATA_LEN] = {0};
    char payload[MAX_DATA_LEN + 1] = {0};

    /* Fill the payload with pseudo-random decimal digits; the extra
     * trailing byte stays zero so strlen() yields the payload length. */
    char *cursor = payload;
    while (cursor < payload + MAX_DATA_LEN)
    {
        *cursor = rand() % 10 + '0';
        ++cursor;
    }
    int payload_len = strlen(payload);

    long int tx_total = 0;
    long int rx_total = 0;
    int n = 0;
    int round = 0;
    time_t t_begin = time(NULL);
    while (round < TEST_TIMES)
    {
        n = write(fd, payload, payload_len);
        if (n <= 0)
        {
            printf("ERROR! write failed! sockfd=%d errno=%d %s\n", fd, errno, strerror(errno));
            break;
        }
        tx_total += n;

        /* Ask for at most as many bytes as were just written. */
        n = read(fd, rxbuf, n);
        if (n <= 0)
        {
            printf("ERROR! read failed! sockfd=%d errno=%d %s\n", fd, errno, strerror(errno));
            break;
        }
        rx_total += n;

        ++round;
    }
    time_t t_end = time(NULL);

    printf("INFO! write read finish! packetlen=%d times=%d sendsum=%ld recvsum=%ld cost=%ld\n", 
        payload_len, TEST_TIMES, tx_total, rx_total, t_end - t_begin);

    shutdown(fd, SHUT_RDWR);
    close(fd);
    return 0;
}

comm_def.h的文件如下

/*
 * comm_def.h - settings shared by the echo client and both echo servers.
 */
#ifndef COMM_DEF_H
#define COMM_DEF_H

#define SERVER_PORT     58600       /* TCP port the server listens on */
#define MAX_DATA_LEN    4096        /* payload / buffer size in bytes */
#define MAX_EVENTS      8           /* kevent array size; also used as listen backlog */
#define TEST_TIMES      10000000    /* number of write/read rounds the client runs */

/*
 * Server IPv4 address in dotted-decimal form.
 * Declared static so that including this header from more than one
 * translation unit does not produce duplicate external definitions.
 */
static const char * const SERVER_IP = "192.168.1.2";

#endif

普通Socket的Server代码如下

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <errno.h>
#include <unistd.h>
#include <string.h>
#include <signal.h>

#include <arpa/inet.h>

#include "comm_def.h"

/* Listening socket descriptor; assigned from socket() in main(). */
int _sfd = 0;

/*
 * Blocking TCP echo server built on plain POSIX sockets.
 *
 * Binds to SERVER_IP:SERVER_PORT, then serves one client at a time:
 * every chunk read from the client is written back in full. The inner
 * loop ends when the client closes the connection or an I/O error occurs,
 * after which the next client is accepted.
 *
 * Returns non-zero if accept() fails with an unrecoverable error; all
 * set-up failures terminate the process via exit(1).
 */
int main(int argc, char * argv[])
{
    _sfd = socket(AF_INET, SOCK_STREAM, 0);
    if (_sfd < 0) 
    {
        printf("ERROR! socket failed! sockfd=%d errno=%d %s\n", _sfd, errno, strerror(errno));
        exit(1);
    }
    printf("INFO! socket init ok! fd=%d\n", _sfd);

    struct sockaddr_in my_addr = {0};
    my_addr.sin_family = AF_INET;
    my_addr.sin_port = htons(SERVER_PORT);
    if (inet_pton(AF_INET, SERVER_IP, &my_addr.sin_addr) <= 0)
    {
        printf("ERROR! inet_pton failed! sockfd=%d errno=%d %s\n", _sfd, errno, strerror(errno));
        exit(1);
    }

    int ret = bind(_sfd, (struct sockaddr *)&my_addr, sizeof(my_addr));
    if (ret < 0) 
    {
        printf("ERROR! bind failed! sockfd=%d errno=%d %s\n", _sfd, errno, strerror(errno));
        exit(1);
    }

    ret = listen(_sfd, 10);
    if (ret < 0) 
    {
        printf("ERROR! listen failed! sockfd=%d errno=%d %s\n", _sfd, errno, strerror(errno));
        exit(1);
    }

    /* A write to a connection the peer already closed must surface as an
     * error return instead of killing the process with SIGPIPE. */
    signal(SIGPIPE, SIG_IGN);

    char buf[MAX_DATA_LEN] = {0};
    struct sockaddr_in remote_addr = {0};
    for (; ;)
    {
        /* Size of the buffer actually handed to accept(). */
        socklen_t addrlen = sizeof(remote_addr);
        int cfd = accept(_sfd, (struct sockaddr*)&remote_addr, &addrlen);
        if (cfd < 0)
        {
            if (errno == EINTR)
            {
                continue;
            }
            printf("ERROR! accept failed! sockfd=%d errno=%d %s\n", _sfd, errno, strerror(errno));
            break;
        }
        printf("INFO! accept. cfd=%d\n", cfd);

        for (; ;)
        {
            ssize_t nread = read(cfd, buf, sizeof(buf));
            if (nread == 0)
            {
                printf("INFO! client exit. cfd=%d\n", cfd);
                break;
            }
            if (nread < 0)
            {
                if (errno == EINTR)
                {
                    continue;
                }
                /* Never pass a negative count on to write(): it would be
                 * converted to a huge size_t. */
                printf("ERROR! read failed! cfd=%d errno=%d %s\n", cfd, errno, strerror(errno));
                break;
            }

            /* write() may accept fewer bytes than requested; keep going
             * until the whole chunk has been echoed. */
            ssize_t done = 0;
            while (done < nread)
            {
                ssize_t nwritten = write(cfd, buf + done, (size_t)(nread - done));
                if (nwritten < 0)
                {
                    if (errno == EINTR)
                    {
                        continue;
                    }
                    printf("ERROR! write failed! cfd=%d errno=%d %s\n", cfd, errno, strerror(errno));
                    break;
                }
                done += nwritten;
            }
            if (done < nread)
            {
                break;
            }
        }
        close(cfd);
    }

    /* Only reached after an unrecoverable accept() failure. */
    close(_sfd);
    return 1;
}

F-Stack的Server代码如下:

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <errno.h>
#include <assert.h>
#include <string.h>
#include <arpa/inet.h>

#include "ff_config.h"
#include "ff_api.h"

#include "comm_def.h"

/* Scratch change record filled by EV_SET() and passed to ff_kevent() to register a descriptor. */
struct kevent _kset = {0};
/* Output array that ff_kevent() fills with at most MAX_EVENTS triggered events. */
struct kevent _kevs[MAX_EVENTS] = {{0}};
/* kqueue descriptor; assigned from ff_kqueue() in main(). */
int _kfd = 0;
/* Listening socket descriptor; assigned from ff_socket() in main(). */
int _sfd = 0;

int loop(void *arg)
{
    char buf[MAX_DATA_LEN] = {0};
    int nevents = ff_kevent(_kfd, NULL, 0, _kevs, MAX_EVENTS, NULL);
    for (int k = 0; k < nevents; ++k) 
    {
        struct kevent event = _kevs[k];
        int curfd = (int)event.ident;
        if (event.flags & EV_EOF) 
        {
            printf("INFO! client exit. fd=%d\n", curfd);
            ff_close(curfd);
        } 
        else if (curfd == _sfd) 
        {
            int available = (int)event.data;
            do 
            {
                int newfd = ff_accept(curfd, NULL, NULL);
                if (newfd < 0) 
                {
                    printf("ERROR! ff_accept failed! error=%d %s\n", errno, strerror(errno));
                    break;
                }

                EV_SET(&_kset, newfd, EVFILT_READ, EV_ADD, 0, 0, NULL);

                if (ff_kevent(_kfd, &_kset, 1, NULL, 0, NULL) < 0) 
                {
                    printf("ERROR! ff_kevent failed! error=%d %s\n", errno, strerror(errno));
                    return -1;
                }

                printf("INFO! accept. newfd=%d ava=%d event=%d\n", newfd, available, nevents);
                available--;
            } while (available);
        } 
        else if (event.filter == EVFILT_READ) 
        {
            size_t readlen = ff_recv(curfd, buf, MAX_DATA_LEN, 0);
            ff_send(curfd, buf, readlen, 0); //echo
        }
        else if (event.filter == EVFILT_WRITE)
        {
            char* msg = "hi, welcome connect!";
            ff_write(curfd, msg, strlen(msg));
        }
        else 
        {
            printf("WARN! unknown event=%8.8X\n", event.flags);
        }
    }

    return 0;
}

/*
 * F-Stack TCP echo server entry point.
 *
 * Initialises F-Stack from the command-line arguments, creates a listening
 * socket on SERVER_IP:SERVER_PORT, registers it with a kqueue for
 * EVFILT_READ, and hands control to ff_run() with loop() as the callback.
 * Any set-up failure is reported and terminates the process via exit(1).
 */
int main(int argc, char * argv[])
{
    if (ff_init(argc, argv) < 0)
    {
        printf("ERROR! ff_init failed! errno=%d %s\n", errno, strerror(errno));
        exit(1);
    }

    _sfd = ff_socket(AF_INET, SOCK_STREAM, 0);
    if (_sfd < 0) 
    {
        printf("ERROR! ff_socket failed! sockfd=%d errno=%d %s\n", _sfd, errno, strerror(errno));
        exit(1);
    }
    printf("INFO! ff_socket init ok! fd=%d\n", _sfd);

    struct sockaddr_in my_addr = {0};
    my_addr.sin_family = AF_INET;
    my_addr.sin_port = htons(SERVER_PORT);
    if (inet_pton(AF_INET, SERVER_IP, &my_addr.sin_addr) <= 0)
    {
        printf("ERROR! inet_pton failed! sockfd=%d errno=%d %s\n", _sfd, errno, strerror(errno));
        exit(1);
    }

    int ret = ff_bind(_sfd, (struct linux_sockaddr *)&my_addr, sizeof(my_addr));
    if (ret < 0) 
    {
        printf("ERROR! ff_bind failed! sockfd=%d errno=%d %s\n", _sfd, errno, strerror(errno));
        exit(1);
    }

    ret = ff_listen(_sfd, MAX_EVENTS);
    if (ret < 0) 
    {
        printf("ERROR! ff_listen failed! sockfd=%d errno=%d %s\n", _sfd, errno, strerror(errno));
        exit(1);
    }

    EV_SET(&_kset, _sfd, EVFILT_READ, EV_ADD, 0, MAX_EVENTS, NULL);

    _kfd = ff_kqueue();
    if (_kfd <= 0)
    {
        printf("ERROR! ff_kqueue failed! sockfd=%d errno=%d %s\n", _sfd, errno, strerror(errno));
        ff_close(_sfd);
        exit(1);
    }

    /* Without this registration loop() would never see an incoming
     * connection, so a failure here is fatal. */
    if (ff_kevent(_kfd, &_kset, 1, NULL, 0, NULL) < 0)
    {
        printf("ERROR! ff_kevent failed! sockfd=%d errno=%d %s\n", _sfd, errno, strerror(errno));
        ff_close(_kfd);
        ff_close(_sfd);
        exit(1);
    }
    printf("INFO! prepare loop! fd=%d kq=%d\n", _sfd, _kfd);
    ff_run(loop, NULL);

    return 0;
}

配置文件如下:

[dpdk]
# Hexadecimal bitmask of cores to run on.
lcore_mask=0x4

# Number of memory channels.
channel=4

# Specify base virtual address to map.
#base_virtaddr=0x7f0000000000

# Promiscuous mode of nic, default: enabled.
promiscuous=0
numa_on=1

# TX checksum offload skip, default: disabled.
# We need this switch enabled in the following cases:
# -> The application wants to enforce a wrong checksum for testing purposes
# -> Some cards advertise the offload capability but do not actually calculate the checksum.
tx_csum_offoad_skip=0

# TCP segment offload, default: disabled.
tso=1

# HW vlan strip, default: enabled.
vlan_strip=1

# sleep when there are no incoming pkts
# unit: microseconds
idle_sleep=0

# sent packet delay time(0-100) while send less than 32 pkts.
# default 100 us.
# if set 0, means send pkts immediately.
# if set >100, will delay 100 us.
# unit: microseconds
pkt_tx_delay=3

# use symmetric Receive-side Scaling(RSS) key, default: disabled.
symmetric_rss=0

# PCI device enable list.
# And driver options
#pci_whitelist=02:00.0
# for multiple PCI devices
#pci_whitelist=02:00.0,03:00.0

# enabled port list
#
# EBNF grammar:
#
#    exp      ::= num_list {"," num_list}
#    num_list ::= <num> | <range>
#    range    ::= <num>"-"<num>
#    num      ::= '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9'
#
# examples
#    0-3       ports 0, 1,2,3 are enabled
#    1-3,4,7   ports 1,2,3,4,7 are enabled
#
# If bonding is used, configure the bonding port id in port_list
# and do not configure the slave port ids in port_list.
# For example, if port 0 and port 1 are trunked into bonding port 2,
# set `port_list=2` and configure the `[port2]` section.

port_list=0

# Number of vdev.
nb_vdev=0

# Number of bond.
nb_bond=0

# Each core write into own pcap file, which is open one time, close one time if enough.
# Support dump the first snaplen bytes of each packet.
# if the pcap file is larger than savelen bytes, it will be closed and the dump continues in the next file.
[pcap]
enable=0
snaplen=96
savelen=16777216
savepath=.

# Port config section
# Correspond to dpdk.port_list's index: port0, port1...
[port0]
addr=192.168.1.2
netmask=255.255.255.0
broadcast=192.168.1.255
gateway=192.168.1.1
# set interface name, Optional parameter.
# if_name=net0

# IPv6 net addr, Optional parameters.
#addr6=ff::02
#prefix_len=64
#gateway6=ff::01

# Multi virtual IPv4/IPv6 net addr, Optional parameters.
#   `vip_ifname`: default `f-stack-x`
#   `vip_addr`: Separated by semicolons, MAX number 64;
#           Only support netmask 255.255.255.255, broadcast x.x.x.255 now, hard code in `ff_veth_setvaddr`.
#   `vip_addr6`: Separated by semicolons, MAX number 64.
#   `vip_prefix_len`: All addr6 use the same prefix now, default 64.
#vip_ifname=lo0
#vip_addr=192.168.1.3;192.168.1.4;192.168.1.5;192.168.1.6
#vip_addr6=ff::03;ff::04;ff::05;ff::06;ff::07
#vip_prefix_len=64

# lcore list used to handle this port
# the format is same as port_list
#lcore_list=0

# bonding slave port list used to handle this port
# need to config while this port is a bonding port
# the format is same as port_list
#slave_port_list=0,1

# Vdev config section
# Correspond to dpdk.nb_vdev's index: vdev0, vdev1...
#    iface : Shouldn't set always.
#    path : The vuser device path in container. Required.
#    queues : The max queues of vuser. Optional, default 1, greater or equal to the number of processes.
#    queue_size : Queue size.Optional, default 256.
#    mac : The mac address of vuser. Optional, default random, if vhost use phy NIC, it should be set to the phy NIC's mac.
#    cq : Optional, if queues = 1, default 0; if queues > 1 default 1.
#[vdev0]
##iface=/usr/local/var/run/openvswitch/vhost-user0
#path=/var/run/openvswitch/vhost-user0
#queues=1
#queue_size=256
#mac=00:00:00:00:00:01
#cq=0

# bond config section
# See http://doc.dpdk.org/guides/prog_guide/link_bonding_poll_mode_drv_lib.html
#[bond0]
#mode=4
#slave=0000:0a:00.0,slave=0000:0a:00.1
#primary=0000:0a:00.0
#mac=f0:98:38:xx:xx:xx
## opt argument
#socket_id=0
#xmit_policy=l23
#lsc_poll_period_ms=100
#up_delay=10
#down_delay=50

# Kni config: if enabled and method=reject,
# all packets that do not belong to the following tcp_port and udp_port
# will transmit to kernel; if method=accept, all packets that belong to
# the following tcp_port and udp_port will transmit to kernel.
#[kni]
#enable=1
#method=reject
# The format is same as port_list
#tcp_port=80,443
#udp_port=53

# FreeBSD network performance tuning configurations.
# Most native FreeBSD configurations are supported.
[freebsd.boot]
hz=100

# Block out a range of descriptors to avoid overlap
# with the kernel's descriptor space.
# You can increase this value according to your app.
fd_reserve=1024

kern.ipc.maxsockets=262144

net.inet.tcp.syncache.hashsize=4
net.inet.tcp.syncache.bucketlimit=4

net.inet.tcp.tcbhashsize=65536

kern.ncallout=262144

kern.features.inet6=0
# net.inet6.ip6.auto_linklocal=1
# net.inet6.ip6.accept_rtadv=2
# net.inet6.icmp6.rediraccept=1
# net.inet6.ip6.forwarding=0

[freebsd.sysctl]
kern.ipc.somaxconn=32768
kern.ipc.maxsockbuf=16777216

net.link.ether.inet.maxhold=5

net.inet.tcp.fast_finwait2_recycle=1
net.inet.tcp.sendspace=65536
net.inet.tcp.recvspace=65536
#net.inet.tcp.nolocaltimewait=1
net.inet.tcp.cc.algorithm=cubic
net.inet.tcp.sendbuf_max=16777216
net.inet.tcp.recvbuf_max=16777216
net.inet.tcp.sendbuf_auto=1
net.inet.tcp.recvbuf_auto=1
net.inet.tcp.sendbuf_inc=16384
#net.inet.tcp.recvbuf_inc=524288
net.inet.tcp.sack.enable=1
net.inet.tcp.blackhole=1
net.inet.tcp.msl=2000
net.inet.tcp.delayed_ack=1

net.inet.udp.blackhole=1
net.inet.ip.redirect=1
net.inet.ip.forwarding=0

#set default stacks:freebsd, rack or bbr
net.inet.tcp.functions_default=freebsd
jfb8856606 commented 2 years ago

Change pkt_tx_delay=100 to pkt_tx_delay=0 if you are running a simple ping-pong test.

jfb8856606 commented 2 years ago

net.inet.tcp.delayed_ack=1 -> net.inet.tcp.delayed_ack=0

zhiaipan commented 1 month ago

我也遇到这样的问题,tcp超级慢,请问您解决这个问题了吗?