aregm / nff-go

NFF-Go -Network Function Framework for GO (former YANFF)
BSD 3-Clause "New" or "Revised" License
1.38k stars 156 forks source link

puzzle of the nff-go performance #713

Open imroadsign opened 4 years ago

imroadsign commented 4 years ago

I'm testing a DNS server built with nff-go (code below), but the performance is very slow: only 3,700 qps. Device info: CPU cores: 4, memory: 4 GB, network cards: 1. DNS test tool: dnsperf.

I'm quite confused by this; I would appreciate any reply. Thanks a lot.

---------code here------------

package main

import ( "flag" "github.com/google/gopacket" "github.com/google/gopacket/layers" "github.com/intel-go/nff-go/flow" "github.com/intel-go/nff-go/packet" "github.com/intel-go/nff-go/types" "github.com/miekg/dns" "net" "net/http" _ "net/http/pprof" )

// Parser and pre-allocated layers to be parsed. type gopacketContext2 struct { eth layers.Ethernet ip4 layers.IPv4 ip6 layers.IPv6 tcp layers.TCP udp layers.UDP dns layers.DNS parser *gopacket.DecodingLayerParser }

// Each handler will use its own gopacketContext. func (ctx gopacketContext2) Copy() interface{} { newCtx := new(gopacketContext2) newCtx.parser = gopacket.NewDecodingLayerParser(layers.LayerTypeEthernet, &newCtx.eth, &newCtx.ip4, &newCtx.ip6, &newCtx.tcp, &newCtx.udp,&newCtx.dns) return newCtx }

// Delete is intentionally a no-op: the context holds nothing that needs
// explicit release.
func (gopacketContext2) Delete() {}

func main() { port := flag.Int("port", 0, "output port") inport := uint16(*port)

flow.CheckFatal(flow.SystemInit(nil))

inputFlow, _ := flow.SetReceiver(inport)

flow.CheckFatal(flow.SetIPForPort(inport, types.IPv4Address(200)<<24|types.IPv4Address(56)<<16|types.IPv4Address(168)<<8|types.IPv4Address(192)))

flow.CheckFatal(flow.DealARPICMP(inputFlow))

var ctx gopacketContext2

flow.SetHandler(inputFlow,gopacketHandleFunc2,ctx)

go http.ListenAndServe("0.0.0.0:3999", nil)
flow.CheckFatal(flow.SetStopper(inputFlow))

flow.CheckFatal(flow.SystemStart())

}

// gopacketHandleFunc2 answers IPv4 UDP packets addressed to port 53.
//
// For each such packet it decodes the frame with the context's gopacket
// parser, builds a DNS answer payload with printLayersInfo2, allocates a new
// packet, fills in swapped Ethernet/IPv4/UDP addressing, computes the IPv4
// and UDP checksums and sends the reply on port 0. Any packet that is not
// IPv4/UDP/53, or whose DNS layer could not be decoded, is left untouched.
//
// current is the received packet; context must be the *gopacketContext2
// produced by gopacketContext2.Copy.
func gopacketHandleFunc2(current *packet.Packet, context flow.UserContext) {
	// Parse L3 and L4 here instead of depending on header pointers that an
	// earlier stage may or may not have set on this packet.
	current.ParseL3()
	reqIP := current.GetIPv4()
	if reqIP == nil {
		return
	}
	current.ParseL4ForIPv4()
	reqUDP := current.GetUDPForIPv4()
	if reqUDP == nil || packet.SwapBytesUint16(reqUDP.DstPort) != 53 {
		return
	}

	ctx, ok := context.(*gopacketContext2)
	if !ok || ctx.parser == nil {
		return
	}

	// The decode error is not used on its own: what matters is whether the
	// DNS layer was decoded during this call. If it was not, ctx.dns still
	// holds data from an earlier packet and must not be answered from.
	var decoded []gopacket.LayerType
	_ = ctx.parser.DecodeLayers(current.GetRawPacketBytes(), &decoded)
	sawDNS := false
	for _, lt := range decoded {
		if lt == layers.LayerTypeDNS {
			sawDNS = true
			break
		}
	}
	if !sawDNS || len(ctx.dns.Questions) == 0 {
		return
	}

	bts := printLayersInfo2(ctx, decoded)
	if len(bts) == 0 {
		return
	}

	// Allocate the reply only once there is a payload to put into it.
	answerPacket, err := packet.NewPacket()
	if err != nil || answerPacket == nil {
		return
	}
	if !packet.InitEmptyIPv4UDPPacket(answerPacket, uint(len(bts))) {
		// NOTE(review): answerPacket is not released on this path; confirm
		// how nff-go expects an unsent packet to be freed.
		return
	}

	// Fill up L2: reply goes back to the sender.
	answerPacket.Ether.DAddr = current.Ether.SAddr
	answerPacket.Ether.SAddr = current.Ether.DAddr

	// Fill up L3
	respIP := answerPacket.GetIPv4NoCheck()
	respIP.DstAddr = reqIP.SrcAddr
	respIP.SrcAddr = reqIP.DstAddr

	// Fill up L4
	respUDP := answerPacket.GetUDPNoCheck()
	respUDP.DstPort = reqUDP.SrcPort
	respUDP.SrcPort = reqUDP.DstPort

	if payloadBuffer, ok := answerPacket.GetPacketPayload(); ok {
		copy(payloadBuffer, bts)
	}

	// Checksums are computed after addresses, ports and payload are final.
	respIP.HdrChecksum = packet.SwapBytesUint16(packet.CalculateIPv4Checksum(respIP))
	respUDP.DgramCksum = packet.SwapBytesUint16(
		packet.CalculateIPv4UDPChecksum(respIP, answerPacket.GetUDPForIPv4(), answerPacket.Data))

	answerPacket.SendPacket(0)
}

// printLayersInfo2 turns the DNS message currently held in ctx.dns into a
// response and returns its serialized bytes.
//
// The response carries a single A record (1.2.3.4, TTL 1024) named after the
// first question, with QR, RD and RA set and no additional records. ctx.dns
// is modified in place. decoded is accepted for the caller's convenience but
// is not consulted here.
//
// It returns nil when ctx.dns has no question to answer or when
// serialization fails.
func printLayersInfo2(ctx *gopacketContext2, decoded []gopacket.LayerType) []byte {
	// Indexing Questions[0] on a message without questions would panic.
	if len(ctx.dns.Questions) == 0 {
		return nil
	}

	answerRR := []layers.DNSResourceRecord{
		{
			Name:  ctx.dns.Questions[0].Name,
			Type:  layers.DNSTypeA,
			Class: layers.DNSClassIN,
			TTL:   1024,
			IP:    net.IP([]byte{1, 2, 3, 4}),
		},
	}

	ctx.dns.QR = true
	ctx.dns.Answers = answerRR
	ctx.dns.Additionals = nil
	ctx.dns.ARCount = 0
	ctx.dns.TC = false
	ctx.dns.AA = false
	ctx.dns.RD = true
	ctx.dns.RA = true
	ctx.dns.OpCode = dns.OpcodeQuery
	ctx.dns.ResponseCode = layers.DNSResponseCodeNoErr

	buff := gopacket.NewSerializeBuffer()
	opt := gopacket.SerializeOptions{
		FixLengths:       true,
		ComputeChecksums: true,
	}

	// A failed serialization must not be sent as if it were a valid answer.
	if err := ctx.dns.SerializeTo(buff, opt); err != nil {
		return nil
	}

	return buff.Bytes()
}

---------------go profile here --------

roadsign@ubuntu:~$ go tool pprof http://localhost:3999/debug/pprof/profile Fetching profile over HTTP from http://localhost:3999/debug/pprof/profile Saved profile in /home/roadsign/pprof/pprof.pingReplay.samples.cpu.003.pb.gz File: pingReplay Build ID: 33be144b95dc0b5aacd1622370aaaf75728a768f Type: cpu Time: Jul 16, 2020 at 2:15am (PDT) Duration: 30.18s, Total samples = 1.40mins (277.40%) Entering interactive mode (type "help" for commands, "o" for options) (pprof) top Showing nodes accounting for 81.37s, 97.20% of 83.71s total Dropped 172 nodes (cum <= 0.42s) Showing top 10 nodes out of 38 flat flat% sum% cum cum% 74.60s 89.12% 89.12% 74.63s 89.15% runtime.cgocall 2.15s 2.57% 91.69% 4.81s 5.75% runtime.selectgo 1.09s 1.30% 92.99% 1.09s 1.30% runtime.madvise 0.86s 1.03% 94.02% 0.86s 1.03% runtime.lock 0.77s 0.92% 94.93% 0.80s 0.96% runtime.unlock 0.66s 0.79% 95.72% 0.66s 0.79% runtime.futex 0.34s 0.41% 96.13% 27.46s 32.80% github.com/intel-go/nff-go/flow.segmentProcess 0.34s 0.41% 96.54% 1.18s 1.41% runtime.sellock 0.29s 0.35% 96.88% 1.05s 1.25% runtime.selunlock 0.27s 0.32% 97.20% 0.48s 0.57% runtime.scanblock (pprof) tree Showing nodes accounting for 81.41s, 97.25% of 83.71s total Dropped 172 nodes (cum <= 0.42s) ----------------------------------------------------------+------------- flat flat% sum% cum cum% calls calls% + context
----------------------------------------------------------+------------- 27.46s 36.79% | github.com/intel-go/nff-go/internal/low._Cfunc_receiveRSS 25.95s 34.77% | github.com/intel-go/nff-go/internal/low._Cfunc_nff_go_stop 21.12s 28.30% | github.com/intel-go/nff-go/internal/low._Cfunc_directSend 0.10s 0.13% | main.gopacketHandleFunc2 74.60s 89.12% 89.12% 74.63s 89.15% | runtime.cgocall ----------------------------------------------------------+------------- 4.81s 100% | github.com/intel-go/nff-go/flow.segmentProcess 2.15s 2.57% 91.69% 4.81s 5.75% | runtime.selectgo 1.18s 24.53% | runtime.sellock 1.05s 21.83% | runtime.selunlock ----------------------------------------------------------+------------- 1.09s 100% | runtime.sysUnused 1.09s 1.30% 92.99% 1.09s 1.30% | runtime.madvise ----------------------------------------------------------+------------- 0.84s 97.67% | runtime.sellock 0.86s 1.03% 94.02% 0.86s 1.03% | runtime.lock ----------------------------------------------------------+------------- 0.76s 95.00% | runtime.selunlock 0.77s 0.92% 94.93% 0.80s 0.96% | runtime.unlock ----------------------------------------------------------+------------- 0.52s 78.79% | runtime.futexwakeup 0.66s 0.79% 95.72% 0.66s 0.79% | runtime.futex ----------------------------------------------------------+------------- 27.46s 100% | github.com/intel-go/nff-go/flow.(instance).startNewClone.func1 0.34s 0.41% 96.13% 27.46s 32.80% | github.com/intel-go/nff-go/flow.segmentProcess 22.14s 80.63% | github.com/intel-go/nff-go/flow.handle 4.81s 17.52% | runtime.selectgo ----------------------------------------------------------+------------- 1.18s 100% | runtime.selectgo 0.34s 0.41% 96.54% 1.18s 1.41% | runtime.sellock 0.84s 71.19% | runtime.lock ----------------------------------------------------------+------------- 1.05s 100% | runtime.selectgo 0.29s 0.35% 96.88% 1.05s 1.25% | runtime.selunlock 0.76s 72.38% | runtime.unlock 
----------------------------------------------------------+------------- 0.48s 100% | runtime.markrootBlock 0.27s 0.32% 97.20% 0.48s 0.57% | runtime.scanblock ----------------------------------------------------------+------------- 22.14s 100% | github.com/intel-go/nff-go/flow.handle 0.03s 0.036% 97.24% 22.14s 26.45% | main.gopacketHandleFunc2 21.13s 95.44% | github.com/intel-go/nff-go/packet.(Packet).SendPacket 0.10s 0.45% | runtime.cgocall

----------------------dns perf report here--------------

Statistics:

Queries sent: 1535994 Queries completed: 1535771 (99.99%) Queries lost: 180 (0.01%) Queries interrupted: 43 (0.00%)

Response codes: NOERROR 1535771 (100.00%) Average packet size: request 32, response 62 Run time (s): 48.760166 Queries per second: 31496.426817

imroadsign2 commented 4 years ago

for help~~~

aregm commented 4 years ago

Hi, @imroadsign2! Thanks for submitting the question. There are two main issues with the example. First, you use the DPDK-based Go sender, which makes several copies of the packet when you mix it with gopacket, which is OS-based. Second, NFF-Go is designed to use at least 4 cores. I see a throughput of 30K, which is not bad for a laptop, taking into account the number of packet copies made and the fact that you use only one core. I also suppose you are using a VM, which adds to the problem. Please run any of the examples without gopacket and let me know if you see better performance.

aregm commented 4 years ago

@imroadsign2 - Can I close the issue?