nanovms / nanos

A kernel designed to run one and only one application in a virtualized environment
https://nanos.org
Apache License 2.0
2.66k stars 137 forks source link

issue: kernel crash on TCP bind/listen when address already in use #2066

Closed rinor closed 2 months ago

rinor commented 2 months ago

cpu: 0, context type: 2 lastvector: 000000000000000e (Page fault) frame: ffffc00002a20000 type: syscall active_cpu: 0000000000000000 stack top: ffffc00002a27ff0 error code: 0000000000000002 address: 0000000000000000

rax: 0000000000000000 rbx: ffffc00002e8c1c8 rcx: 0000000000001f90 rdx: 0000000000000000 rsi: 000000000100007f rdi: 0000000000000000 rbp: ffffc00002a27f48 rsp: ffffc00002a27f48 r8: 0000000000000000 r9: 0000000000000000 r10: 0000000000000000 r11: 0000000000000202 r12: ffffc00000a01400 r13: 0000000000000032 r14: 0000000000000000 r15: 0000000000000006 rip: ffffffff8bea5e44 (tcp_ref + 0000000000000004/000000000000000a) rflags: 0000000000010002 ss: 0000000000000008 cs: 0000000000000010 ds: 0000000000000000 es: 0000000000000000 fsbase: 0000000000000000 gsbase: 0000000000000000

frame trace: ffffc00002a27f50: ffffffff8bde2698 (netsock_listen + 00000000000000a8/0000000000000107) ffffc00002a27f70: ffffffff8be46f37 (syscall_handler + 0000000000000147/0000000000000332)

kernel load offset ffffffff8bb83000

loaded klibs:

stack trace: ffffc00002a27f48: ffffc00002a27f68 ffffc00002a27f50: ffffffff8bde2698 (netsock_listen + 00000000000000a8/0000000000000107) ffffc00002a27f58: ffffc00002c02800 ffffc00002a27f60: ffffc00002a20000 ffffc00002a27f68: ffffc00002a27fe8 ffffc00002a27f70: ffffffff8be46f37 (syscall_handler + 0000000000000147/0000000000000332) ffffc00002a27f78: ffffc00002a27fb8 ffffc00002a27f80: ffffffff8be48631 (refcount_release + 0000000000000041/00000000000000b4) ffffc00002a27f88: ffffc00002a20000 ffffc00002a27f90: ffffc00000a01400 ffffc00002a27f98: ffffc00002a27fb8 ffffc00002a27fa0: d9d58bfa8b8b330e ffffc00002a27fa8: d9d58bfa8b8b330e ffffc00002a27fb0: ffffc00002a00000 ffffc00002a27fb8: ffffc00002a27fe8 ffffc00002a27fc0: ffffc00002a20000 ffffc00002a27fc8: 000000c00001a0a0 ffffc00002a27fd0: 0000000000000000 ffffc00002a27fd8: 000000c0000076c0 ffffc00002a27fe0: 03ffffffffffffff ffffc00002a27fe8: 000000c000076a78 ffffc00002a27ff0: 0000000000000000 ffffc00002a27ff8: 0000000000000000 ffffc00002a28000: 0000000000000000 ffffc00002a28008: 0000000000000000 ffffc00002a28010: 0000000000000000 ffffc00002a28018: 0000000000000000 ffffc00002a28020: 0000000000000000 ffffc00002a28028: 0000000000000000 ffffc00002a28030: 0000000000000000 ffffc00002a28038: 0000000000000000 ffffc00002a28040: 0000000000000000


- `code to reproduce`

```go
package main

import (
    "fmt"
    "net"
    "os"
)

// main starts two TCP servers on the same address so that the second
// bind/listen hits the "address already in use" case described in this issue.
func main() {
    const address = "127.0.0.1:8080"

    for i := 0; i < 2; i++ {
        go startTCPServer(address)
    }

    // Block forever; the process only ends if a goroutine calls os.Exit.
    select {}
}

// startTCPServer listens for TCP connections on address and hands every
// accepted connection to handleTCPConnection in its own goroutine.
// If the listener cannot be created, the error is printed and the whole
// process exits with status 1.
func startTCPServer(address string) {
    ln, listenErr := net.Listen("tcp", address)
    if listenErr != nil {
        fmt.Println("Error starting TCP server:", listenErr)
        os.Exit(1)
    }
    defer ln.Close()
    fmt.Println("TCP server listening on", address)

    for {
        c, acceptErr := ln.Accept()
        if acceptErr == nil {
            go handleTCPConnection(c)
            continue
        }
        // Accept failures are reported but do not stop the accept loop.
        fmt.Println("Error accepting TCP connection:", acceptErr)
    }
}

// handleTCPConnection prints the remote address of conn and closes the
// connection when the function returns.
func handleTCPConnection(conn net.Conn) {
    defer conn.Close()

    peer := conn.RemoteAddr()
    fmt.Println("TCP connection established:", peer)
    // Handle TCP connection (e.g., read/write data)
}

// startUDPServer resolves address, opens a UDP socket on it and prints every
// datagram it receives. Resolution or listen failures are printed and end the
// process with status 1; read errors are printed and the loop continues.
// Note: main in this snippet does not call this function.
func startUDPServer(address string) {
    udpAddr, resolveErr := net.ResolveUDPAddr("udp", address)
    if resolveErr != nil {
        fmt.Println("Error resolving UDP address:", resolveErr)
        os.Exit(1)
    }

    sock, listenErr := net.ListenUDP("udp", udpAddr)
    if listenErr != nil {
        fmt.Println("Error starting UDP server:", listenErr)
        os.Exit(1)
    }
    defer sock.Close()
    fmt.Println("UDP server listening on", address)

    // A single 1024-byte buffer is reused for every read.
    buf := make([]byte, 1024)
    for {
        size, sender, readErr := sock.ReadFromUDP(buf)
        if readErr != nil {
            fmt.Println("Error reading UDP packet:", readErr)
            continue
        }
        payload := string(buf[:size])
        fmt.Printf("Received %d bytes from %s: %s\n", size, sender, payload)
        // Handle UDP packet (e.g., respond to the sender)
    }
}
```

rinor commented 2 months ago

So far I have tracked the flow causing the crash to https://github.com/nanovms/lwip/blob/79cd89f99d1032cc5375569e5b24c375b9d230fa/src/core/tcp.c#L723

rinor commented 2 months ago

I don't like this at all, but I had to make these changes to move along: https://github.com/nanovms/nanos/pull/2068

@@ -2088,7 +2088,12 @@ static sysreturn netsock_listen(struct sock *sock, int backlog)
         }
         goto unlock_out;
     }
-    struct tcp_pcb * lw = tcp_listen_with_backlog(s->info.tcp.lw, backlog);
+    err_t err;
+    struct tcp_pcb * lw = tcp_listen_with_backlog_and_err(s->info.tcp.lw, backlog, &err);
+    if (!lw) {
+        rv = lwip_to_errno(err);
+        goto unlock_out;
+    }
     tcp_unref(s->info.tcp.lw);
     tcp_ref(lw);
     s->info.tcp.lw = lw;