nats-io / nats-server

High-Performance server for NATS.io, the cloud and edge native messaging system.
https://nats.io
Apache License 2.0
14.93k stars 1.36k forks source link

Upgrading golang causes jetstream issues #5210

Open KnownSubset opened 2 months ago

KnownSubset commented 2 months ago

Observed behavior

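Running a server with JetStream enabled crashes during startup with `fatal error: checkptr: converted pointer straddles multiple allocations`: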

=== RUN   TestRegisterMetrics
2024-03-13T02:08:13.428Z        INFO    utils/zapAdapter.go:13  Starting nats-server
2024-03-13T02:08:13.428Z        INFO    utils/zapAdapter.go:13    Version:  2.10.12
2024-03-13T02:08:13.428Z        INFO    utils/zapAdapter.go:13    Git:      [not set]
2024-03-13T02:08:13.428Z        INFO    utils/zapAdapter.go:13    Name:     NAQTKNAMICWQRWDC56CH5F7FMGLCGUT2I7PB6ROTX6AJVZQ3LPOWHCG6
2024-03-13T02:08:13.428Z        INFO    utils/zapAdapter.go:13    Node:     P6HJf9Mr
2024-03-13T02:08:13.428Z        INFO    utils/zapAdapter.go:13    ID:       NAQTKNAMICWQRWDC56CH5F7FMGLCGUT2I7PB6ROTX6AJVZQ3LPOWHCG6
2024-03-13T02:08:13.430Z        INFO    utils/zapAdapter.go:13  Starting JetStream
fatal error: checkptr: converted pointer straddles multiple allocations

goroutine 37 gp=0xc00018cfc0 m=8 mp=0xc000082408 [running]:
runtime.throw({0x1036b07a3?, 0xc000492278?})
        GOROOT/src/runtime/panic.go:1023 +0x40 fp=0xc000488630 sp=0xc000488600 pc=0x102657f70
runtime.checkptrAlignment(0x103674a0a?, 0xa?, 0x0?)
        GOROOT/src/runtime/checkptr.go:26 +0x70 fp=0xc000488650 sp=0xc000488630 pc=0x1026245a0
github.com/nats-io/nats-server/v2/server/sysmem.sysctlInt64({0x103674a0a, 0xa})
        external/gazelle~override~go_deps~com_github_nats_io_nats_server_v2/server/sysmem/sysctl.go:31 +0x118 fp=0xc000488740 sp=0xc000488650 pc=0x102c020a8
github.com/nats-io/nats-server/v2/server/sysmem.Memory()
        external/gazelle~override~go_deps~com_github_nats_io_nats_server_v2/server/sysmem/mem_darwin.go:20 +0x3c fp=0xc000488790 sp=0xc000488740 pc=0x102c01f5c
github.com/nats-io/nats-server/v2/server.(*Server).dynJetStreamConfig(0xc000408008, {0xc0001fe240, 0x52}, 0xffffffffffffffff, 0xffffffffffffffff)
        external/gazelle~override~go_deps~com_github_nats_io_nats_server_v2/server/jetstream.go:2477 +0x5ac fp=0xc0004889e0 sp=0xc000488790 pc=0x102e2495c
github.com/nats-io/nats-server/v2/server.(*Server).EnableJetStream(0xc000408008, 0xc000489698)
        external/gazelle~override~go_deps~com_github_nats_io_nats_server_v2/server/jetstream.go:198 +0x298 fp=0xc000488dc0 sp=0xc0004889e0 pc=0x102e07e68
github.com/nats-io/nats-server/v2/server.(*Server).Start(0xc000408008)
        external/gazelle~override~go_deps~com_github_nats_io_nats_server_v2/server/server.go:2272 +0x2330 fp=0xc000489dc0 sp=0xc000488dc0 pc=0x1030cb280
git.corp.tanium.com/tanium/go/internal/messaging/metrics.TestRegisterMetrics(0xc0001ac820)
        internal/messaging/metrics/register_test.go:16 +0x6c fp=0xc000489ed0 sp=0xc000489dc0 pc=0x103626a7c
testing.tRunner(0xc0001ac820, 0x103bcf670)
        GOROOT/src/testing/testing.go:1689 +0x184 fp=0xc000489fa0 sp=0xc000489ed0 pc=0x1028bd044
testing.(*T).Run.gowrap1()
        GOROOT/src/testing/testing.go:1742 +0x44 fp=0xc000489fd0 sp=0xc000489fa0 pc=0x1028be4e4
runtime.goexit({})
        src/runtime/asm_arm64.s:1222 +0x4 fp=0xc000489fd0 sp=0xc000489fd0 pc=0x102696c54
created by testing.(*T).Run in goroutine 1
        GOROOT/src/testing/testing.go:1742 +0x5e8

goroutine 1 gp=0xc0000021c0 m=nil [chan receive]:
runtime.gopark(0x0?, 0xc00019e768?, 0x18?, 0x0?, 0x1026a554c?)
        GOROOT/src/runtime/proc.go:402 +0xc8 fp=0xc000315260 sp=0xc000315240 pc=0x10265ad08
runtime.chanrecv(0xc0001f20e0, 0xc00031536f, 0x1)
        GOROOT/src/runtime/chan.go:583 +0x3a8 fp=0xc0003152f0 sp=0xc000315260 pc=0x102623d48
runtime.chanrecv1(0x1042bb660?, 0x103a45860?)
        GOROOT/src/runtime/chan.go:442 +0x14 fp=0xc000315320 sp=0xc0003152f0 pc=0x102623964
testing.(*T).Run(0xc0001ac680, {0x10367cd48, 0x13}, 0x103bcf670)
        GOROOT/src/testing/testing.go:1750 +0x604 fp=0xc000315440 sp=0xc000315320 pc=0x1028be2e4
testing.runTests.func1(0xc0001ac680)
        GOROOT/src/testing/testing.go:2161 +0x84 fp=0xc000315490 sp=0xc000315440 pc=0x1028c19d4
testing.tRunner(0xc0001ac680, 0xc000315698)
        GOROOT/src/testing/testing.go:1689 +0x184 fp=0xc000315560 sp=0xc000315490 pc=0x1028bd044
testing.runTests(0xc00019e708, {0x104246d80, 0x1, 0x1}, {0xc0000021c0?, 0x10526a7cc?, 0x0?})
        GOROOT/src/testing/testing.go:2159 +0x6e4 fp=0xc0003156c0 sp=0xc000315560 pc=0x1028c1864
testing.(*M).Run(0xc000347cc0)
        GOROOT/src/testing/testing.go:2027 +0xb70 fp=0xc000315a40 sp=0xc0003156c0 pc=0x1028bf6a0
main.main()
        bazel-out/darwin_arm64-dbg/bin/internal/messaging/metrics/metrics_test_/testmain.go:121 +0x47c fp=0xc000315f40 sp=0xc000315a40 pc=0x103627c5c
runtime.main()
        GOROOT/src/runtime/proc.go:271 +0x28c fp=0xc000315fd0 sp=0xc000315f40 pc=0x10265a8ac
runtime.goexit({})
        src/runtime/asm_arm64.s:1222 +0x4 fp=0xc000315fd0 sp=0xc000315fd0 pc=0x102696c54

goroutine 2 gp=0xc000002c40 m=nil [force gc (idle)]:
runtime.gopark(0x0?, 0x10426c3d0?, 0x60?, 0xe3?, 0x0?)
        GOROOT/src/runtime/proc.go:402 +0xc8 fp=0xc00007a790 sp=0xc00007a770 pc=0x10265ad08
runtime.goparkunlock(...)
        GOROOT/src/runtime/proc.go:408
runtime.forcegchelper()
        GOROOT/src/runtime/proc.go:326 +0xb8 fp=0xc00007a7d0 sp=0xc00007a790 pc=0x10265ab68
runtime.goexit({})
        src/runtime/asm_arm64.s:1222 +0x4 fp=0xc00007a7d0 sp=0xc00007a7d0 pc=0x102696c54
created by runtime.init.6 in goroutine 1
        GOROOT/src/runtime/proc.go:314 +0x24

goroutine 3 gp=0xc000003180 m=nil [GC sweep wait]:
runtime.gopark(0x0?, 0x0?, 0x0?, 0x0?, 0x0?)
        GOROOT/src/runtime/proc.go:402 +0xc8 fp=0xc000092f60 sp=0xc000092f40 pc=0x10265ad08
runtime.goparkunlock(...)
        GOROOT/src/runtime/proc.go:408
runtime.bgsweep(0xc0000a4000)
        GOROOT/src/runtime/mgcsweep.go:278 +0xa0 fp=0xc000092fb0 sp=0xc000092f60 pc=0x1026449f0
runtime.gcenable.gowrap1()
        GOROOT/src/runtime/mgc.go:203 +0x28 fp=0xc000092fd0 sp=0xc000092fb0 pc=0x102638b78
runtime.goexit({})
        src/runtime/asm_arm64.s:1222 +0x4 fp=0xc000092fd0 sp=0xc000092fd0 pc=0x102696c54
created by runtime.gcenable in goroutine 1
        GOROOT/src/runtime/mgc.go:203 +0x6c

goroutine 4 gp=0xc000003340 m=nil [GC scavenge wait]:
runtime.gopark(0xc0000a4000?, 0x1039a2ca0?, 0x1?, 0x0?, 0xc000003340?)
        GOROOT/src/runtime/proc.go:402 +0xc8 fp=0xc00008cf60 sp=0xc00008cf40 pc=0x10265ad08
runtime.goparkunlock(...)
        GOROOT/src/runtime/proc.go:408
runtime.(*scavengerState).park(0x1042bcca0)
        GOROOT/src/runtime/mgcscavenge.go:425 +0x5c fp=0xc00008cf90 sp=0xc00008cf60 pc=0x10264241c
runtime.bgscavenge(0xc0000a4000)
        GOROOT/src/runtime/mgcscavenge.go:653 +0x44 fp=0xc00008cfb0 sp=0xc00008cf90 pc=0x102642944
runtime.gcenable.gowrap2()
        GOROOT/src/runtime/mgc.go:204 +0x28 fp=0xc00008cfd0 sp=0xc00008cfb0 pc=0x102638b18
runtime.goexit({})
        src/runtime/asm_arm64.s:1222 +0x4 fp=0xc00008cfd0 sp=0xc00008cfd0 pc=0x102696c54
created by runtime.gcenable in goroutine 1
        GOROOT/src/runtime/mgc.go:204 +0xac

goroutine 18 gp=0xc000104700 m=nil [finalizer wait]:
runtime.gopark(0x0?, 0x0?, 0x0?, 0x0?, 0x0?)
        GOROOT/src/runtime/proc.go:402 +0xc8 fp=0xc000184d80 sp=0xc000184d60 pc=0x10265ad08
runtime.runfinq()
        GOROOT/src/runtime/mfinal.go:194 +0x12c fp=0xc000184fd0 sp=0xc000184d80 pc=0x102637c4c
runtime.goexit({})
        src/runtime/asm_arm64.s:1222 +0x4 fp=0xc000184fd0 sp=0xc000184fd0 pc=0x102696c54
created by runtime.createfing in goroutine 1
        GOROOT/src/runtime/mfinal.go:164 +0x80

goroutine 24 gp=0xc000293500 m=nil [select]:
runtime.gopark(0xc0000c7bc0?, 0x2?, 0x68?, 0x76?, 0xc0000c77bc?)
        GOROOT/src/runtime/proc.go:402 +0xc8 fp=0xc0000c7640 sp=0xc0000c7620 pc=0x10265ad08
runtime.selectgo(0xc0000c7bc0, 0xc0000c77b8, 0x0?, 0x0, 0x0?, 0x1)
        GOROOT/src/runtime/select.go:327 +0x694 fp=0xc0000c7770 sp=0xc0000c7640 pc=0x10266f2f4
github.com/nats-io/nats-server/v2/server.(*sendq).internalLoop(0xc000135368)
        external/gazelle~override~go_deps~com_github_nats_io_nats_server_v2/server/sendq.go:62 +0x400 fp=0xc0000c7f40 sp=0xc0000c7770 pc=0x1030b30a0
github.com/nats-io/nats-server/v2/server.(*sendq).internalLoop-fm()
        <autogenerated>:1 +0x3c fp=0xc0000c7f70 sp=0xc0000c7f40 pc=0x1031ca36c
github.com/nats-io/nats-server/v2/server.(*Server).startGoRoutine.func1()
        external/gazelle~override~go_deps~com_github_nats_io_nats_server_v2/server/server.go:3738 +0x68 fp=0xc0000c7fd0 sp=0xc0000c7f70 pc=0x1030db2a8
runtime.goexit({})
        src/runtime/asm_arm64.s:1222 +0x4 fp=0xc0000c7fd0 sp=0xc0000c7fd0 pc=0x102696c54
created by github.com/nats-io/nats-server/v2/server.(*Server).startGoRoutine in goroutine 37
        external/gazelle~override~go_deps~com_github_nats_io_nats_server_v2/server/server.go:3736 +0x238

goroutine 22 gp=0xc00018ca80 m=nil [select, locked to thread]:
runtime.gopark(0xc000093fa0?, 0x2?, 0x98?, 0x3e?, 0xc000093f90?)
        GOROOT/src/runtime/proc.go:402 +0xc8 fp=0xc000093e20 sp=0xc000093e00 pc=0x10265ad08
runtime.selectgo(0xc000093fa0, 0xc000093f8c, 0x0?, 0x0, 0x2?, 0x1)
        GOROOT/src/runtime/select.go:327 +0x694 fp=0xc000093f50 sp=0xc000093e20 pc=0x10266f2f4
runtime.ensureSigM.func1()
        GOROOT/src/runtime/signal_unix.go:1034 +0x168 fp=0xc000093fd0 sp=0xc000093f50 pc=0x10268c698
runtime.goexit({})
        src/runtime/asm_arm64.s:1222 +0x4 fp=0xc000093fd0 sp=0xc000093fd0 pc=0x102696c54
created by runtime.ensureSigM in goroutine 1
        GOROOT/src/runtime/signal_unix.go:1017 +0xd8

goroutine 35 gp=0xc00018cc40 m=3 mp=0xc000081208 [syscall]:
runtime.sigNoteSleep(0xc0003ec7a8?)
        GOROOT/src/runtime/os_darwin.go:132 +0x20 fp=0xc0003ec790 sp=0xc0003ec750 pc=0x102655600
os/signal.signal_recv()
        GOROOT/src/runtime/sigqueue.go:149 +0x2c fp=0xc0003ec7b0 sp=0xc0003ec790 pc=0x10269270c
os/signal.loop()
        GOROOT/src/os/signal/signal_unix.go:23 +0x24 fp=0xc0003ec7d0 sp=0xc0003ec7b0 pc=0x102852014
runtime.goexit({})
        src/runtime/asm_arm64.s:1222 +0x4 fp=0xc0003ec7d0 sp=0xc0003ec7d0 pc=0x102696c54
created by os/signal.Notify.func1.1 in goroutine 1
        GOROOT/src/os/signal/signal.go:151 +0x50

goroutine 36 gp=0xc00018ce00 m=nil [chan receive]:
runtime.gopark(0x0?, 0x0?, 0x0?, 0x0?, 0x0?)
        GOROOT/src/runtime/proc.go:402 +0xc8 fp=0xc000180dc0 sp=0xc000180da0 pc=0x10265ad08
runtime.chanrecv(0xc000141f20, 0x0, 0x1)
        GOROOT/src/runtime/chan.go:583 +0x3a8 fp=0xc000180e50 sp=0xc000180dc0 pc=0x102623d48
runtime.chanrecv1(0x0?, 0x0?)
        GOROOT/src/runtime/chan.go:442 +0x14 fp=0xc000180e80 sp=0xc000180e50 pc=0x102623964
github.com/bazelbuild/rules_go/go/tools/bzltestutil.RegisterTimeoutHandler.func1()
        external/rules_go~0.46.0/go/tools/bzltestutil/timeout.go:34 +0x48 fp=0xc000180fd0 sp=0xc000180e80 pc=0x1028a0648
runtime.goexit({})
        src/runtime/asm_arm64.s:1222 +0x4 fp=0xc000180fd0 sp=0xc000180fd0 pc=0x102696c54
created by github.com/bazelbuild/rules_go/go/tools/bzltestutil.RegisterTimeoutHandler in goroutine 1
        external/rules_go~0.46.0/go/tools/bzltestutil/timeout.go:33 +0x164

goroutine 23 gp=0xc00018d180 m=nil [select]:
runtime.gopark(0xc000181f00?, 0x3?, 0x68?, 0x1c?, 0xc000181d6a?)
        GOROOT/src/runtime/proc.go:402 +0xc8 fp=0xc000181bf0 sp=0xc000181bd0 pc=0x10265ad08
runtime.selectgo(0xc000181f00, 0xc000181d64, 0x0?, 0x0, 0x0?, 0x1)
        GOROOT/src/runtime/select.go:327 +0x694 fp=0xc000181d20 sp=0xc000181bf0 pc=0x10266f2f4
github.com/nats-io/nats-server/v2/server.(*Server).startRateLimitLogExpiration.func1()
        external/gazelle~override~go_deps~com_github_nats_io_nats_server_v2/server/server.go:4367 +0x2c8 fp=0xc000181f70 sp=0xc000181d20 pc=0x1030e26d8
github.com/nats-io/nats-server/v2/server.(*Server).startGoRoutine.func1()
        external/gazelle~override~go_deps~com_github_nats_io_nats_server_v2/server/server.go:3738 +0x68 fp=0xc000181fd0 sp=0xc000181f70 pc=0x1030db2a8
runtime.goexit({})
        src/runtime/asm_arm64.s:1222 +0x4 fp=0xc000181fd0 sp=0xc000181fd0 pc=0x102696c54
created by github.com/nats-io/nats-server/v2/server.(*Server).startGoRoutine in goroutine 37
        external/gazelle~override~go_deps~com_github_nats_io_nats_server_v2/server/server.go:3736 +0x238

goroutine 25 gp=0xc0002936c0 m=nil [select]:
runtime.gopark(0xc0003f5988?, 0x3?, 0xf8?, 0x54?, 0xc0003f565e?)
        GOROOT/src/runtime/proc.go:402 +0xc8 fp=0xc0003f54d0 sp=0xc0003f54b0 pc=0x10265ad08
runtime.selectgo(0xc0003f5988, 0xc0003f5658, 0xc00000e1c8?, 0x0, 0x103a458e0?, 0x1)
        GOROOT/src/runtime/select.go:327 +0x694 fp=0xc0003f5600 sp=0xc0003f54d0 pc=0x10266f2f4
github.com/nats-io/nats-server/v2/server.(*Server).internalSendLoop(0xc000408008, 0xc00023b9d8)
        external/gazelle~override~go_deps~com_github_nats_io_nats_server_v2/server/events.go:463 +0x520 fp=0xc0003f5f90 sp=0xc0003f5600 pc=0x102d60f40
github.com/nats-io/nats-server/v2/server.(*Server).setSystemAccount.gowrap1()
        external/gazelle~override~go_deps~com_github_nats_io_nats_server_v2/server/server.go:1709 +0x50 fp=0xc0003f5fd0 sp=0xc0003f5f90 pc=0x1030c5310
runtime.goexit({})
        src/runtime/asm_arm64.s:1222 +0x4 fp=0xc0003f5fd0 sp=0xc0003f5fd0 pc=0x102696c54
created by github.com/nats-io/nats-server/v2/server.(*Server).setSystemAccount in goroutine 37
        external/gazelle~override~go_deps~com_github_nats_io_nats_server_v2/server/server.go:1709 +0x950

goroutine 26 gp=0xc000293880 m=nil [select]:
runtime.gopark(0xc000186f60?, 0x2?, 0x58?, 0x6c?, 0xc000186ddc?)
        GOROOT/src/runtime/proc.go:402 +0xc8 fp=0xc000186c30 sp=0xc000186c10 pc=0x10265ad08
runtime.selectgo(0xc000186f60, 0xc000186dd8, 0x0?, 0x0, 0x0?, 0x1)
        GOROOT/src/runtime/select.go:327 +0x694 fp=0xc000186d60 sp=0xc000186c30 pc=0x10266f2f4
github.com/nats-io/nats-server/v2/server.(*Server).internalReceiveLoop(0xc000408008)
        external/gazelle~override~go_deps~com_github_nats_io_nats_server_v2/server/events.go:418 +0x218 fp=0xc000186fa0 sp=0xc000186d60 pc=0x102d606e8
github.com/nats-io/nats-server/v2/server.(*Server).setSystemAccount.gowrap2()
        external/gazelle~override~go_deps~com_github_nats_io_nats_server_v2/server/server.go:1712 +0x3c fp=0xc000186fd0 sp=0xc000186fa0 pc=0x1030c527c
runtime.goexit({})
        src/runtime/asm_arm64.s:1222 +0x4 fp=0xc000186fd0 sp=0xc000186fd0 pc=0x102696c54
created by github.com/nats-io/nats-server/v2/server.(*Server).setSystemAccount in goroutine 37
        external/gazelle~override~go_deps~com_github_nats_io_nats_server_v2/server/server.go:1712 +0x9e0

goroutine 27 gp=0xc000293a40 m=nil [select]:
runtime.gopark(0xc000504ed0?, 0x3?, 0x18?, 0x4c?, 0xc000504d1a?)
        GOROOT/src/runtime/proc.go:402 +0xc8 fp=0xc000504ba0 sp=0xc000504b80 pc=0x10265ad08
runtime.selectgo(0xc000504ed0, 0xc000504d14, 0x0?, 0x0, 0x0?, 0x1)
        GOROOT/src/runtime/select.go:327 +0x694 fp=0xc000504cd0 sp=0xc000504ba0 pc=0x10266f2f4
github.com/nats-io/nats-server/v2/server.(*Server).startGWReplyMapExpiration.func1()
        external/gazelle~override~go_deps~com_github_nats_io_nats_server_v2/server/gateway.go:3216 +0x218 fp=0xc000504f70 sp=0xc000504cd0 pc=0x102e074f8
github.com/nats-io/nats-server/v2/server.(*Server).startGoRoutine.func1()
        external/gazelle~override~go_deps~com_github_nats_io_nats_server_v2/server/server.go:3738 +0x68 fp=0xc000504fd0 sp=0xc000504f70 pc=0x1030db2a8
runtime.goexit({})
        src/runtime/asm_arm64.s:1222 +0x4 fp=0xc000504fd0 sp=0xc000504fd0 pc=0x102696c54
created by github.com/nats-io/nats-server/v2/server.(*Server).startGoRoutine in goroutine 37
        external/gazelle~override~go_deps~com_github_nats_io_nats_server_v2/server/server.go:3736 +0x238

Expected behavior

Starting a server with JetStream enabled should not crash or cause the test to fail.

Server and client version

- github.com/nats-io/jwt/v2 v2.5.5
- github.com/nats-io/nats-server/v2 v2.10.12
- github.com/nats-io/nats.go v1.33.1
- github.com/nats-io/nkeys v0.4.7

Host environment

Apple M1 Pro, macOS 14.3 (23D56), go version go1.22.1 darwin/arm64

Steps to reproduce

Run the following test:

package metrics

import (
    "testing"
    "time"

    "github.com/nats-io/nats-server/v2/server"
    "github.com/stretchr/testify/require"
)

// TestRegisterMetrics is the reproduction for the crash: it creates a
// JetStream-enabled server via createNatsHub, starts it, lets it run for one
// second, and then shuts it down.
func TestRegisterMetrics(t *testing.T) {
    t.Parallel()
    hub := createNatsHub(t)
    // Register the shutdown right after creation so the server is still torn
    // down if startup panics or the test exits early.
    defer hub.Shutdown()
    hub.Start()
    // Keep the server running briefly before the deferred shutdown fires.
    time.Sleep(time.Second)
}

// createNatsHub builds, but does not start, a NATS server configured with
// JetStream and a leaf-node listener, using a per-test temporary directory as
// the JetStream store.
//
// The test is failed immediately (via require.NoError) if server.NewServer
// returns an error. The caller is responsible for calling Start and Shutdown
// on the returned server.
func createNatsHub(t *testing.T) *server.Server {
    // Attribute failures raised here to the calling test.
    t.Helper()

    dir := t.TempDir()
    opts := &server.Options{
        Host: "localhost",
        Port: -1,
        // Disables NATS internal signal handling to ensure we own the server lifecycle.
        NoSigs: true,
        LeafNode: server.LeafNodeOpts{
            Host: "localhost",
            Port: -1,
        },
        JetStream: true,
        // Uncomment these lines to get the test to pass
        //JetStreamMaxMemory: 10,
        //JetStreamMaxStore:  10,
        JetStreamDomain: "collector_tests",
        StoreDir:        dir,
    }
    natsHub, err := server.NewServer(opts)
    // Check the error before touching natsHub: the returned server must not be
    // used when NewServer reports a failure.
    require.NoError(t, err)
    // This is an internal logger.
    natsHub.SetLogger(utils.Zap{}, true, true)
    return natsHub
}
LaCodon commented 1 month ago

This got fixed in nats-server v2.10.14 with https://github.com/nats-io/nats-server/pull/5265