NVIDIA / dcgm-exporter

NVIDIA GPU metrics exporter for Prometheus leveraging DCGM
Apache License 2.0
906 stars 157 forks source link

Segfaults with dcgm-exporter 3.3.0 and higher #412

Open andrewjamesbrown opened 1 week ago

andrewjamesbrown commented 1 week ago

What is the version?

3.3.0 and higher

What happened?

We're seeing segfaults in our EKS environment (running IPv6 clusters) when running dcgm-exporter 3.3.0 and higher (DCGM 3.3.3+); we do not see segfaults when running 3.2.0 (DCGM 3.3.0). I have confirmed that this happens on every version released after 3.3.0-3.2.0.

Our nodes are running a mix of Bottlerocket OS 1.25.0 (aws-k8s-1.29-nvidia) and Bottlerocket OS 1.26.1 (aws-k8s-1.29-nvidia)

The segfault is similar to this (from 3.3.3-3.3.0):

time="2024-10-30T19:39:45Z" level=info msg="Starting dcgm-exporter"
time="2024-10-30T19:39:45Z" level=info msg="DCGM successfully initialized!"
time="2024-10-30T19:39:45Z" level=info msg="Collecting DCP Metrics"
time="2024-10-30T19:39:45Z" level=warning msg="Skipping line 26 ('DCGM_FI_PROF_PIPE_FP64_ACTIVE'): metric not enabled"
time="2024-10-30T19:39:45Z" level=info msg="Initializing system entities of type: GPU"
time="2024-10-30T19:39:45Z" level=info msg="Initializing system entities of type: NvSwitch"
time="2024-10-30T19:39:45Z" level=info msg="Not collecting switch metrics: no switches to monitor"
time="2024-10-30T19:39:45Z" level=info msg="Initializing system entities of type: NvLink"
time="2024-10-30T19:39:45Z" level=info msg="Not collecting link metrics: no switches to monitor"
time="2024-10-30T19:39:45Z" level=info msg="Initializing system entities of type: CPU"
SIGSEGV: segmentation violation
PC=0x7f675afd46aa m=3 sigcode=1
signal arrived during cgo execution

goroutine 1 [syscall]:
runtime.cgocall(0x1203070, 0xc0005efec0)
    /usr/local/go/src/runtime/cgocall.go:157 +0x4b fp=0xc0005efe98 sp=0xc0005efe60 pc=0x41714b
github.com/NVIDIA/go-dcgm/pkg/dcgm._Cfunc_dcgmGetCpuHierarchy(0x7fffffff, 0xc00016a000)
    _cgo_gotypes.go:1095 +0x4b fp=0xc0005efec0 sp=0xc0005efe98 pc=0x58888b
github.com/NVIDIA/go-dcgm/pkg/dcgm.GetCpuHierarchy()
    /go/pkg/mod/github.com/!n!v!i!d!i!a/go-dcgm@v0.0.0-20240108230649-3c233ee2a242/pkg/dcgm/cpu.go:42 +0x6b fp=0xc0005f0b70 sp=0xc0005efec0 pc=0x58afeb
github.com/NVIDIA/dcgm-exporter/pkg/dcgmexporter.InitializeCPUInfo({0x0, {{{0x0, {...}, {...}, 0x0, {...}, {...}, {...}, {...}}, {0x0, ...}, ...}, ...}, ...}, ...)
    /go/src/github.com/NVIDIA/dcgm-exporter/pkg/dcgmexporter/system_info.go:296 +0x9f fp=0xc0005f0ee0 sp=0xc0005f0b70 pc=0x11cfe1f
github.com/NVIDIA/dcgm-exporter/pkg/dcgmexporter.InitializeSystemInfo({_, {_, _, _}, {_, _, _}}, {0x1, {0x0, 0x0, ...}, ...}, ...)
    /go/src/github.com/NVIDIA/dcgm-exporter/pkg/dcgmexporter/system_info.go:440 +0x4f9 fp=0xc0005f9308 sp=0xc0005f0ee0 pc=0x11d1599
github.com/NVIDIA/dcgm-exporter/pkg/dcgmexporter.NewDCGMCollector({0xc00023e000?, 0x2c, 0x2d}, 0xc000459080, 0x2?)
    /go/src/github.com/NVIDIA/dcgm-exporter/pkg/dcgmexporter/gpu_collector.go:30 +0x1e5 fp=0xc0005ff7b8 sp=0xc0005f9308 pc=0x11c6d45
github.com/NVIDIA/dcgm-exporter/pkg/dcgmexporter.NewMetricsPipeline(0xc000459080, 0x159f770)
    /go/src/github.com/NVIDIA/dcgm-exporter/pkg/dcgmexporter/pipeline.go:58 +0x41d fp=0xc0005ff9a8 sp=0xc0005ff7b8 pc=0x11cc5fd
github.com/NVIDIA/dcgm-exporter/pkg/cmd.action(0xc000328901?)
    /go/src/github.com/NVIDIA/dcgm-exporter/pkg/cmd/app.go:245 +0x445 fp=0xc0005ffba0 sp=0xc0005ff9a8 pc=0x12015a5
github.com/NVIDIA/dcgm-exporter/pkg/cmd.NewApp.func1(0xc000458b00?)
    /go/src/github.com/NVIDIA/dcgm-exporter/pkg/cmd/app.go:193 +0x13 fp=0xc0005ffbb8 sp=0xc0005ffba0 pc=0x1202bd3
github.com/urfave/cli/v2.(*Command).Run(0xc000458b00, 0xc0003289c0, {0xc00012a120, 0x3, 0x3})
    /go/pkg/mod/github.com/urfave/cli/v2@v2.26.0/command.go:277 +0x9d8 fp=0xc0005ffe48 sp=0xc0005ffbb8 pc=0x11ef5b8
github.com/urfave/cli/v2.(*App).RunContext(0xc0000ac000, {0x16e1608?, 0x210b4a0}, {0xc00012a120, 0x3, 0x3})
    /go/pkg/mod/github.com/urfave/cli/v2@v2.26.0/app.go:335 +0x5db fp=0xc0005ffeb0 sp=0xc0005ffe48 pc=0x11ebefb
github.com/urfave/cli/v2.(*App).Run(0xc000507f20?, {0xc00012a120?, 0x1?, 0x1203e10?})
    /go/pkg/mod/github.com/urfave/cli/v2@v2.26.0/app.go:309 +0x2f fp=0xc0005ffef0 sp=0xc0005ffeb0 pc=0x11eb8cf
main.main()
    /go/src/github.com/NVIDIA/dcgm-exporter/cmd/dcgm-exporter/main.go:32 +0x5f fp=0xc0005fff40 sp=0xc0005ffef0 pc=0x1202d3f
runtime.main()
    /usr/local/go/src/runtime/proc.go:267 +0x2bb fp=0xc0005fffe0 sp=0xc0005fff40 pc=0x44c7db
runtime.goexit()
    /usr/local/go/src/runtime/asm_amd64.s:1650 +0x1 fp=0xc0005fffe8 sp=0xc0005fffe0 pc=0x47c701

goroutine 2 [force gc (idle)]:
runtime.gopark(0x0?, 0x0?, 0x0?, 0x0?, 0x0?)
    /usr/local/go/src/runtime/proc.go:398 +0xce fp=0xc000066fa8 sp=0xc000066f88 pc=0x44cc2e
runtime.goparkunlock(...)
    /usr/local/go/src/runtime/proc.go:404
runtime.forcegchelper()
    /usr/local/go/src/runtime/proc.go:322 +0xb3 fp=0xc000066fe0 sp=0xc000066fa8 pc=0x44cab3
runtime.goexit()
    /usr/local/go/src/runtime/asm_amd64.s:1650 +0x1 fp=0xc000066fe8 sp=0xc000066fe0 pc=0x47c701
created by runtime.init.6 in goroutine 1
    /usr/local/go/src/runtime/proc.go:310 +0x1a

goroutine 3 [GC sweep wait]:
runtime.gopark(0x1?, 0x0?, 0x0?, 0x0?, 0x0?)
    /usr/local/go/src/runtime/proc.go:398 +0xce fp=0xc000067778 sp=0xc000067758 pc=0x44cc2e
runtime.goparkunlock(...)
    /usr/local/go/src/runtime/proc.go:404
runtime.bgsweep(0x0?)
    /usr/local/go/src/runtime/mgcsweep.go:321 +0xdf fp=0xc0000677c8 sp=0xc000067778 pc=0x437d1f
runtime.gcenable.func1()
    /usr/local/go/src/runtime/mgc.go:200 +0x25 fp=0xc0000677e0 sp=0xc0000677c8 pc=0x42ce85
runtime.goexit()
    /usr/local/go/src/runtime/asm_amd64.s:1650 +0x1 fp=0xc0000677e8 sp=0xc0000677e0 pc=0x47c701
created by runtime.gcenable in goroutine 1
    /usr/local/go/src/runtime/mgc.go:200 +0x66

goroutine 4 [GC scavenge wait]:
runtime.gopark(0xc00004e070?, 0x16c61b0?, 0x0?, 0x0?, 0x0?)
    /usr/local/go/src/runtime/proc.go:398 +0xce fp=0xc000067f70 sp=0xc000067f50 pc=0x44cc2e
runtime.goparkunlock(...)
    /usr/local/go/src/runtime/proc.go:404
runtime.(*scavengerState).park(0x20db7e0)
    /usr/local/go/src/runtime/mgcscavenge.go:425 +0x49 fp=0xc000067fa0 sp=0xc000067f70 pc=0x4355a9
runtime.bgscavenge(0x0?)
    /usr/local/go/src/runtime/mgcscavenge.go:658 +0x59 fp=0xc000067fc8 sp=0xc000067fa0 pc=0x435b59
runtime.gcenable.func2()
    /usr/local/go/src/runtime/mgc.go:201 +0x25 fp=0xc000067fe0 sp=0xc000067fc8 pc=0x42ce25
runtime.goexit()
    /usr/local/go/src/runtime/asm_amd64.s:1650 +0x1 fp=0xc000067fe8 sp=0xc000067fe0 pc=0x47c701
created by runtime.gcenable in goroutine 1
    /usr/local/go/src/runtime/mgc.go:201 +0xa5

goroutine 18 [finalizer wait]:
runtime.gopark(0x198?, 0x14bee20?, 0x1?, 0xdd?, 0x0?)
    /usr/local/go/src/runtime/proc.go:398 +0xce fp=0xc000066620 sp=0xc000066600 pc=0x44cc2e
runtime.runfinq()
    /usr/local/go/src/runtime/mfinal.go:193 +0x107 fp=0xc0000667e0 sp=0xc000066620 pc=0x42bea7
runtime.goexit()
    /usr/local/go/src/runtime/asm_amd64.s:1650 +0x1 fp=0xc0000667e8 sp=0xc0000667e0 pc=0x47c701
created by runtime.createfing in goroutine 1
    /usr/local/go/src/runtime/mfinal.go:163 +0x3d

goroutine 19 [chan receive]:
runtime.gopark(0x468e5e?, 0xc000054000?, 0x58?, 0xa0?, 0x86c384692449?)
    /usr/local/go/src/runtime/proc.go:398 +0xce fp=0xc0000626e0 sp=0xc0000626c0 pc=0x44cc2e
runtime.chanrecv(0xc000090060, 0xc0000627a0, 0x1)
    /usr/local/go/src/runtime/chan.go:583 +0x3cd fp=0xc000062758 sp=0xc0000626e0 pc=0x41952d
runtime.chanrecv2(0x12a05f200?, 0x0?)
    /usr/local/go/src/runtime/chan.go:447 +0x12 fp=0xc000062780 sp=0xc000062758 pc=0x419152
k8s.io/klog/v2.(*loggingT).flushDaemon(0x0?)
    /go/pkg/mod/k8s.io/klog/v2@v2.4.0/klog.go:1169 +0x66 fp=0xc0000627c8 sp=0xc000062780 pc=0x98ba06
k8s.io/klog/v2.init.0.func1()
    /go/pkg/mod/k8s.io/klog/v2@v2.4.0/klog.go:417 +0x25 fp=0xc0000627e0 sp=0xc0000627c8 pc=0x9896a5
runtime.goexit()
    /usr/local/go/src/runtime/asm_amd64.s:1650 +0x1 fp=0xc0000627e8 sp=0xc0000627e0 pc=0x47c701
created by k8s.io/klog/v2.init.0 in goroutine 1
    /go/pkg/mod/k8s.io/klog/v2@v2.4.0/klog.go:417 +0xf4

goroutine 37 [GC worker (idle)]:
runtime.gopark(0xc000062fa8?, 0x41900b?, 0x17?, 0x84?, 0xc00031eae0?)
    /usr/local/go/src/runtime/proc.go:398 +0xce fp=0xc000062f50 sp=0xc000062f30 pc=0x44cc2e
runtime.gcBgMarkWorker()
    /usr/local/go/src/runtime/mgc.go:1295 +0xe5 fp=0xc000062fe0 sp=0xc000062f50 pc=0x42ea05
runtime.goexit()
    /usr/local/go/src/runtime/asm_amd64.s:1650 +0x1 fp=0xc000062fe8 sp=0xc000062fe0 pc=0x47c701
created by runtime.gcBgMarkStartWorkers in goroutine 1
    /usr/local/go/src/runtime/mgc.go:1219 +0x1c

goroutine 22 [GC worker (idle)]:
runtime.gopark(0x86c25ac9e018?, 0x0?, 0x0?, 0x0?, 0x0?)
    /usr/local/go/src/runtime/proc.go:398 +0xce fp=0xc000063750 sp=0xc000063730 pc=0x44cc2e
runtime.gcBgMarkWorker()
    /usr/local/go/src/runtime/mgc.go:1295 +0xe5 fp=0xc0000637e0 sp=0xc000063750 pc=0x42ea05
runtime.goexit()
    /usr/local/go/src/runtime/asm_amd64.s:1650 +0x1 fp=0xc0000637e8 sp=0xc0000637e0 pc=0x47c701
created by runtime.gcBgMarkStartWorkers in goroutine 1
    /usr/local/go/src/runtime/mgc.go:1219 +0x1c

goroutine 38 [GC worker (idle)]:
runtime.gopark(0x86c25ac9e2de?, 0x0?, 0x0?, 0x0?, 0x0?)
    /usr/local/go/src/runtime/proc.go:398 +0xce fp=0xc0004a4750 sp=0xc0004a4730 pc=0x44cc2e
runtime.gcBgMarkWorker()
    /usr/local/go/src/runtime/mgc.go:1295 +0xe5 fp=0xc0004a47e0 sp=0xc0004a4750 pc=0x42ea05
runtime.goexit()
    /usr/local/go/src/runtime/asm_amd64.s:1650 +0x1 fp=0xc0004a47e8 sp=0xc0004a47e0 pc=0x47c701
created by runtime.gcBgMarkStartWorkers in goroutine 1
    /usr/local/go/src/runtime/mgc.go:1219 +0x1c

goroutine 23 [GC worker (idle)]:
runtime.gopark(0x86c25ac9e414?, 0x0?, 0x0?, 0x0?, 0x0?)
    /usr/local/go/src/runtime/proc.go:398 +0xce fp=0xc000063f50 sp=0xc000063f30 pc=0x44cc2e
runtime.gcBgMarkWorker()
    /usr/local/go/src/runtime/mgc.go:1295 +0xe5 fp=0xc000063fe0 sp=0xc000063f50 pc=0x42ea05
runtime.goexit()
    /usr/local/go/src/runtime/asm_amd64.s:1650 +0x1 fp=0xc000063fe8 sp=0xc000063fe0 pc=0x47c701
created by runtime.gcBgMarkStartWorkers in goroutine 1
    /usr/local/go/src/runtime/mgc.go:1219 +0x1c

goroutine 41 [IO wait]:
runtime.gopark(0x1c1d948a4264d9ad?, 0xb?, 0x0?, 0x0?, 0xd?)
    /usr/local/go/src/runtime/proc.go:398 +0xce fp=0xc0000dd5c8 sp=0xc0000dd5a8 pc=0x44cc2e
runtime.netpollblock(0x4913b8?, 0x4168e6?, 0x0?)
    /usr/local/go/src/runtime/netpoll.go:564 +0xf7 fp=0xc0000dd600 sp=0xc0000dd5c8 pc=0x4456d7
internal/poll.runtime_pollWait(0x7f6712b1be58, 0x72)
    /usr/local/go/src/runtime/netpoll.go:343 +0x85 fp=0xc0000dd620 sp=0xc0000dd600 pc=0x477065
internal/poll.(*pollDesc).wait(0xc000274700?, 0xc0004ec000?, 0x0)
    /usr/local/go/src/internal/poll/fd_poll_runtime.go:84 +0x27 fp=0xc0000dd648 sp=0xc0000dd620 pc=0x4b38a7
internal/poll.(*pollDesc).waitRead(...)
    /usr/local/go/src/internal/poll/fd_poll_runtime.go:89
internal/poll.(*FD).Read(0xc000274700, {0xc0004ec000, 0x1980, 0x1980})
    /usr/local/go/src/internal/poll/fd_unix.go:164 +0x27a fp=0xc0000dd6e0 sp=0xc0000dd648 pc=0x4b4b9a
net.(*netFD).Read(0xc000274700, {0xc0004ec000?, 0xc0004ec005?, 0x1a?})
    /usr/local/go/src/net/fd_posix.go:55 +0x25 fp=0xc0000dd728 sp=0xc0000dd6e0 pc=0x5c55c5
net.(*conn).Read(0xc0001280b0, {0xc0004ec000?, 0x75d697?, 0xc00014a538?})
    /usr/local/go/src/net/net.go:179 +0x45 fp=0xc0000dd770 sp=0xc0000dd728 pc=0x5d6605
net.(*TCPConn).Read(0xc0000dd808?, {0xc0004ec000?, 0xc0001325d0?, 0x18?})
    <autogenerated>:1 +0x25 fp=0xc0000dd7a0 sp=0xc0000dd770 pc=0x5e8de5
crypto/tls.(*atLeastReader).Read(0xc0001325d0, {0xc0004ec000?, 0xc0001325d0?, 0x0?})
    /usr/local/go/src/crypto/tls/conn.go:805 +0x3b fp=0xc0000dd7e8 sp=0xc0000dd7a0 pc=0x76149b
bytes.(*Buffer).ReadFrom(0xc00014a628, {0x16ce700, 0xc0001325d0})
    /usr/local/go/src/bytes/buffer.go:211 +0x98 fp=0xc0000dd840 sp=0xc0000dd7e8 pc=0x4ca478
crypto/tls.(*Conn).readFromUntil(0xc00014a380, {0x16ccb00?, 0xc0001280b0}, 0x1980?)
    /usr/local/go/src/crypto/tls/conn.go:827 +0xde fp=0xc0000dd880 sp=0xc0000dd840 pc=0x76167e
crypto/tls.(*Conn).readRecordOrCCS(0xc00014a380, 0x0)
    /usr/local/go/src/crypto/tls/conn.go:625 +0x250 fp=0xc0000ddc20 sp=0xc0000dd880 pc=0x75ec50
crypto/tls.(*Conn).readRecord(...)
    /usr/local/go/src/crypto/tls/conn.go:587
crypto/tls.(*Conn).Read(0xc00014a380, {0xc0004d3000, 0x1000, 0x86fb45?})
    /usr/local/go/src/crypto/tls/conn.go:1369 +0x158 fp=0xc0000ddc90 sp=0xc0000ddc20 pc=0x764f18
bufio.(*Reader).Read(0xc0004ce8a0, {0xc0004c2200, 0x9, 0x20571c0?})
    /usr/local/go/src/bufio/bufio.go:244 +0x197 fp=0xc0000ddcc8 sp=0xc0000ddc90 pc=0x591937
io.ReadAtLeast({0x16cc900, 0xc0004ce8a0}, {0xc0004c2200, 0x9, 0x9}, 0x9)
    /usr/local/go/src/io/io.go:335 +0x90 fp=0xc0000ddd10 sp=0xc0000ddcc8 pc=0x4aee10
io.ReadFull(...)
    /usr/local/go/src/io/io.go:354
golang.org/x/net/http2.readFrameHeader({0xc0004c2200, 0x9, 0xc00006c800?}, {0x16cc900?, 0xc0004ce8a0?})
    /go/pkg/mod/golang.org/x/net@v0.17.0/http2/frame.go:237 +0x65 fp=0xc0000ddd60 sp=0xc0000ddd10 pc=0x859c05
golang.org/x/net/http2.(*Framer).ReadFrame(0xc0004c21c0)
    /go/pkg/mod/golang.org/x/net@v0.17.0/http2/frame.go:498 +0x85 fp=0xc0000dde08 sp=0xc0000ddd60 pc=0x85a345
golang.org/x/net/http2.(*clientConnReadLoop).run(0xc0000ddf98)
    /go/pkg/mod/golang.org/x/net@v0.17.0/http2/transport.go:2275 +0x11f fp=0xc0000ddf60 sp=0xc0000dde08 pc=0x86cadf
golang.org/x/net/http2.(*ClientConn).readLoop(0xc000410180)
    /go/pkg/mod/golang.org/x/net@v0.17.0/http2/transport.go:2170 +0x65 fp=0xc0000ddfc8 sp=0xc0000ddf60 pc=0x86c065
golang.org/x/net/http2.(*Transport).newClientConn.func3()
    /go/pkg/mod/golang.org/x/net@v0.17.0/http2/transport.go:821 +0x25 fp=0xc0000ddfe0 sp=0xc0000ddfc8 pc=0x864ee5
runtime.goexit()
    /usr/local/go/src/runtime/asm_amd64.s:1650 +0x1 fp=0xc0000ddfe8 sp=0xc0000ddfe0 pc=0x47c701
created by golang.org/x/net/http2.(*Transport).newClientConn in goroutine 40
    /go/pkg/mod/golang.org/x/net@v0.17.0/http2/transport.go:821 +0xcbe

rax    0x0
rbx    0x7f66f87f8360
rcx    0x7f675b007c0c
rdx    0x1
rdi    0x0
rsi    0x7f67040068c0
rbp    0x4
rsp    0x7f6713b52940
r8     0x90800
r9     0x7f67040068c0
r10    0x0
r11    0x287
r12    0xffffffffffffff78
r13    0x2
r14    0x0
r15    0x0
rip    0x7f675afd46aa
rflags 0x10202
cs     0x33
fs     0x0
gs     0x0

What did you expect to happen?

dcgm-exporter does not crash

What is the GPU model?

AWS NVIDIA A10G Tensor Core GPU running on AWS g5.xlarge instances

# nvidia-smi
Wed Oct 30 20:26:34 2024
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.183.06             Driver Version: 535.183.06   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|=========================================+======================+======================|
|   0  NVIDIA A10G                    On  | 00000000:00:1E.0 Off |                    0 |
|  0%   40C    P0              73W / 300W |   5744MiB / 23028MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+

+---------------------------------------------------------------------------------------+
| Processes:                                                                            |
|  GPU   GI   CI        PID   Type   Process name                            GPU Memory |
|        ID   ID                                                             Usage      |
|=======================================================================================|
+---------------------------------------------------------------------------------------+

What is the environment?

Running in EKS 1.29

How did you deploy the dcgm-exporter and what is the configuration?

Using helm

How to reproduce the issue?

Upgrade to a release beyond 3.2.0

Anything else we need to know?

No response

andrewjamesbrown commented 1 week ago

FYI, dcgm-exporter starts properly when I disable the following in the config:

DCGM_FI_DEV_COUNT
DCGM_FI_PROCESS_NAME
DCGM_FI_CUDA_DRIVER_VERSION

If I enable any of those 3 options, we get a segmentation fault.

glowkey commented 1 week ago

Please update to the latest version to see if there is a crash. The specific backtrace you included has been fixed. If there is still a crash please include the full log and the config that causes the crash. Thank you.

andrewjamesbrown commented 1 week ago

@glowkey yes, it still crashes on 3.3.8-3.6.0-ubuntu22.04 with the following config (started container with -m dcgm-exporter:datadog-dcgm-exporter-configmap). Disabling the 3 metrics (DCGM_FI_DEV_COUNT, DCGM_FI_PROCESS_NAME and DCGM_FI_CUDA_DRIVER_VERSION) results in a successful bootup.

% k get configmap/datadog-dcgm-exporter-configmap -o yaml
apiVersion: v1
data:
  metrics: |
    # Format
    # If line starts with a '#' it is considered a comment
    # DCGM FIELD                                                      ,Prometheus metric type ,help message

    # Clocks
    DCGM_FI_DEV_SM_CLOCK                                              ,gauge                  ,SM clock frequency (in MHz).
    DCGM_FI_DEV_MEM_CLOCK                                             ,gauge                  ,Memory clock frequency (in MHz).

    # Temperature
    DCGM_FI_DEV_MEMORY_TEMP                                           ,gauge                  ,Memory temperature (in C).
    DCGM_FI_DEV_GPU_TEMP                                              ,gauge                  ,GPU temperature (in C).

    # Power
    DCGM_FI_DEV_POWER_USAGE                                           ,gauge                  ,Power draw (in W).
    DCGM_FI_DEV_TOTAL_ENERGY_CONSUMPTION                              ,counter                ,Total energy consumption since boot (in mJ).

    # PCIE
    DCGM_FI_DEV_PCIE_REPLAY_COUNTER                                   ,counter                ,Total number of PCIe retries.

    # Utilization (the sample period varies depending on the product)
    DCGM_FI_DEV_GPU_UTIL                                              ,gauge                  ,GPU utilization (in %).
    DCGM_FI_DEV_MEM_COPY_UTIL                                         ,gauge                  ,Memory utilization (in %).
    DCGM_FI_DEV_ENC_UTIL                                              ,gauge                  ,Encoder utilization (in %).
    DCGM_FI_DEV_DEC_UTIL                                              ,gauge                  ,Decoder utilization (in %).

    # Errors and violations
    DCGM_FI_DEV_XID_ERRORS                                            ,gauge                  ,Value of the last XID error encountered.

    # Memory usage
    DCGM_FI_DEV_FB_FREE                                               ,gauge                  ,Framebuffer memory free (in MiB).
    DCGM_FI_DEV_FB_USED                                               ,gauge                  ,Framebuffer memory used (in MiB).

    # NVLink
    DCGM_FI_DEV_NVLINK_BANDWIDTH_TOTAL                                ,counter                ,Total number of NVLink bandwidth counters for all lanes.

    # VGPU License status
    DCGM_FI_DEV_VGPU_LICENSE_STATUS                                   ,gauge                  ,vGPU License status

    # Remapped rows
    DCGM_FI_DEV_UNCORRECTABLE_REMAPPED_ROWS                           ,counter                ,Number of remapped rows for uncorrectable errors
    DCGM_FI_DEV_CORRECTABLE_REMAPPED_ROWS                             ,counter                ,Number of remapped rows for correctable errors
    DCGM_FI_DEV_ROW_REMAP_FAILURE                                     ,gauge                  ,Whether remapping of rows has failed

    # DCP metrics
    DCGM_FI_PROF_PCIE_TX_BYTES                                        ,counter                ,The number of bytes of active pcie tx data including both header and payload.
    DCGM_FI_PROF_PCIE_RX_BYTES                                        ,counter                ,The number of bytes of active pcie rx data including both header and payload.
    DCGM_FI_PROF_GR_ENGINE_ACTIVE                                     ,gauge                  ,Ratio of time the graphics engine is active (in %).
    DCGM_FI_PROF_SM_ACTIVE                                            ,gauge                  ,The ratio of cycles an SM has at least 1 warp assigned (in %).
    DCGM_FI_PROF_SM_OCCUPANCY                                         ,gauge                  ,The ratio of number of warps resident on an SM (in %).
    DCGM_FI_PROF_PIPE_TENSOR_ACTIVE                                   ,gauge                  ,Ratio of cycles the tensor (HMMA) pipe is active (in %).
    DCGM_FI_PROF_DRAM_ACTIVE                                          ,gauge                  ,Ratio of cycles the device memory interface is active sending or receiving data (in %).
    DCGM_FI_PROF_PIPE_FP64_ACTIVE                                     ,gauge                  ,Ratio of cycles the fp64 pipes are active (in %).
    DCGM_FI_PROF_PIPE_FP32_ACTIVE                                     ,gauge                  ,Ratio of cycles the fp32 pipes are active (in %).
    DCGM_FI_PROF_PIPE_FP16_ACTIVE                                     ,gauge                  ,Ratio of cycles the fp16 pipes are active (in %).

    # Datadog additional recommended fields
    # Enabling this makes dcgm-exporter 3.3.0+ crash
    DCGM_FI_DEV_COUNT                                                 ,counter                ,Number of Devices on the node.
    DCGM_FI_DEV_FAN_SPEED                                             ,gauge                  ,Fan speed for the device in percent 0-100.
    DCGM_FI_DEV_SLOWDOWN_TEMP                                         ,gauge                  ,Slowdown temperature for the device.
    DCGM_FI_DEV_POWER_MGMT_LIMIT                                      ,gauge                  ,Current power limit for the device.
    DCGM_FI_DEV_PSTATE                                                ,gauge                  ,Performance state (P-State) 0-15. 0=highest
    DCGM_FI_DEV_FB_TOTAL                                              ,gauge                  ,
    DCGM_FI_DEV_FB_RESERVED                                           ,gauge                  ,
    DCGM_FI_DEV_FB_USED_PERCENT                                       ,gauge                  ,
    DCGM_FI_DEV_CLOCK_THROTTLE_REASONS                                ,gauge                  ,Current clock throttle reasons (bitmask of DCGM_CLOCKS_THROTTLE_REASON_*)

    # Enabling this makes dcgm-exporter 3.3.0+ crash
    DCGM_FI_PROCESS_NAME                                              ,label                  ,The Process Name.
    # Enabling this makes dcgm-exporter 3.3.0+ crash
    DCGM_FI_CUDA_DRIVER_VERSION                                       ,label                  ,
    DCGM_FI_DEV_NAME                                                  ,label                  ,
    DCGM_FI_DEV_MINOR_NUMBER                                          ,label                  ,
    DCGM_FI_DRIVER_VERSION                                            ,label                  ,
    DCGM_FI_DEV_BRAND                                                 ,label                  ,
    DCGM_FI_DEV_SERIAL                                                ,label                  ,
kind: ConfigMap
metadata:
  creationTimestamp: "2023-11-01T19:15:40Z"
  labels:
    argocd.argoproj.io/instance: dcgm-exporter
  name: datadog-dcgm-exporter-configmap
  namespace: dcgm-exporter
  resourceVersion: "1088216872"
  uid: dccebbe0-d6cf-448b-9d5c-0f4b65320359

Segfault/logs:

2024/10/30 20:01:56 maxprocs: Leaving GOMAXPROCS=32: CPU quota undefined
time="2024-10-30T20:01:56Z" level=info msg="Starting dcgm-exporter"
time="2024-10-30T20:01:56Z" level=info msg="DCGM successfully initialized!"
time="2024-10-30T20:01:56Z" level=info msg="Collecting DCP Metrics"
time="2024-10-30T20:01:56Z" level=warning msg="Skipping line 26 ('DCGM_FI_PROF_PIPE_FP64_ACTIVE'): metric not enabled"
time="2024-10-30T20:01:56Z" level=info msg="Initializing system entities of type: GPU"
time="2024-10-30T20:01:56Z" level=info msg="Initializing system entities of type: NvSwitch"
time="2024-10-30T20:01:56Z" level=info msg="Not collecting NvSwitch metrics; no switches to monitor"
time="2024-10-30T20:01:56Z" level=info msg="Initializing system entities of type: NvLink"
time="2024-10-30T20:01:56Z" level=info msg="Not collecting NvLink metrics; no switches to monitor"
time="2024-10-30T20:01:56Z" level=info msg="Initializing system entities of type: CPU"
SIGSEGV: segmentation violation
PC=0x7f5bb75066aa m=11 sigcode=1 addr=0x4
signal arrived during cgo execution

goroutine 1 gp=0xc0000061c0 m=11 mp=0xc000680008 [syscall]:
runtime.cgocall(0x16dc5c0, 0xc000705c80)
    /usr/local/go/src/runtime/cgocall.go:157 +0x4b fp=0xc000705c58 sp=0xc000705c20 pc=0x418ccb
github.com/NVIDIA/go-dcgm/pkg/dcgm._Cfunc_dcgmGetCpuHierarchy(0x7fffffff, 0xc0004ac000)
    _cgo_gotypes.go:1119 +0x4b fp=0xc000705c80 sp=0xc000705c58 pc=0x7e96cb
github.com/NVIDIA/go-dcgm/pkg/dcgm.GetCpuHierarchy()
    /go/pkg/mod/github.com/!n!v!i!d!i!a/go-dcgm@v0.0.0-20240118201113-3385e277e49f/pkg/dcgm/cpu.go:42 +0x6b fp=0xc000706930 sp=0xc000705c80 pc=0x7ec10b
github.com/NVIDIA/dcgm-exporter/pkg/dcgmexporter.InitializeCPUInfo({0x0, {{{0x0, {...}, {...}, 0x0, {...}, {...}, {...}, {...}}, {0x0, ...}, ...}, ...}, ...}, ...)
    /go/src/github.com/NVIDIA/dcgm-exporter/pkg/dcgmexporter/system_info.go:292 +0x9f fp=0xc000706cb0 sp=0xc000706930 pc=0x16cf0bf
github.com/NVIDIA/dcgm-exporter/pkg/dcgmexporter.InitializeSystemInfo({_, {_, _, _}, {_, _, _}}, {0x1, {0x0, 0x0, ...}, ...}, ...)
    /go/src/github.com/NVIDIA/dcgm-exporter/pkg/dcgmexporter/system_info.go:448 +0x51c fp=0xc00070f0d0 sp=0xc000706cb0 pc=0x16d09fc
github.com/NVIDIA/dcgm-exporter/pkg/dcgmexporter.GetSystemInfo(0xc000715568, 0x7)
    /go/src/github.com/NVIDIA/dcgm-exporter/pkg/dcgmexporter/gpu_collector.go:72 +0x1e8 fp=0xc000715510 sp=0xc00070f0d0 pc=0x16c3a68
github.com/NVIDIA/dcgm-exporter/pkg/dcgmexporter.(*FieldEntityGroupTypeSystemInfo).Load(0xc000390340, 0x7)
    /go/src/github.com/NVIDIA/dcgm-exporter/pkg/dcgmexporter/field_entity_group_system_info.go:73 +0x1ba fp=0xc000717838 sp=0xc000715510 pc=0x16c321a
github.com/NVIDIA/dcgm-exporter/pkg/cmd.getFieldEntityGroupTypeSystemInfo(0xc0001e6360, 0xc000348a80)
    /go/src/github.com/NVIDIA/dcgm-exporter/pkg/cmd/app.go:424 +0x2e7 fp=0xc000717918 sp=0xc000717838 pc=0x16d9ca7
github.com/NVIDIA/dcgm-exporter/pkg/cmd.startDCGMExporter(0xc00012c540, 0xc00051fb00)
    /go/src/github.com/NVIDIA/dcgm-exporter/pkg/cmd/app.go:315 +0x15c fp=0xc000717a88 sp=0xc000717918 pc=0x16d8bdc
github.com/NVIDIA/dcgm-exporter/pkg/cmd.action.func1()
    /go/src/github.com/NVIDIA/dcgm-exporter/pkg/cmd/app.go:287 +0x5b fp=0xc000717ad8 sp=0xc000717a88 pc=0x16d88fb
github.com/NVIDIA/dcgm-exporter/pkg/stdout.Capture({0x1dc9e30, 0xc000158b90}, 0xc000515b90)
    /go/src/github.com/NVIDIA/dcgm-exporter/pkg/stdout/capture.go:77 +0x1e6 fp=0xc000717b68 sp=0xc000717ad8 pc=0x16d6366
github.com/NVIDIA/dcgm-exporter/pkg/cmd.action(0xc00012c540)
    /go/src/github.com/NVIDIA/dcgm-exporter/pkg/cmd/app.go:278 +0x67 fp=0xc000717bc0 sp=0xc000717b68 pc=0x16d8867
github.com/NVIDIA/dcgm-exporter/pkg/cmd.NewApp.func1(0xc00012c540?)
    /go/src/github.com/NVIDIA/dcgm-exporter/pkg/cmd/app.go:263 +0x13 fp=0xc000717bd8 sp=0xc000717bc0 pc=0x16dbf33
github.com/urfave/cli/v2.(*Command).Run(0xc0001434a0, 0xc00012c540, {0xc000040120, 0x3, 0x3})
    /go/pkg/mod/github.com/urfave/cli/v2@v2.27.1/command.go:279 +0x97d fp=0xc000717e60 sp=0xc000717bd8 pc=0x80f9dd
github.com/urfave/cli/v2.(*App).RunContext(0xc0001f3200, {0x1dc9d18, 0x2b8ffe0}, {0xc000040120, 0x3, 0x3})
    /go/pkg/mod/github.com/urfave/cli/v2@v2.27.1/app.go:337 +0x58b fp=0xc000717ec0 sp=0xc000717e60 pc=0x80c26b
github.com/urfave/cli/v2.(*App).Run(0xc000515f30?, {0xc000040120?, 0x1?, 0x16dd1e0?})
    /go/pkg/mod/github.com/urfave/cli/v2@v2.27.1/app.go:311 +0x2f fp=0xc000717f00 sp=0xc000717ec0 pc=0x80bc8f
main.main()
    /go/src/github.com/NVIDIA/dcgm-exporter/cmd/dcgm-exporter/main.go:35 +0x5f fp=0xc000717f50 sp=0xc000717f00 pc=0x16dc1df
runtime.main()
    /usr/local/go/src/runtime/proc.go:271 +0x29d fp=0xc000717fe0 sp=0xc000717f50 pc=0x450edd
runtime.goexit({})
    /usr/local/go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc000717fe8 sp=0xc000717fe0 pc=0x483f61

goroutine 2 gp=0xc000006c40 m=nil [force gc (idle)]:
runtime.gopark(0x0?, 0x0?, 0x0?, 0x0?, 0x0?)
    /usr/local/go/src/runtime/proc.go:402 +0xce fp=0xc0000b6fa8 sp=0xc0000b6f88 pc=0x45130e
runtime.goparkunlock(...)
    /usr/local/go/src/runtime/proc.go:408
runtime.forcegchelper()
    /usr/local/go/src/runtime/proc.go:326 +0xb3 fp=0xc0000b6fe0 sp=0xc0000b6fa8 pc=0x451193
runtime.goexit({})
    /usr/local/go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc0000b6fe8 sp=0xc0000b6fe0 pc=0x483f61
created by runtime.init.6 in goroutine 1
    /usr/local/go/src/runtime/proc.go:314 +0x1a

goroutine 3 gp=0xc000007180 m=nil [GC sweep wait]:
runtime.gopark(0x1?, 0x0?, 0x0?, 0x0?, 0x0?)
    /usr/local/go/src/runtime/proc.go:402 +0xce fp=0xc0000b7780 sp=0xc0000b7760 pc=0x45130e
runtime.goparkunlock(...)
    /usr/local/go/src/runtime/proc.go:408
runtime.bgsweep(0xc000050070)
    /usr/local/go/src/runtime/mgcsweep.go:318 +0xdf fp=0xc0000b77c8 sp=0xc0000b7780 pc=0x43b9bf
runtime.gcenable.gowrap1()
    /usr/local/go/src/runtime/mgc.go:203 +0x25 fp=0xc0000b77e0 sp=0xc0000b77c8 pc=0x4302c5
runtime.goexit({})
    /usr/local/go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc0000b77e8 sp=0xc0000b77e0 pc=0x483f61
created by runtime.gcenable in goroutine 1
    /usr/local/go/src/runtime/mgc.go:203 +0x66

goroutine 4 gp=0xc000007340 m=nil [GC scavenge wait]:
runtime.gopark(0x10000?, 0x1da85e8?, 0x0?, 0x0?, 0x0?)
    /usr/local/go/src/runtime/proc.go:402 +0xce fp=0xc0000b7f78 sp=0xc0000b7f58 pc=0x45130e
runtime.goparkunlock(...)
    /usr/local/go/src/runtime/proc.go:408
runtime.(*scavengerState).park(0x2b2e080)
    /usr/local/go/src/runtime/mgcscavenge.go:425 +0x49 fp=0xc0000b7fa8 sp=0xc0000b7f78 pc=0x439369
runtime.bgscavenge(0xc000050070)
    /usr/local/go/src/runtime/mgcscavenge.go:658 +0x59 fp=0xc0000b7fc8 sp=0xc0000b7fa8 pc=0x439919
runtime.gcenable.gowrap2()
    /usr/local/go/src/runtime/mgc.go:204 +0x25 fp=0xc0000b7fe0 sp=0xc0000b7fc8 pc=0x430265
runtime.goexit({})
    /usr/local/go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc0000b7fe8 sp=0xc0000b7fe0 pc=0x483f61
created by runtime.gcenable in goroutine 1
    /usr/local/go/src/runtime/mgc.go:204 +0xa5

goroutine 5 gp=0xc000007c00 m=nil [finalizer wait]:
runtime.gopark(0x0?, 0x1c27690?, 0x60?, 0xc1?, 0x2000000020?)
    /usr/local/go/src/runtime/proc.go:402 +0xce fp=0xc0000b6620 sp=0xc0000b6600 pc=0x45130e
runtime.runfinq()
    /usr/local/go/src/runtime/mfinal.go:194 +0x107 fp=0xc0000b67e0 sp=0xc0000b6620 pc=0x42f307
runtime.goexit({})
    /usr/local/go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc0000b67e8 sp=0xc0000b67e0 pc=0x483f61
created by runtime.createfing in goroutine 1
    /usr/local/go/src/runtime/mfinal.go:164 +0x3d

goroutine 21 gp=0xc0002bafc0 m=nil [GC worker (idle)]:
runtime.gopark(0xc0000b87a8?, 0x41adeb?, 0xf7?, 0xa1?, 0xc0003462a0?)
    /usr/local/go/src/runtime/proc.go:402 +0xce fp=0xc0000b8750 sp=0xc0000b8730 pc=0x45130e
runtime.gcBgMarkWorker()
    /usr/local/go/src/runtime/mgc.go:1310 +0xe5 fp=0xc0000b87e0 sp=0xc0000b8750 pc=0x4323a5
runtime.goexit({})
    /usr/local/go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc0000b87e8 sp=0xc0000b87e0 pc=0x483f61
created by runtime.gcBgMarkStartWorkers in goroutine 1
    /usr/local/go/src/runtime/mgc.go:1234 +0x1c

goroutine 36 gp=0xc0003be000 m=nil [GC worker (idle)]:
runtime.gopark(0x0?, 0x0?, 0x0?, 0x0?, 0x0?)
    /usr/local/go/src/runtime/proc.go:402 +0xce fp=0xc0000b2750 sp=0xc0000b2730 pc=0x45130e
runtime.gcBgMarkWorker()
    /usr/local/go/src/runtime/mgc.go:1310 +0xe5 fp=0xc0000b27e0 sp=0xc0000b2750 pc=0x4323a5
runtime.goexit({})
    /usr/local/go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc0000b27e8 sp=0xc0000b27e0 pc=0x483f61
created by runtime.gcBgMarkStartWorkers in goroutine 1
    /usr/local/go/src/runtime/mgc.go:1234 +0x1c

goroutine 9 gp=0xc0002bb180 m=nil [GC worker (idle)]:
runtime.gopark(0x0?, 0x0?, 0x0?, 0x0?, 0x0?)
    /usr/local/go/src/runtime/proc.go:402 +0xce fp=0xc0000b8f50 sp=0xc0000b8f30 pc=0x45130e
runtime.gcBgMarkWorker()
    /usr/local/go/src/runtime/mgc.go:1310 +0xe5 fp=0xc0000b8fe0 sp=0xc0000b8f50 pc=0x4323a5
runtime.goexit({})
    /usr/local/go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc0000b8fe8 sp=0xc0000b8fe0 pc=0x483f61
created by runtime.gcBgMarkStartWorkers in goroutine 1
    /usr/local/go/src/runtime/mgc.go:1234 +0x1c

goroutine 37 gp=0xc0003be1c0 m=nil [GC worker (idle)]:
runtime.gopark(0x0?, 0x0?, 0x0?, 0x0?, 0x0?)
    /usr/local/go/src/runtime/proc.go:402 +0xce fp=0xc0000b2f50 sp=0xc0000b2f30 pc=0x45130e
runtime.gcBgMarkWorker()
    /usr/local/go/src/runtime/mgc.go:1310 +0xe5 fp=0xc0000b2fe0 sp=0xc0000b2f50 pc=0x4323a5
runtime.goexit({})
    /usr/local/go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc0000b2fe8 sp=0xc0000b2fe0 pc=0x483f61
created by runtime.gcBgMarkStartWorkers in goroutine 1
    /usr/local/go/src/runtime/mgc.go:1234 +0x1c

goroutine 38 gp=0xc0003be380 m=nil [GC worker (idle)]:
runtime.gopark(0x0?, 0x0?, 0x0?, 0x0?, 0x0?)
    /usr/local/go/src/runtime/proc.go:402 +0xce fp=0xc0000b3750 sp=0xc0000b3730 pc=0x45130e
runtime.gcBgMarkWorker()
    /usr/local/go/src/runtime/mgc.go:1310 +0xe5 fp=0xc0000b37e0 sp=0xc0000b3750 pc=0x4323a5
runtime.goexit({})
    /usr/local/go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc0000b37e8 sp=0xc0000b37e0 pc=0x483f61
created by runtime.gcBgMarkStartWorkers in goroutine 1
    /usr/local/go/src/runtime/mgc.go:1234 +0x1c

goroutine 10 gp=0xc0002bb340 m=nil [GC worker (idle)]:
runtime.gopark(0x0?, 0x0?, 0x0?, 0x0?, 0x0?)
    /usr/local/go/src/runtime/proc.go:402 +0xce fp=0xc0000b9750 sp=0xc0000b9730 pc=0x45130e
runtime.gcBgMarkWorker()
    /usr/local/go/src/runtime/mgc.go:1310 +0xe5 fp=0xc0000b97e0 sp=0xc0000b9750 pc=0x4323a5
runtime.goexit({})
    /usr/local/go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc0000b97e8 sp=0xc0000b97e0 pc=0x483f61
created by runtime.gcBgMarkStartWorkers in goroutine 1
    /usr/local/go/src/runtime/mgc.go:1234 +0x1c

goroutine 39 gp=0xc0003be540 m=nil [GC worker (idle)]:
runtime.gopark(0x0?, 0x0?, 0x0?, 0x0?, 0x0?)
    /usr/local/go/src/runtime/proc.go:402 +0xce fp=0xc0000b3f50 sp=0xc0000b3f30 pc=0x45130e
runtime.gcBgMarkWorker()
    /usr/local/go/src/runtime/mgc.go:1310 +0xe5 fp=0xc0000b3fe0 sp=0xc0000b3f50 pc=0x4323a5
runtime.goexit({})
    /usr/local/go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc0000b3fe8 sp=0xc0000b3fe0 pc=0x483f61
created by runtime.gcBgMarkStartWorkers in goroutine 1
    /usr/local/go/src/runtime/mgc.go:1234 +0x1c

goroutine 40 gp=0xc0003be700 m=nil [GC worker (idle)]:
runtime.gopark(0x0?, 0x0?, 0x0?, 0x0?, 0x0?)
    /usr/local/go/src/runtime/proc.go:402 +0xce fp=0xc0000b4750 sp=0xc0000b4730 pc=0x45130e
runtime.gcBgMarkWorker()
    /usr/local/go/src/runtime/mgc.go:1310 +0xe5 fp=0xc0000b47e0 sp=0xc0000b4750 pc=0x4323a5
runtime.goexit({})
    /usr/local/go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc0000b47e8 sp=0xc0000b47e0 pc=0x483f61
created by runtime.gcBgMarkStartWorkers in goroutine 1
    /usr/local/go/src/runtime/mgc.go:1234 +0x1c

goroutine 22 gp=0xc000102e00 m=nil [GC worker (idle)]:
runtime.gopark(0x0?, 0x0?, 0x0?, 0x0?, 0x0?)
    /usr/local/go/src/runtime/proc.go:402 +0xce fp=0xc0004b8750 sp=0xc0004b8730 pc=0x45130e
runtime.gcBgMarkWorker()
    /usr/local/go/src/runtime/mgc.go:1310 +0xe5 fp=0xc0004b87e0 sp=0xc0004b8750 pc=0x4323a5
runtime.goexit({})
    /usr/local/go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc0004b87e8 sp=0xc0004b87e0 pc=0x483f61
created by runtime.gcBgMarkStartWorkers in goroutine 1
    /usr/local/go/src/runtime/mgc.go:1234 +0x1c

goroutine 11 gp=0xc0002bb500 m=nil [GC worker (idle)]:
runtime.gopark(0x0?, 0x0?, 0x0?, 0x0?, 0x0?)
    /usr/local/go/src/runtime/proc.go:402 +0xce fp=0xc0000b9f50 sp=0xc0000b9f30 pc=0x45130e
runtime.gcBgMarkWorker()
    /usr/local/go/src/runtime/mgc.go:1310 +0xe5 fp=0xc0000b9fe0 sp=0xc0000b9f50 pc=0x4323a5
runtime.goexit({})
    /usr/local/go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc0000b9fe8 sp=0xc0000b9fe0 pc=0x483f61
created by runtime.gcBgMarkStartWorkers in goroutine 1
    /usr/local/go/src/runtime/mgc.go:1234 +0x1c

goroutine 41 gp=0xc0003be8c0 m=nil [GC worker (idle)]:
runtime.gopark(0x0?, 0x0?, 0x0?, 0x0?, 0x0?)
    /usr/local/go/src/runtime/proc.go:402 +0xce fp=0xc0000b4f50 sp=0xc0000b4f30 pc=0x45130e
runtime.gcBgMarkWorker()
    /usr/local/go/src/runtime/mgc.go:1310 +0xe5 fp=0xc0000b4fe0 sp=0xc0000b4f50 pc=0x4323a5
runtime.goexit({})
    /usr/local/go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc0000b4fe8 sp=0xc0000b4fe0 pc=0x483f61
created by runtime.gcBgMarkStartWorkers in goroutine 1
    /usr/local/go/src/runtime/mgc.go:1234 +0x1c

goroutine 12 gp=0xc0002bb6c0 m=nil [GC worker (idle)]:
runtime.gopark(0x0?, 0x0?, 0x0?, 0x0?, 0x0?)
    /usr/local/go/src/runtime/proc.go:402 +0xce fp=0xc0004b4750 sp=0xc0004b4730 pc=0x45130e
runtime.gcBgMarkWorker()
    /usr/local/go/src/runtime/mgc.go:1310 +0xe5 fp=0xc0004b47e0 sp=0xc0004b4750 pc=0x4323a5
runtime.goexit({})
    /usr/local/go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc0004b47e8 sp=0xc0004b47e0 pc=0x483f61
created by runtime.gcBgMarkStartWorkers in goroutine 1
    /usr/local/go/src/runtime/mgc.go:1234 +0x1c

goroutine 13 gp=0xc0002bb880 m=nil [GC worker (idle)]:
runtime.gopark(0x0?, 0x0?, 0x0?, 0x0?, 0x0?)
    /usr/local/go/src/runtime/proc.go:402 +0xce fp=0xc0004b4f50 sp=0xc0004b4f30 pc=0x45130e
runtime.gcBgMarkWorker()
    /usr/local/go/src/runtime/mgc.go:1310 +0xe5 fp=0xc0004b4fe0 sp=0xc0004b4f50 pc=0x4323a5
runtime.goexit({})
    /usr/local/go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc0004b4fe8 sp=0xc0004b4fe0 pc=0x483f61
created by runtime.gcBgMarkStartWorkers in goroutine 1
    /usr/local/go/src/runtime/mgc.go:1234 +0x1c

goroutine 23 gp=0xc000102fc0 m=nil [GC worker (idle)]:
runtime.gopark(0x0?, 0x0?, 0x0?, 0x0?, 0x0?)
    /usr/local/go/src/runtime/proc.go:402 +0xce fp=0xc0004b8f50 sp=0xc0004b8f30 pc=0x45130e
runtime.gcBgMarkWorker()
    /usr/local/go/src/runtime/mgc.go:1310 +0xe5 fp=0xc0004b8fe0 sp=0xc0004b8f50 pc=0x4323a5
runtime.goexit({})
    /usr/local/go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc0004b8fe8 sp=0xc0004b8fe0 pc=0x483f61
created by runtime.gcBgMarkStartWorkers in goroutine 1
    /usr/local/go/src/runtime/mgc.go:1234 +0x1c

goroutine 42 gp=0xc0003bea80 m=nil [GC worker (idle)]:
runtime.gopark(0x0?, 0x0?, 0x0?, 0x0?, 0x0?)
    /usr/local/go/src/runtime/proc.go:402 +0xce fp=0xc0000b5750 sp=0xc0000b5730 pc=0x45130e
runtime.gcBgMarkWorker()
    /usr/local/go/src/runtime/mgc.go:1310 +0xe5 fp=0xc0000b57e0 sp=0xc0000b5750 pc=0x4323a5
runtime.goexit({})
    /usr/local/go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc0000b57e8 sp=0xc0000b57e0 pc=0x483f61
created by runtime.gcBgMarkStartWorkers in goroutine 1
    /usr/local/go/src/runtime/mgc.go:1234 +0x1c

goroutine 14 gp=0xc0002bba40 m=nil [GC worker (idle)]:
runtime.gopark(0x0?, 0x0?, 0x0?, 0x0?, 0x0?)
    /usr/local/go/src/runtime/proc.go:402 +0xce fp=0xc0004b5750 sp=0xc0004b5730 pc=0x45130e
runtime.gcBgMarkWorker()
    /usr/local/go/src/runtime/mgc.go:1310 +0xe5 fp=0xc0004b57e0 sp=0xc0004b5750 pc=0x4323a5
runtime.goexit({})
    /usr/local/go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc0004b57e8 sp=0xc0004b57e0 pc=0x483f61
created by runtime.gcBgMarkStartWorkers in goroutine 1
    /usr/local/go/src/runtime/mgc.go:1234 +0x1c

goroutine 43 gp=0xc0003bec40 m=nil [GC worker (idle)]:
runtime.gopark(0x0?, 0x0?, 0x0?, 0x0?, 0x0?)
    /usr/local/go/src/runtime/proc.go:402 +0xce fp=0xc0000b5f50 sp=0xc0000b5f30 pc=0x45130e
runtime.gcBgMarkWorker()
    /usr/local/go/src/runtime/mgc.go:1310 +0xe5 fp=0xc0000b5fe0 sp=0xc0000b5f50 pc=0x4323a5
runtime.goexit({})
    /usr/local/go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc0000b5fe8 sp=0xc0000b5fe0 pc=0x483f61
created by runtime.gcBgMarkStartWorkers in goroutine 1
    /usr/local/go/src/runtime/mgc.go:1234 +0x1c

goroutine 24 gp=0xc000103180 m=nil [GC worker (idle)]:
runtime.gopark(0x0?, 0x0?, 0x0?, 0x0?, 0x0?)
    /usr/local/go/src/runtime/proc.go:402 +0xce fp=0xc0004b9750 sp=0xc0004b9730 pc=0x45130e
runtime.gcBgMarkWorker()
    /usr/local/go/src/runtime/mgc.go:1310 +0xe5 fp=0xc0004b97e0 sp=0xc0004b9750 pc=0x4323a5
runtime.goexit({})
    /usr/local/go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc0004b97e8 sp=0xc0004b97e0 pc=0x483f61
created by runtime.gcBgMarkStartWorkers in goroutine 1
    /usr/local/go/src/runtime/mgc.go:1234 +0x1c

goroutine 15 gp=0xc0002bbc00 m=nil [GC worker (idle)]:
runtime.gopark(0x0?, 0x0?, 0x0?, 0x0?, 0x0?)
    /usr/local/go/src/runtime/proc.go:402 +0xce fp=0xc0004b5f50 sp=0xc0004b5f30 pc=0x45130e
runtime.gcBgMarkWorker()
    /usr/local/go/src/runtime/mgc.go:1310 +0xe5 fp=0xc0004b5fe0 sp=0xc0004b5f50 pc=0x4323a5
runtime.goexit({})
    /usr/local/go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc0004b5fe8 sp=0xc0004b5fe0 pc=0x483f61
created by runtime.gcBgMarkStartWorkers in goroutine 1
    /usr/local/go/src/runtime/mgc.go:1234 +0x1c

goroutine 44 gp=0xc0003bee00 m=nil [GC worker (idle)]:
runtime.gopark(0x0?, 0x0?, 0x0?, 0x0?, 0x0?)
    /usr/local/go/src/runtime/proc.go:402 +0xce fp=0xc0003c6750 sp=0xc0003c6730 pc=0x45130e
runtime.gcBgMarkWorker()
    /usr/local/go/src/runtime/mgc.go:1310 +0xe5 fp=0xc0003c67e0 sp=0xc0003c6750 pc=0x4323a5
runtime.goexit({})
    /usr/local/go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc0003c67e8 sp=0xc0003c67e0 pc=0x483f61
created by runtime.gcBgMarkStartWorkers in goroutine 1
    /usr/local/go/src/runtime/mgc.go:1234 +0x1c

goroutine 25 gp=0xc000103340 m=nil [GC worker (idle)]:
runtime.gopark(0x0?, 0x0?, 0x0?, 0x0?, 0x0?)
    /usr/local/go/src/runtime/proc.go:402 +0xce fp=0xc0004b9f50 sp=0xc0004b9f30 pc=0x45130e
runtime.gcBgMarkWorker()
    /usr/local/go/src/runtime/mgc.go:1310 +0xe5 fp=0xc0004b9fe0 sp=0xc0004b9f50 pc=0x4323a5
runtime.goexit({})
    /usr/local/go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc0004b9fe8 sp=0xc0004b9fe0 pc=0x483f61
created by runtime.gcBgMarkStartWorkers in goroutine 1
    /usr/local/go/src/runtime/mgc.go:1234 +0x1c

goroutine 45 gp=0xc0003befc0 m=nil [GC worker (idle)]:
runtime.gopark(0x0?, 0x0?, 0x0?, 0x0?, 0x0?)
    /usr/local/go/src/runtime/proc.go:402 +0xce fp=0xc0003c6f50 sp=0xc0003c6f30 pc=0x45130e
runtime.gcBgMarkWorker()
    /usr/local/go/src/runtime/mgc.go:1310 +0xe5 fp=0xc0003c6fe0 sp=0xc0003c6f50 pc=0x4323a5
runtime.goexit({})
    /usr/local/go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc0003c6fe8 sp=0xc0003c6fe0 pc=0x483f61
created by runtime.gcBgMarkStartWorkers in goroutine 1
    /usr/local/go/src/runtime/mgc.go:1234 +0x1c

goroutine 26 gp=0xc000103500 m=nil [GC worker (idle)]:
runtime.gopark(0x0?, 0x0?, 0x0?, 0x0?, 0x0?)
    /usr/local/go/src/runtime/proc.go:402 +0xce fp=0xc0004ba750 sp=0xc0004ba730 pc=0x45130e
runtime.gcBgMarkWorker()
    /usr/local/go/src/runtime/mgc.go:1310 +0xe5 fp=0xc0004ba7e0 sp=0xc0004ba750 pc=0x4323a5
runtime.goexit({})
    /usr/local/go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc0004ba7e8 sp=0xc0004ba7e0 pc=0x483f61
created by runtime.gcBgMarkStartWorkers in goroutine 1
    /usr/local/go/src/runtime/mgc.go:1234 +0x1c

goroutine 16 gp=0xc0002bbdc0 m=nil [GC worker (idle)]:
runtime.gopark(0x0?, 0x0?, 0x0?, 0x0?, 0x0?)
    /usr/local/go/src/runtime/proc.go:402 +0xce fp=0xc0004b6750 sp=0xc0004b6730 pc=0x45130e
runtime.gcBgMarkWorker()
    /usr/local/go/src/runtime/mgc.go:1310 +0xe5 fp=0xc0004b67e0 sp=0xc0004b6750 pc=0x4323a5
runtime.goexit({})
    /usr/local/go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc0004b67e8 sp=0xc0004b67e0 pc=0x483f61
created by runtime.gcBgMarkStartWorkers in goroutine 1
    /usr/local/go/src/runtime/mgc.go:1234 +0x1c

goroutine 46 gp=0xc0003bf180 m=nil [GC worker (idle)]:
runtime.gopark(0x57c2ff456570?, 0x0?, 0x0?, 0x0?, 0x0?)
    /usr/local/go/src/runtime/proc.go:402 +0xce fp=0xc0003c7750 sp=0xc0003c7730 pc=0x45130e
runtime.gcBgMarkWorker()
    /usr/local/go/src/runtime/mgc.go:1310 +0xe5 fp=0xc0003c77e0 sp=0xc0003c7750 pc=0x4323a5
runtime.goexit({})
    /usr/local/go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc0003c77e8 sp=0xc0003c77e0 pc=0x483f61
created by runtime.gcBgMarkStartWorkers in goroutine 1
    /usr/local/go/src/runtime/mgc.go:1234 +0x1c

goroutine 27 gp=0xc0001036c0 m=nil [GC worker (idle)]:
runtime.gopark(0x2b91a40?, 0x1?, 0x8a?, 0xaf?, 0x0?)
    /usr/local/go/src/runtime/proc.go:402 +0xce fp=0xc0004baf50 sp=0xc0004baf30 pc=0x45130e
runtime.gcBgMarkWorker()
    /usr/local/go/src/runtime/mgc.go:1310 +0xe5 fp=0xc0004bafe0 sp=0xc0004baf50 pc=0x4323a5
runtime.goexit({})
    /usr/local/go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc0004bafe8 sp=0xc0004bafe0 pc=0x483f61
created by runtime.gcBgMarkStartWorkers in goroutine 1
    /usr/local/go/src/runtime/mgc.go:1234 +0x1c

goroutine 47 gp=0xc0003bf340 m=nil [GC worker (idle)]:
runtime.gopark(0x57c2ff455dc8?, 0x1?, 0xdc?, 0xd8?, 0x0?)
    /usr/local/go/src/runtime/proc.go:402 +0xce fp=0xc0003c7f50 sp=0xc0003c7f30 pc=0x45130e
runtime.gcBgMarkWorker()
    /usr/local/go/src/runtime/mgc.go:1310 +0xe5 fp=0xc0003c7fe0 sp=0xc0003c7f50 pc=0x4323a5
runtime.goexit({})
    /usr/local/go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc0003c7fe8 sp=0xc0003c7fe0 pc=0x483f61
created by runtime.gcBgMarkStartWorkers in goroutine 1
    /usr/local/go/src/runtime/mgc.go:1234 +0x1c

goroutine 28 gp=0xc000103880 m=nil [GC worker (idle)]:
runtime.gopark(0x2b91a40?, 0x1?, 0x97?, 0x98?, 0x0?)
    /usr/local/go/src/runtime/proc.go:402 +0xce fp=0xc0004bb750 sp=0xc0004bb730 pc=0x45130e
runtime.gcBgMarkWorker()
    /usr/local/go/src/runtime/mgc.go:1310 +0xe5 fp=0xc0004bb7e0 sp=0xc0004bb750 pc=0x4323a5
runtime.goexit({})
    /usr/local/go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc0004bb7e8 sp=0xc0004bb7e0 pc=0x483f61
created by runtime.gcBgMarkStartWorkers in goroutine 1
    /usr/local/go/src/runtime/mgc.go:1234 +0x1c

goroutine 29 gp=0xc000103a40 m=nil [GC worker (idle)]:
runtime.gopark(0x2b91a40?, 0x1?, 0x69?, 0xac?, 0x0?)
    /usr/local/go/src/runtime/proc.go:402 +0xce fp=0xc0004bbf50 sp=0xc0004bbf30 pc=0x45130e
runtime.gcBgMarkWorker()
    /usr/local/go/src/runtime/mgc.go:1310 +0xe5 fp=0xc0004bbfe0 sp=0xc0004bbf50 pc=0x4323a5
runtime.goexit({})
    /usr/local/go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc0004bbfe8 sp=0xc0004bbfe0 pc=0x483f61
created by runtime.gcBgMarkStartWorkers in goroutine 1
    /usr/local/go/src/runtime/mgc.go:1234 +0x1c

goroutine 50 gp=0xc000348000 m=nil [GC worker (idle)]:
runtime.gopark(0x2b91a40?, 0x1?, 0x26?, 0x16?, 0x0?)
    /usr/local/go/src/runtime/proc.go:402 +0xce fp=0xc0004b6f50 sp=0xc0004b6f30 pc=0x45130e
runtime.gcBgMarkWorker()
    /usr/local/go/src/runtime/mgc.go:1310 +0xe5 fp=0xc0004b6fe0 sp=0xc0004b6f50 pc=0x4323a5
runtime.goexit({})
    /usr/local/go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc0004b6fe8 sp=0xc0004b6fe0 pc=0x483f61
created by runtime.gcBgMarkStartWorkers in goroutine 1
    /usr/local/go/src/runtime/mgc.go:1234 +0x1c

goroutine 48 gp=0xc0003bf500 m=nil [GC worker (idle)]:
runtime.gopark(0x57c2ff45be50?, 0x1?, 0x1c?, 0x8d?, 0x0?)
    /usr/local/go/src/runtime/proc.go:402 +0xce fp=0xc0003c8750 sp=0xc0003c8730 pc=0x45130e
runtime.gcBgMarkWorker()
    /usr/local/go/src/runtime/mgc.go:1310 +0xe5 fp=0xc0003c87e0 sp=0xc0003c8750 pc=0x4323a5
runtime.goexit({})
    /usr/local/go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc0003c87e8 sp=0xc0003c87e0 pc=0x483f61
created by runtime.gcBgMarkStartWorkers in goroutine 1
    /usr/local/go/src/runtime/mgc.go:1234 +0x1c

goroutine 30 gp=0xc000103c00 m=nil [GC worker (idle)]:
runtime.gopark(0x57c2ff45d444?, 0x1?, 0x40?, 0xbe?, 0x0?)
    /usr/local/go/src/runtime/proc.go:402 +0xce fp=0xc0003c2750 sp=0xc0003c2730 pc=0x45130e
runtime.gcBgMarkWorker()
    /usr/local/go/src/runtime/mgc.go:1310 +0xe5 fp=0xc0003c27e0 sp=0xc0003c2750 pc=0x4323a5
runtime.goexit({})
    /usr/local/go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc0003c27e8 sp=0xc0003c27e0 pc=0x483f61
created by runtime.gcBgMarkStartWorkers in goroutine 1
    /usr/local/go/src/runtime/mgc.go:1234 +0x1c

goroutine 51 gp=0xc0003488c0 m=nil [IO wait]:
runtime.gopark(0x5?, 0x0?, 0x0?, 0x0?, 0xb?)
    /usr/local/go/src/runtime/proc.go:402 +0xce fp=0xc0003c4c98 sp=0xc0003c4c78 pc=0x45130e
runtime.netpollblock(0x499778?, 0x418466?, 0x0?)
    /usr/local/go/src/runtime/netpoll.go:573 +0xf7 fp=0xc0003c4cd0 sp=0xc0003c4c98 pc=0x44a077
internal/poll.runtime_pollWait(0x7f5b6e90e6d0, 0x72)
    /usr/local/go/src/runtime/netpoll.go:345 +0x85 fp=0xc0003c4cf0 sp=0xc0003c4cd0 pc=0x47e7a5
internal/poll.(*pollDesc).wait(0xc00011f680?, 0xc0001f1000?, 0x1)
    /usr/local/go/src/internal/poll/fd_poll_runtime.go:84 +0x27 fp=0xc0003c4d18 sp=0xc0003c4cf0 pc=0x4c0927
internal/poll.(*pollDesc).waitRead(...)
    /usr/local/go/src/internal/poll/fd_poll_runtime.go:89
internal/poll.(*FD).Read(0xc00011f680, {0xc0001f1000, 0x1000, 0x1000})
    /usr/local/go/src/internal/poll/fd_unix.go:164 +0x27a fp=0xc0003c4db0 sp=0xc0003c4d18 pc=0x4c1c1a
os.(*File).read(...)
    /usr/local/go/src/os/file_posix.go:29
os.(*File).Read(0xc0000bbc70, {0xc0001f1000?, 0x0?, 0x0?})
    /usr/local/go/src/os/file.go:118 +0x52 fp=0xc0003c4df0 sp=0xc0003c4db0 pc=0x4cd072
bufio.(*Scanner).Scan(0xc00013dc00)
    /usr/local/go/src/bufio/scan.go:219 +0x81e fp=0xc0003c4ec8 sp=0xc0003c4df0 pc=0x54f15e
github.com/NVIDIA/dcgm-exporter/pkg/stdout.Capture.func2()
    /go/src/github.com/NVIDIA/dcgm-exporter/pkg/stdout/capture.go:59 +0x47 fp=0xc0003c4fe0 sp=0xc0003c4ec8 pc=0x16d6467
runtime.goexit({})
    /usr/local/go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc0003c4fe8 sp=0xc0003c4fe0 pc=0x483f61
created by github.com/NVIDIA/dcgm-exporter/pkg/stdout.Capture in goroutine 1
    /go/src/github.com/NVIDIA/dcgm-exporter/pkg/stdout/capture.go:58 +0x1d9

goroutine 56 gp=0xc000349500 m=nil [IO wait]:
runtime.gopark(0xa9181505339986da?, 0x65e92d1eb2f69ad4?, 0xda?, 0x86?, 0xb?)
    /usr/local/go/src/runtime/proc.go:402 +0xce fp=0xc0002a96f8 sp=0xc0002a96d8 pc=0x45130e
runtime.netpollblock(0x499778?, 0x418466?, 0x0?)
    /usr/local/go/src/runtime/netpoll.go:573 +0xf7 fp=0xc0002a9730 sp=0xc0002a96f8 pc=0x44a077
internal/poll.runtime_pollWait(0x7f5b6e90e4e0, 0x72)
    /usr/local/go/src/runtime/netpoll.go:345 +0x85 fp=0xc0002a9750 sp=0xc0002a9730 pc=0x47e7a5
internal/poll.(*pollDesc).wait(0xc00013dd80?, 0xc0003cc000?, 0x0)
    /usr/local/go/src/internal/poll/fd_poll_runtime.go:84 +0x27 fp=0xc0002a9778 sp=0xc0002a9750 pc=0x4c0927
internal/poll.(*pollDesc).waitRead(...)
    /usr/local/go/src/internal/poll/fd_poll_runtime.go:89
internal/poll.(*FD).Read(0xc00013dd80, {0xc0003cc000, 0x1980, 0x1980})
    /usr/local/go/src/internal/poll/fd_unix.go:164 +0x27a fp=0xc0002a9810 sp=0xc0002a9778 pc=0x4c1c1a
net.(*netFD).Read(0xc00013dd80, {0xc0003cc000?, 0x7f5b6e0c85e8?, 0xc00038cd20?})
    /usr/local/go/src/net/fd_posix.go:55 +0x25 fp=0xc0002a9858 sp=0xc0002a9810 pc=0x648885
net.(*conn).Read(0xc0000bbdc8, {0xc0003cc000?, 0xc0002a9938?, 0x42279b?})
    /usr/local/go/src/net/net.go:185 +0x45 fp=0xc0002a98a0 sp=0xc0002a9858 pc=0x659745
net.(*TCPConn).Read(0xc000005340?, {0xc0003cc000?, 0xc0002a9948?, 0x448209?})
    <autogenerated>:1 +0x25 fp=0xc0002a98d0 sp=0xc0002a98a0 pc=0x66b6a5
crypto/tls.(*atLeastReader).Read(0xc00038cd20, {0xc0003cc000?, 0x0?, 0xc00038cd20?})
    /usr/local/go/src/crypto/tls/conn.go:806 +0x3b fp=0xc0002a9918 sp=0xc0002a98d0 pc=0x6a70bb
bytes.(*Buffer).ReadFrom(0xc000005430, {0x1db2880, 0xc00038cd20})
    /usr/local/go/src/bytes/buffer.go:211 +0x98 fp=0xc0002a9970 sp=0xc0002a9918 pc=0x51a3f8
crypto/tls.(*Conn).readFromUntil(0xc000005188, {0x1db0960, 0xc0000bbdc8}, 0xc0002a9980?)
    /usr/local/go/src/crypto/tls/conn.go:828 +0xde fp=0xc0002a99a8 sp=0xc0002a9970 pc=0x6a729e
crypto/tls.(*Conn).readRecordOrCCS(0xc000005188, 0x0)
    /usr/local/go/src/crypto/tls/conn.go:626 +0x3cf fp=0xc0002a9c28 sp=0xc0002a99a8 pc=0x6a43af
crypto/tls.(*Conn).readRecord(...)
    /usr/local/go/src/crypto/tls/conn.go:588
crypto/tls.(*Conn).Read(0xc000005188, {0xc0002b1000, 0x1000, 0xc000348fc0?})
    /usr/local/go/src/crypto/tls/conn.go:1370 +0x156 fp=0xc0002a9c98 sp=0xc0002a9c28 pc=0x6aac56
bufio.(*Reader).Read(0xc000347e00, {0xc0001cf000, 0x9, 0x2a8d260?})
    /usr/local/go/src/bufio/bufio.go:241 +0x197 fp=0xc0002a9cd0 sp=0xc0002a9c98 pc=0x54c717
io.ReadAtLeast({0x1db0640, 0xc000347e00}, {0xc0001cf000, 0x9, 0x9}, 0x9)
    /usr/local/go/src/io/io.go:335 +0x90 fp=0xc0002a9d18 sp=0xc0002a9cd0 pc=0x4ba5b0
io.ReadFull(...)
    /usr/local/go/src/io/io.go:354
golang.org/x/net/http2.readFrameHeader({0xc0001cf000, 0x9, 0xc0002a9dc0?}, {0x1db0640?, 0xc000347e00?})
    /go/pkg/mod/golang.org/x/net@v0.26.0/http2/frame.go:237 +0x65 fp=0xc0002a9d68 sp=0xc0002a9d18 pc=0x861d85
golang.org/x/net/http2.(*Framer).ReadFrame(0xc0001cefc0)
    /go/pkg/mod/golang.org/x/net@v0.26.0/http2/frame.go:501 +0x85 fp=0xc0002a9e10 sp=0xc0002a9d68 pc=0x8624c5
golang.org/x/net/http2.(*clientConnReadLoop).run(0xc0002a9fa8)
    /go/pkg/mod/golang.org/x/net@v0.26.0/http2/transport.go:2358 +0xda fp=0xc0002a9f60 sp=0xc0002a9e10 pc=0x8752ba
golang.org/x/net/http2.(*ClientConn).readLoop(0xc00024e180)
    /go/pkg/mod/golang.org/x/net@v0.26.0/http2/transport.go:2254 +0x8b fp=0xc0002a9fc8 sp=0xc0002a9f60 pc=0x87486b
golang.org/x/net/http2.(*Transport).newClientConn.gowrap1()
    /go/pkg/mod/golang.org/x/net@v0.26.0/http2/transport.go:869 +0x25 fp=0xc0002a9fe0 sp=0xc0002a9fc8 pc=0x86d545
runtime.goexit({})
    /usr/local/go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc0002a9fe8 sp=0xc0002a9fe0 pc=0x483f61
created by golang.org/x/net/http2.(*Transport).newClientConn in goroutine 55
    /go/pkg/mod/golang.org/x/net@v0.26.0/http2/transport.go:869 +0xd1b

rax    0x0
rbx    0x7f5b3c4c4380
rcx    0x7f5bb7539a1c
rdx    0x1
rdi    0x0
rsi    0x7f5b40000fc0
rbp    0x4
rsp    0x7f5b47ffd930
r8     0x90800
r9     0x7f5b40000fc0
r10    0x0
r11    0x287
r12    0xffffffffffffff78
r13    0x2
r14    0x0
r15    0x0
rip    0x7f5bb75066aa
rflags 0x10206
cs     0x33
fs     0x0
gs     0x0
glowkey commented 10 hours ago

We have not been able to reproduce this issue or issue #409, but we are working to determine the cause.