Closed axinojolais closed 7 years ago
thanks for the report, ctrl-\ would be helpful, do you happen to know of a way to get it to reproduce?
Nope, found no way to do that sorry. Now that I think about it, ctrl+\ just sends a SIGQUIT right ? Perhaps there are other ways to get a stack trace without having a PTY ? I'll try different things when I get a new occurrence.
yes, you can kill the process by kill -3 <pid>
and you should get the stack dump
Got it. Note that this is with a slightly different config (inputs.snmp replaced by inputs.haproxy) :
SIGQUIT: quit
PC=0x465931 m=0
goroutine 0 [idle]:
runtime.futex(0x20d0ae8, 0x0, 0x0, 0x0, 0x0, 0x20cf810, 0x0, 0x0, 0x413624, 0x20d0ae8, ...)
#011/usr/lib/go-1.6/src/runtime/sys_linux_amd64.s:306 +0x21
runtime.futexsleep(0x20d0ae8, 0x0, 0xffffffffffffffff)
#011/usr/lib/go-1.6/src/runtime/os1_linux.go:40 +0x53
runtime.notesleep(0x20d0ae8)
#011/usr/lib/go-1.6/src/runtime/lock_futex.go:145 +0xa4
runtime.stopm()
#011/usr/lib/go-1.6/src/runtime/proc.go:1538 +0x10b
runtime.findrunnable(0xc820017500, 0x0)
#011/usr/lib/go-1.6/src/runtime/proc.go:1976 +0x739
runtime.schedule()
#011/usr/lib/go-1.6/src/runtime/proc.go:2075 +0x24f
runtime.park_m(0xc820184c00)
#011/usr/lib/go-1.6/src/runtime/proc.go:2140 +0x18b
runtime.mcall(0x7fff0dfc5d20)
#011/usr/lib/go-1.6/src/runtime/asm_amd64.s:233 +0x5b
goroutine 1 [semacquire, 11378 minutes]:
sync.runtime_Semacquire(0xc82046f09c)
#011/usr/lib/go-1.6/src/runtime/sema.go:47 +0x26
sync.(*WaitGroup).Wait(0xc82046f090)
#011/usr/lib/go-1.6/src/sync/waitgroup.go:127 +0xb4
github.com/influxdata/telegraf/agent.(*Agent).Run(0xc82021c070, 0xc82000a9c0, 0x0, 0x0)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:362 +0xa07
main.reloadLoop(0xc82000a900, 0x0, 0x0)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/cmd/telegraf/telegraf.go:273 +0x2350
main.main()
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/cmd/telegraf/telegraf.go:321 +0x5d
goroutine 17 [syscall, 11378 minutes, locked to thread]:
runtime.goexit()
#011/usr/lib/go-1.6/src/runtime/asm_amd64.s:1998 +0x1
goroutine 5 [syscall, 11378 minutes]:
os/signal.signal_recv(0x0)
#011/usr/lib/go-1.6/src/runtime/sigqueue.go:116 +0x132
os/signal.loop()
#011/usr/lib/go-1.6/src/os/signal/signal_unix.go:22 +0x18
created by os/signal.init.1
#011/usr/lib/go-1.6/src/os/signal/signal_unix.go:28 +0x37
goroutine 67 [select]:
github.com/influxdata/telegraf/agent.(*Agent).gatherer(0xc82021c070, 0xc82000a9c0, 0xc8201c0c30, 0x2540be400, 0xc82000acc0, 0x0, 0x0)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:139 +0x57a
github.com/influxdata/telegraf/agent.(*Agent).Run.func2(0xc82046f090, 0xc82021c070, 0xc82000a9c0, 0xc82000acc0, 0xc8201c0c30, 0x2540be400)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:356 +0x7e
created by github.com/influxdata/telegraf/agent.(*Agent).Run
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:359 +0x9cc
goroutine 66 [select]:
github.com/influxdata/telegraf/agent.(*Agent).flusher(0xc82021c070, 0xc82000a9c0, 0xc82000acc0, 0x0, 0x0)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:265 +0x3dc
github.com/influxdata/telegraf/agent.(*Agent).Run.func1(0xc82046f090, 0xc82021c070, 0xc82000a9c0, 0xc82000acc0)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:341 +0x7b
created by github.com/influxdata/telegraf/agent.(*Agent).Run
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:345 +0x8d5
goroutine 33 [runnable]:
net.runtime_pollWait(0x7ff63bd707b0, 0x72, 0x0)
#011/usr/lib/go-1.6/src/runtime/netpoll.go:160 +0x60
net.(*pollDesc).Wait(0xc820354060, 0x72, 0x0, 0x0)
#011/usr/lib/go-1.6/src/net/fd_poll_runtime.go:73 +0x3a
net.(*pollDesc).WaitRead(0xc820354060, 0x0, 0x0)
#011/usr/lib/go-1.6/src/net/fd_poll_runtime.go:78 +0x36
net.(*netFD).accept(0xc820354000, 0x0, 0x7ff63bd708a8, 0xc8206bbb00)
#011/usr/lib/go-1.6/src/net/fd_unix.go:426 +0x27c
net.(*TCPListener).AcceptTCP(0xc820032000, 0x459420, 0x0, 0x0)
#011/usr/lib/go-1.6/src/net/tcpsock_posix.go:254 +0x4d
net/http.tcpKeepAliveListener.Accept(0xc820032000, 0x0, 0x0, 0x0, 0x0)
#011/usr/lib/go-1.6/src/net/http/server.go:2427 +0x41
net/http.(*Server).Serve(0xc820348080, 0x7ff63bd70870, 0xc820032000, 0x0, 0x0)
#011/usr/lib/go-1.6/src/net/http/server.go:2117 +0x129
net/http.(*Server).ListenAndServe(0xc820348080, 0x0, 0x0)
#011/usr/lib/go-1.6/src/net/http/server.go:2098 +0x136
created by github.com/influxdata/telegraf/plugins/outputs/prometheus_client.(*PrometheusClient).Start
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/plugins/outputs/prometheus_client/prometheus_client.go:52 +0x1f3
goroutine 50 [select, 11378 minutes, locked to thread]:
runtime.gopark(0x179d368, 0xc820459728, 0x14d08e8, 0x6, 0x18, 0x2)
#011/usr/lib/go-1.6/src/runtime/proc.go:262 +0x163
runtime.selectgoImpl(0xc820459728, 0x0, 0x18)
#011/usr/lib/go-1.6/src/runtime/select.go:392 +0xa67
runtime.selectgo(0xc820459728)
#011/usr/lib/go-1.6/src/runtime/select.go:215 +0x12
runtime.ensureSigM.func1()
#011/usr/lib/go-1.6/src/runtime/signal1_unix.go:279 +0x358
runtime.goexit()
#011/usr/lib/go-1.6/src/runtime/asm_amd64.s:1998 +0x1
goroutine 51 [select, 11378 minutes]:
main.reloadLoop.func3(0xc82000aa20, 0xc82000a9c0, 0xc820225e30, 0xc82000a900)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/cmd/telegraf/telegraf.go:241 +0x22d
created by main.reloadLoop
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/cmd/telegraf/telegraf.go:255 +0x1c1d
goroutine 68 [select]:
github.com/influxdata/telegraf/agent.(*Agent).gatherer(0xc82021c070, 0xc82000a9c0, 0xc8201c0cc0, 0x2540be400, 0xc82000acc0, 0x0, 0x0)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:139 +0x57a
github.com/influxdata/telegraf/agent.(*Agent).Run.func2(0xc82046f090, 0xc82021c070, 0xc82000a9c0, 0xc82000acc0, 0xc8201c0cc0, 0x2540be400)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:356 +0x7e
created by github.com/influxdata/telegraf/agent.(*Agent).Run
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:359 +0x9cc
goroutine 69 [select]:
github.com/influxdata/telegraf/agent.gatherWithTimeout(0xc82000a9c0, 0xc8201c0d50, 0xc820693410, 0x2540be400)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:167 +0x50d
github.com/influxdata/telegraf/agent.(*Agent).gatherer(0xc82021c070, 0xc82000a9c0, 0xc8201c0d50, 0x2540be400, 0xc82000acc0, 0x0, 0x0)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:128 +0x25b
github.com/influxdata/telegraf/agent.(*Agent).Run.func2(0xc82046f090, 0xc82021c070, 0xc82000a9c0, 0xc82000acc0, 0xc8201c0d50, 0x2540be400)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:356 +0x7e
created by github.com/influxdata/telegraf/agent.(*Agent).Run
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:359 +0x9cc
goroutine 70 [select]:
github.com/influxdata/telegraf/agent.(*Agent).gatherer(0xc82021c070, 0xc82000a9c0, 0xc8201c0db0, 0x2540be400, 0xc82000acc0, 0x0, 0x0)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:139 +0x57a
github.com/influxdata/telegraf/agent.(*Agent).Run.func2(0xc82046f090, 0xc82021c070, 0xc82000a9c0, 0xc82000acc0, 0xc8201c0db0, 0x2540be400)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:356 +0x7e
created by github.com/influxdata/telegraf/agent.(*Agent).Run
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:359 +0x9cc
goroutine 71 [select]:
github.com/influxdata/telegraf/agent.gatherWithTimeout(0xc82000a9c0, 0xc8201c0e10, 0xc8205b4c90, 0x2540be400)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:167 +0x50d
github.com/influxdata/telegraf/agent.(*Agent).gatherer(0xc82021c070, 0xc82000a9c0, 0xc8201c0e10, 0x2540be400, 0xc82000acc0, 0x0, 0x0)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:128 +0x25b
github.com/influxdata/telegraf/agent.(*Agent).Run.func2(0xc82046f090, 0xc82021c070, 0xc82000a9c0, 0xc82000acc0, 0xc8201c0e10, 0x2540be400)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:356 +0x7e
created by github.com/influxdata/telegraf/agent.(*Agent).Run
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:359 +0x9cc
goroutine 72 [select]:
github.com/influxdata/telegraf/agent.(*Agent).gatherer(0xc82021c070, 0xc82000a9c0, 0xc8201c0f30, 0x2540be400, 0xc82000acc0, 0x0, 0x0)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:139 +0x57a
github.com/influxdata/telegraf/agent.(*Agent).Run.func2(0xc82046f090, 0xc82021c070, 0xc82000a9c0, 0xc82000acc0, 0xc8201c0f30, 0x2540be400)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:356 +0x7e
created by github.com/influxdata/telegraf/agent.(*Agent).Run
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:359 +0x9cc
goroutine 73 [select]:
github.com/influxdata/telegraf/agent.gatherWithTimeout(0xc82000a9c0, 0xc8201c0f90, 0xc820c6bd10, 0x2540be400)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:167 +0x50d
github.com/influxdata/telegraf/agent.(*Agent).gatherer(0xc82021c070, 0xc82000a9c0, 0xc8201c0f90, 0x2540be400, 0xc82000acc0, 0x0, 0x0)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:128 +0x25b
github.com/influxdata/telegraf/agent.(*Agent).Run.func2(0xc82046f090, 0xc82021c070, 0xc82000a9c0, 0xc82000acc0, 0xc8201c0f90, 0x2540be400)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:356 +0x7e
created by github.com/influxdata/telegraf/agent.(*Agent).Run
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:359 +0x9cc
goroutine 74 [select]:
github.com/influxdata/telegraf/agent.gatherWithTimeout(0xc82000a9c0, 0xc8201c1020, 0xc82069e1b0, 0x2540be400)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:167 +0x50d
github.com/influxdata/telegraf/agent.(*Agent).gatherer(0xc82021c070, 0xc82000a9c0, 0xc8201c1020, 0x2540be400, 0xc82000acc0, 0x0, 0x0)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:128 +0x25b
github.com/influxdata/telegraf/agent.(*Agent).Run.func2(0xc82046f090, 0xc82021c070, 0xc82000a9c0, 0xc82000acc0, 0xc8201c1020, 0x2540be400)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:356 +0x7e
created by github.com/influxdata/telegraf/agent.(*Agent).Run
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:359 +0x9cc
goroutine 75 [select]:
github.com/influxdata/telegraf/agent.(*Agent).gatherer(0xc82021c070, 0xc82000a9c0, 0xc8201c1440, 0x2540be400, 0xc82000acc0, 0x0, 0x0)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:139 +0x57a
github.com/influxdata/telegraf/agent.(*Agent).Run.func2(0xc82046f090, 0xc82021c070, 0xc82000a9c0, 0xc82000acc0, 0xc8201c1440, 0x2540be400)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:356 +0x7e
created by github.com/influxdata/telegraf/agent.(*Agent).Run
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:359 +0x9cc
goroutine 1894522 [runnable]:
bufio.(*Reader).ReadString(0xc82071d348, 0xc82071d30a, 0x0, 0x0, 0x0, 0x0)
#011/usr/lib/go-1.6/src/bufio/bufio.go:447 +0x9e
github.com/influxdata/telegraf/vendor/github.com/shirou/gopsutil/internal/common.ReadLinesOffsetN(0xc82075a830, 0xd, 0x0, 0xffffffffffffffff, 0x0, 0x0, 0x0, 0x0, 0x0)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/vendor/github.com/shirou/gopsutil/internal/common/common.go:99 +0x256
github.com/influxdata/telegraf/vendor/github.com/shirou/gopsutil/internal/common.ReadLines(0xc82075a830, 0xd, 0x0, 0x0, 0x0, 0x0, 0x0)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/vendor/github.com/shirou/gopsutil/internal/common/common.go:80 +0x61
github.com/influxdata/telegraf/vendor/github.com/shirou/gopsutil/net.processInet(0xc82075a830, 0xd, 0x100000002, 0x14d1660, 0x3, 0xc820692990, 0x0, 0x0, 0x0, 0x0, ...)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/vendor/github.com/shirou/gopsutil/net/net_linux.go:515 +0x183
github.com/influxdata/telegraf/vendor/github.com/shirou/gopsutil/net.ConnectionsPid(0x14c9e70, 0x3, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/vendor/github.com/shirou/gopsutil/net/net_linux.go:327 +0xbee
github.com/influxdata/telegraf/vendor/github.com/shirou/gopsutil/net.Connections(0x14c9e70, 0x3, 0x0, 0x0, 0x0, 0x0, 0x0)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/vendor/github.com/shirou/gopsutil/net/net_linux.go:291 +0x50
github.com/influxdata/telegraf/plugins/inputs/system.(*systemPS).NetConnections(0x20f4db8, 0x0, 0x0, 0x0, 0x0, 0x0)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/plugins/inputs/system/ps.go:114 +0x49
github.com/influxdata/telegraf/plugins/inputs/system.(*NetStats).Gather(0xc82046ead0, 0x7ff63bdaf080, 0xc820693410, 0x0, 0x0)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/plugins/inputs/system/netstat.go:26 +0x7b
github.com/influxdata/telegraf/agent.gatherWithTimeout.func1(0xc8206fa3c0, 0xc8201c0d50, 0xc820693410)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:163 +0x73
created by github.com/influxdata/telegraf/agent.gatherWithTimeout
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:164 +0xe0
goroutine 1894468 [running]:
#011goroutine running on other thread; stack unavailable
created by github.com/influxdata/telegraf/agent.gatherWithTimeout
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:164 +0xe0
goroutine 1894611 [runnable]:
syscall.Syscall(0x0, 0xc, 0xc820879248, 0x8, 0x0, 0x8, 0x0)
#011/usr/lib/go-1.6/src/syscall/asm_linux_amd64.s:18 +0x5
syscall.readlen(0xc, 0xc820879248, 0x8, 0x5, 0x0, 0x0)
#011/usr/lib/go-1.6/src/syscall/zsyscall_linux_amd64.go:1085 +0x51
syscall.forkExec(0x14d9e70, 0xd, 0xc8205d05c0, 0x4, 0x4, 0xc820879418, 0x509d, 0x0, 0x0)
#011/usr/lib/go-1.6/src/syscall/exec_unix.go:202 +0x567
syscall.StartProcess(0x14d9e70, 0xd, 0xc8205d05c0, 0x4, 0x4, 0xc820879418, 0x2, 0x4, 0x0, 0x0)
#011/usr/lib/go-1.6/src/syscall/exec_unix.go:240 +0x6b
os.startProcess(0x14d9e70, 0xd, 0xc8205d05c0, 0x4, 0x4, 0xc820879650, 0xc8203e4540, 0x0, 0x0)
#011/usr/lib/go-1.6/src/os/exec_posix.go:45 +0x33e
os.StartProcess(0x14d9e70, 0xd, 0xc8205d05c0, 0x4, 0x4, 0xc820879650, 0x0, 0x0, 0x0)
#011/usr/lib/go-1.6/src/os/doc.go:28 +0x69
os/exec.(*Cmd).Start(0xc820aca140, 0x0, 0x0)
#011/usr/lib/go-1.6/src/os/exec/exec.go:332 +0x761
github.com/influxdata/telegraf/vendor/github.com/shirou/gopsutil/internal/common.CombinedOutputTimeout(0xc820aca140, 0xb2d05e00, 0x0, 0x0, 0x0, 0x0, 0x0)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/vendor/github.com/shirou/gopsutil/internal/common/common.go:315 +0x100
github.com/influxdata/telegraf/vendor/github.com/shirou/gopsutil/internal/common.Invoke.Command(0x14d9e70, 0xd, 0xc82069e0f0, 0x3, 0x3, 0x0, 0x0, 0x0, 0x0, 0x0)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/vendor/github.com/shirou/gopsutil/internal/common/common.go:40 +0x8a
github.com/influxdata/telegraf/vendor/github.com/shirou/gopsutil/internal/common.(*Invoke).Command(0x20f4db8, 0x14d9e70, 0xd, 0xc82069e0f0, 0x3, 0x3, 0x0, 0x0, 0x0, 0x0, ...)
#011<autogenerated>:8 +0xee
github.com/influxdata/telegraf/vendor/github.com/shirou/gopsutil/disk.GetDiskSerialNumber(0xc82087632d, 0x3, 0x0, 0x0)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/vendor/github.com/shirou/gopsutil/disk/disk_linux.go:348 +0x2a1
github.com/influxdata/telegraf/vendor/github.com/shirou/gopsutil/disk.IOCounters(0x5282f1, 0x0, 0x0)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/vendor/github.com/shirou/gopsutil/disk/disk_linux.go:333 +0x789
github.com/influxdata/telegraf/plugins/inputs/system.(*systemPS).DiskIO(0x20f4db8, 0x0, 0x0, 0x0)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/plugins/inputs/system/ps.go:118 +0x28
github.com/influxdata/telegraf/plugins/inputs/system.(*DiskIOStats).Gather(0xc8201c0fc0, 0x7ff63bdaf080, 0xc82069e1b0, 0x0, 0x0)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/plugins/inputs/system/disk.go:104 +0x67
github.com/influxdata/telegraf/agent.gatherWithTimeout.func1(0xc8203e41e0, 0xc8201c1020, 0xc82069e1b0)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:163 +0x73
created by github.com/influxdata/telegraf/agent.gatherWithTimeout
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:164 +0xe0
goroutine 70524 [select]:
net/http.(*persistConn).writeLoop(0xc82065de10)
#011/usr/lib/go-1.6/src/net/http/transport.go:1277 +0x472
created by net/http.(*Transport).dialConn
#011/usr/lib/go-1.6/src/net/http/transport.go:858 +0x10cb
goroutine 70523 [runnable]:
net.runtime_pollWait(0x7ff63bd706f0, 0x72, 0xc8209b9000)
#011/usr/lib/go-1.6/src/runtime/netpoll.go:160 +0x60
net.(*pollDesc).Wait(0xc8205a4ca0, 0x72, 0x0, 0x0)
#011/usr/lib/go-1.6/src/net/fd_poll_runtime.go:73 +0x3a
net.(*pollDesc).WaitRead(0xc8205a4ca0, 0x0, 0x0)
#011/usr/lib/go-1.6/src/net/fd_poll_runtime.go:78 +0x36
net.(*netFD).Read(0xc8205a4c40, 0xc8209b9000, 0x1000, 0x1000, 0x0, 0x7ff63bded050, 0xc82000c150)
#011/usr/lib/go-1.6/src/net/fd_unix.go:250 +0x23a
net.(*conn).Read(0xc82044c020, 0xc8209b9000, 0x1000, 0x1000, 0x0, 0x0, 0x0)
#011/usr/lib/go-1.6/src/net/net.go:172 +0xe4
net/http.noteEOFReader.Read(0x7ff6384d8000, 0xc82044c020, 0xc82065de78, 0xc8209b9000, 0x1000, 0x1000, 0x408d73, 0x0, 0x0)
#011/usr/lib/go-1.6/src/net/http/transport.go:1687 +0x67
net/http.(*noteEOFReader).Read(0xc820529960, 0xc8209b9000, 0x1000, 0x1000, 0xc820705d1d, 0x0, 0x0)
#011<autogenerated>:284 +0xd0
bufio.(*Reader).fill(0xc820be0360)
#011/usr/lib/go-1.6/src/bufio/bufio.go:97 +0x1e9
bufio.(*Reader).Peek(0xc820be0360, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0)
#011/usr/lib/go-1.6/src/bufio/bufio.go:132 +0xcc
net/http.(*persistConn).readLoop(0xc82065de10)
#011/usr/lib/go-1.6/src/net/http/transport.go:1073 +0x177
created by net/http.(*Transport).dialConn
#011/usr/lib/go-1.6/src/net/http/transport.go:857 +0x10a6
goroutine 1894549 [runnable]:
syscall.Syscall(0x4, 0xc8208fd140, 0xc82091c038, 0x0, 0x0, 0x0, 0x0)
#011/usr/lib/go-1.6/src/syscall/asm_linux_amd64.s:18 +0x5
syscall.Stat(0xc820416beb, 0x14, 0xc82091c038, 0x0, 0x0)
#011/usr/lib/go-1.6/src/syscall/zsyscall_linux_amd64.go:1529 +0x9e
os.Stat(0xc820416beb, 0x14, 0x0, 0x0, 0x0, 0x0)
#011/usr/lib/go-1.6/src/os/file_unix.go:169 +0x6f
github.com/influxdata/telegraf/plugins/inputs/system.(*systemPS).DiskUsage(0x20f4db8, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, ...)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/plugins/inputs/system/ps.go:85 +0x624
github.com/influxdata/telegraf/plugins/inputs/system.(*DiskStats).Gather(0xc82000a480, 0x7ff63bdaf080, 0xc820c6bd10, 0x0, 0x0)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/plugins/inputs/system/disk.go:44 +0xd9
github.com/influxdata/telegraf/agent.gatherWithTimeout.func1(0xc820646600, 0xc8201c0f90, 0xc820c6bd10)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:163 +0x73
created by github.com/influxdata/telegraf/agent.gatherWithTimeout
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:164 +0xe0
rax 0xca
rbx 0x0
rcx 0x465933
rdx 0x0
rdi 0x20d0ae8
rsi 0x0
rbp 0x1
rsp 0x7fff0dfc5b80
r8 0x0
r9 0x0
r10 0x0
r11 0x286
r12 0x8
r13 0x179739e
r14 0x24
r15 0x8
rip 0x465931
rflags 0x286
cs 0x33
fs 0x0
gs 0x0
was this process hanging as well? I can't seem to find anything out of the ordinary in the stack trace.
What exactly happens when it hangs? is it just the prometheus output or is it also that the rest of telegraf stops collecting metrics? Would it be possible for you to setup a dummy outputs.file
output and see if it also hangs?
Yes it was hung, prometheus output times out and also the collection of metrics stop (to be precise, telegraf stops logging anything and only does futex/clock_gettime syscalls).
I can try setting up a dummy outputs.file, sure
Hi,
I have another hang, however not on a machine where I configured outputs.file.
I ran time strace -fc
on the process for one minute, and I got :
% time seconds usecs/call calls errors syscall
------ ----------- ----------- --------- --------- ----------------
97.55 6.252000 3126000 2 2 restart_syscall
2.26 0.144623 1 176232 176232 futex
0.19 0.012182 0 352468 clock_gettime
0.00 0.000000 0 1 select
0.00 0.000000 0 1 epoll_wait
------ ----------- ----------- --------- --------- ----------------
100.00 6.408805 528704 176234 total
real 1m3.181s
user 0m2.128s
sys 0m16.948s
So I'm pretty sure it's not gathering metrics anymore.
The process is still up, if you want me to run commands on it ?
Hi,
This is still happening by the way. Anything else I could do to help ?
Thanks
would help if you could narrow it down to a specific plugin, with another stack trace
Hello,
This is happening for me as well under slightly different circumstances. We're not using Prometheus, but we are outputting to Kafka. We're using the docker, cpu, mem, processes, system, disk, dns_query, netstat, net_response, net, and disk_io plugins. This appears to happen at least once daily without any logs present in STDOUT or STDERR. We do have an instance that has been running without issues that doesn't do any host metrics collect, but instead monitors sevices such as Zookeeper and ElasticSearch.
Comparing our config vs. the OPs, you may be able to narrow it down to one or more of the following: cpu, disk, diskio, mem, netstat, system.
Unfortunately, I can't get any stack traces right now as there's another problem on this system that's causing issues. Given the somewhat random nature of this I'm wondering if there's a deadlock occurring somewhere. I know the go toolchain has options to detect this. Are you currently using that in your tests?
As soon as I can, I'll try to get more information.
New stack trace. This time, telegraf was logging the following every 10s for some time, then it just stopped logging it altogether. The machine never ran out of resources though :
ERROR: input [diskio] took longer to collect than collection interval (10s)
ERROR: input [system] took longer to collect than collection interval (10s)
Stack trace :
SIGQUIT: quit
PC=0x465931 m=0
goroutine 0 [idle]:
runtime.futex(0x20d0ae8, 0x0, 0x0, 0x0, 0x0, 0x20cf810, 0x0, 0x0, 0x413624, 0x20d0ae8, ...)
#011/usr/lib/go-1.6/src/runtime/sys_linux_amd64.s:306 +0x21
runtime.futexsleep(0x20d0ae8, 0x0, 0xffffffffffffffff)
#011/usr/lib/go-1.6/src/runtime/os1_linux.go:40 +0x53
runtime.notesleep(0x20d0ae8)
#011/usr/lib/go-1.6/src/runtime/lock_futex.go:145 +0xa4
runtime.stopm()
#011/usr/lib/go-1.6/src/runtime/proc.go:1538 +0x10b
runtime.findrunnable(0xc820012000, 0x0)
#011/usr/lib/go-1.6/src/runtime/proc.go:1976 +0x739
runtime.schedule()
#011/usr/lib/go-1.6/src/runtime/proc.go:2075 +0x24f
runtime.park_m(0xc820084900)
#011/usr/lib/go-1.6/src/runtime/proc.go:2140 +0x18b
runtime.mcall(0x7ffcb07b4f70)
#011/usr/lib/go-1.6/src/runtime/asm_amd64.s:233 +0x5b
goroutine 1 [semacquire, 34097 minutes]:
sync.runtime_Semacquire(0xc82046af2c)
#011/usr/lib/go-1.6/src/runtime/sema.go:47 +0x26
sync.(*WaitGroup).Wait(0xc82046af20)
#011/usr/lib/go-1.6/src/sync/waitgroup.go:127 +0xb4
github.com/influxdata/telegraf/agent.(*Agent).Run(0xc820146088, 0xc820142900, 0x0, 0x0)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:362 +0xa07
main.reloadLoop(0xc82000a420, 0x0, 0x0)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/cmd/telegraf/telegraf.go:273 +0x2350
main.main()
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/cmd/telegraf/telegraf.go:321 +0x5d
goroutine 17 [syscall, 34097 minutes, locked to thread]:
runtime.goexit()
#011/usr/lib/go-1.6/src/runtime/asm_amd64.s:1998 +0x1
goroutine 5 [syscall, 34097 minutes]:
os/signal.signal_recv(0x0)
#011/usr/lib/go-1.6/src/runtime/sigqueue.go:116 +0x132
os/signal.loop()
#011/usr/lib/go-1.6/src/os/signal/signal_unix.go:22 +0x18
created by os/signal.init.1
#011/usr/lib/go-1.6/src/os/signal/signal_unix.go:28 +0x37
goroutine 88 [running]:
#011goroutine running on other thread; stack unavailable
created by github.com/influxdata/telegraf/plugins/outputs/prometheus_client.(*PrometheusClient).Start
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/plugins/outputs/prometheus_client/prometheus_client.go:52 +0x1f3
goroutine 89 [select, 34097 minutes, locked to thread]:
runtime.gopark(0x179d368, 0xc820471f28, 0x14d08e8, 0x6, 0x18, 0x2)
#011/usr/lib/go-1.6/src/runtime/proc.go:262 +0x163
runtime.selectgoImpl(0xc820471f28, 0x0, 0x18)
#011/usr/lib/go-1.6/src/runtime/select.go:392 +0xa67
runtime.selectgo(0xc820471f28)
#011/usr/lib/go-1.6/src/runtime/select.go:215 +0x12
runtime.ensureSigM.func1()
#011/usr/lib/go-1.6/src/runtime/signal1_unix.go:279 +0x358
runtime.goexit()
#011/usr/lib/go-1.6/src/runtime/asm_amd64.s:1998 +0x1
goroutine 90 [select, 34097 minutes]:
main.reloadLoop.func3(0xc820142960, 0xc820142900, 0xc8201440e0, 0xc82000a420)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/cmd/telegraf/telegraf.go:241 +0x22d
created by main.reloadLoop
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/cmd/telegraf/telegraf.go:255 +0x1c1d
goroutine 10 [running]:
#011goroutine running on other thread; stack unavailable
created by github.com/influxdata/telegraf/agent.(*Agent).Run
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:345 +0x8d5
goroutine 11 [running]:
#011goroutine running on other thread; stack unavailable
created by github.com/influxdata/telegraf/agent.(*Agent).Run
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:359 +0x9cc
goroutine 12 [running]:
#011goroutine running on other thread; stack unavailable
created by github.com/influxdata/telegraf/agent.(*Agent).Run
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:359 +0x9cc
goroutine 13 [semacquire]:
fmt.Sprintf(0x16e7f40, 0x46, 0xc820058c90, 0x2, 0x2, 0x0, 0x0)
#011/usr/lib/go-1.6/src/fmt/print.go:204 +0xa6
log.Printf(0x16e7f40, 0x46, 0xc820058c90, 0x2, 0x2)
#011/usr/lib/go-1.6/src/log/log.go:289 +0x49
github.com/influxdata/telegraf/agent.gatherWithTimeout(0xc820142900, 0xc8202a0030, 0xc820d53bc0, 0x2540be400)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:176 +0x47a
github.com/influxdata/telegraf/agent.(*Agent).gatherer(0xc820146088, 0xc820142900, 0xc8202a0030, 0x2540be400, 0xc820142ba0, 0x0, 0x0)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:128 +0x25b
github.com/influxdata/telegraf/agent.(*Agent).Run.func2(0xc82046af20, 0xc820146088, 0xc820142900, 0xc820142ba0, 0xc8202a0030, 0x2540be400)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:356 +0x7e
created by github.com/influxdata/telegraf/agent.(*Agent).Run
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:359 +0x9cc
goroutine 14 [running]:
#011goroutine running on other thread; stack unavailable
created by github.com/influxdata/telegraf/agent.(*Agent).Run
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:359 +0x9cc
goroutine 15 [running]:
#011goroutine running on other thread; stack unavailable
created by github.com/influxdata/telegraf/agent.(*Agent).Run
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:359 +0x9cc
goroutine 16 [running]:
#011goroutine running on other thread; stack unavailable
created by github.com/influxdata/telegraf/agent.(*Agent).Run
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:359 +0x9cc
goroutine 98 [running]:
#011goroutine running on other thread; stack unavailable
created by github.com/influxdata/telegraf/agent.(*Agent).Run
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:359 +0x9cc
goroutine 99 [semacquire]:
github.com/influxdata/telegraf/agent.gatherWithTimeout(0xc820142900, 0xc8202a0390, 0xc82097d5c0, 0x2540be400)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:176 +0x3c0
github.com/influxdata/telegraf/agent.(*Agent).gatherer(0xc820146088, 0xc820142900, 0xc8202a0390, 0x2540be400, 0xc820142ba0, 0x0, 0x0)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:128 +0x25b
github.com/influxdata/telegraf/agent.(*Agent).Run.func2(0xc82046af20, 0xc820146088, 0xc820142900, 0xc820142ba0, 0xc8202a0390, 0x2540be400)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:356 +0x7e
created by github.com/influxdata/telegraf/agent.(*Agent).Run
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:359 +0x9cc
goroutine 100 [running]:
#011goroutine running on other thread; stack unavailable
created by github.com/influxdata/telegraf/agent.(*Agent).Run
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:359 +0x9cc
goroutine 53 [copystack]:
fmt.(*fmt).padString(0xc820b6f098, 0x14c4940, 0x3)
#011/usr/lib/go-1.6/src/fmt/format.go:128
fmt.(*fmt).fmt_s(0xc820b6f098, 0x14c4940, 0x3)
#011/usr/lib/go-1.6/src/fmt/format.go:322 +0x61
fmt.(*pp).fmtString(0xc820b6f040, 0x14c4940, 0x3, 0xc800000076)
#011/usr/lib/go-1.6/src/fmt/print.go:518 +0x144
fmt.(*pp).printArg(0xc820b6f040, 0x1058320, 0xc820778060, 0x76, 0x0, 0x0)
#011/usr/lib/go-1.6/src/fmt/print.go:797 +0xd95
fmt.(*pp).handleMethods(0xc820b6f040, 0xc800000076, 0x0, 0x1)
#011/usr/lib/go-1.6/src/fmt/print.go:724 +0x543
fmt.(*pp).printArg(0xc820b6f040, 0x1309e00, 0xc820778050, 0x76, 0x0, 0x0)
#011/usr/lib/go-1.6/src/fmt/print.go:806 +0x4a9
fmt.(*pp).doPrintf(0xc820b6f040, 0x16264c0, 0x1a, 0xc8204f9c30, 0x1, 0x1)
#011/usr/lib/go-1.6/src/fmt/print.go:1238 +0x1dcd
fmt.Sprintf(0x16264c0, 0x1a, 0xc8204f9c30, 0x1, 0x1, 0x0, 0x0)
#011/usr/lib/go-1.6/src/fmt/print.go:203 +0x6f
fmt.Errorf(0x16264c0, 0x1a, 0xc8204f9c30, 0x1, 0x1, 0x0, 0x0)
#011/usr/lib/go-1.6/src/fmt/print.go:212 +0x65
net/http.(*persistConn).readLoopPeekFailLocked(0xc820118340, 0x7f5cd4266198, 0xc820778050)
#011/usr/lib/go-1.6/src/net/http/transport.go:1225 +0x336
net/http.(*persistConn).readLoop(0xc820118340)
#011/usr/lib/go-1.6/src/net/http/transport.go:1080 +0x26d
created by net/http.(*Transport).dialConn
#011/usr/lib/go-1.6/src/net/http/transport.go:857 +0x10a6
goroutine 54 [select, 10805 minutes]:
net/http.(*persistConn).writeLoop(0xc820118340)
#011/usr/lib/go-1.6/src/net/http/transport.go:1277 +0x472
created by net/http.(*Transport).dialConn
#011/usr/lib/go-1.6/src/net/http/transport.go:858 +0x10cb
goroutine 3281067 [running]:
#011goroutine running on other thread; stack unavailable
created by github.com/influxdata/telegraf/agent.gatherWithTimeout
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:164 +0xe0
goroutine 3280892 [copystack]:
bytes.Replace(0xc8202c1790, 0x6, 0x8, 0xc82000cee8, 0x2, 0x8, 0xc8204f7447, 0x1, 0x1, 0xffffffffffffffff, ...)
#011/usr/lib/go-1.6/src/bytes/bytes.go:633 +0x107
github.com/influxdata/telegraf/vendor/github.com/influxdata/influxdb/models.unescapeMeasurement(0xc8202c1790, 0x6, 0x8, 0x0, 0x0, 0x0)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/vendor/github.com/influxdata/influxdb/models/points.go:1009 +0x153
github.com/influxdata/telegraf/vendor/github.com/influxdata/influxdb/models.MakeKey(0xc8202c1790, 0x6, 0x8, 0xc8204f7af0, 0x0, 0x0, 0x0)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/vendor/github.com/influxdata/influxdb/models/points.go:1231 +0x64
github.com/influxdata/telegraf/vendor/github.com/influxdata/influxdb/models.NewPoint(0x14d14b0, 0x6, 0xc8204f7af0, 0xc8204f7ac0, 0xecfece09a, 0x7f5c00000000, 0x20cf480, 0x0, 0x0, 0x0, ...)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/vendor/github.com/influxdata/influxdb/models/points.go:1114 +0x544
github.com/influxdata/telegraf/vendor/github.com/influxdata/influxdb/client/v2.NewPoint(0x14d14b0, 0x6, 0xc8204f7af0, 0xc8204f7ac0, 0xc8204f78a8, 0x1, 0x1, 0x627f9aca2adf32c3, 0x0, 0x0)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/vendor/github.com/influxdata/influxdb/client/v2/client.go:351 +0xc3
github.com/influxdata/telegraf.NewMetric(0x14d14b0, 0x6, 0xc8204f7af0, 0xc8204f7ac0, 0xecfece09a, 0x0, 0x20cf480, 0x0, 0x0, 0x0, ...)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/metric.go:50 +0x10a
github.com/influxdata/telegraf/agent.(*accumulator).AddFields(0xc82097d5c0, 0x14d14b0, 0x6, 0x0, 0xc8204f7af0, 0x0, 0x0, 0x0)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/accumulator.go:150 +0x8fb
github.com/influxdata/telegraf/plugins/inputs/system.(*SystemStats).Gather(0x20f4db8, 0x7f5cd6f015a8, 0xc82097d5c0, 0x0, 0x0)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/plugins/inputs/system/system.go:49 +0x5c2
github.com/influxdata/telegraf/agent.gatherWithTimeout.func1(0xc82029e1e0, 0xc8202a0390, 0xc82097d5c0)
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:163 +0x73
created by github.com/influxdata/telegraf/agent.gatherWithTimeout
#011/build/telegraf-71UO6N/telegraf-1.0.1+git1324~215f1b5~ubuntu16.04.1/obj-x86_64-linux-gnu/src/github.com/influxdata/telegraf/agent/agent.go:164 +0xe0
rax 0xca
rbx 0x0
rcx 0x465933
rdx 0x0
rdi 0x20d0ae8
rsi 0x0
rbp 0x1
rsp 0x7ffcb07b4dd0
r8 0x0
r9 0x0
r10 0x0
r11 0x286
r12 0x17
r13 0x1797732
r14 0x4
r15 0x8
rip 0x465931
rflags 0x286
cs 0x33
fs 0x0
gs 0x0
I'm seeing a similar issue right now with one node, though it's happened several times. Using strace, I could see that it was stuck on a futex. I can't get the full stack trace (stupid setup), but here's some highlights using version 1.2:
Goroutine 0 [idle] asm_amd64.s:240 proc.go:2183 proc.go:2120 proc.go:2021 proc.go:1594 lock_futex.go:145 os_linux.go:45 sys_linux_amd64.go:387
Goroutine 1 [semacquire, 1490 minutes]: telegraf.go: 342 telegraf.go:284 agent.go:383 waitgroup.go:131 sema.go:47
Goroutine 16 [select, 1490 minutes, locked to thread]: asm_amd64.s:2086 signal1_unix.go:304 select.go:238 select.go:423 proc.go:259
goroutine running on another thread; stack unavailable created by github.com/influxdata/telegraf/agent.gatherWithTimeout agent.go:154
goroutine running on another thread; stack unavailable created by github.com/shirou/gopsutil/internal/common.WaitTimeout github.com/shirou/gopsutil/internal/common/common.go:329
goroutine running on another thread; stack unavailable ceated by os/exec.(*Cmd).Start exec.go:371
Config:
outputs.kafka inputs.docker inputs.cpu inputs.mem inputs.processes inputs.system inputs.disk inputs.dns_query inputs.netstat inputs.net_response inputs.net inputs.diskio
@axinojolais Are you still having this issue? Would it be possible for you to retest with the latest development version?
It's going to take a while, I'll get back to you
Thanks, can you just open a new issue if it is still happening?
Bug report
telegraf stops gathering metrics and the prometheus_client output becomes unresponsive
Relevant telegraf.conf:
System info:
telegraf version 1.0.1+git1324~215f1b5, Ubuntu xenial
Steps to reproduce:
Expected behavior:
telegraf never stops gathering data and can output metrics when queried
Actual behavior:
telegraf stops gathering data and times out when metrics are requested (prometheous_client output)
Additional info:
I tried to ctrl+\ the process, but couldn't succeed in reptyr'ing it so I failed. It's now running in a tmux session, should the problem appear again. stracing reveals the following calls just repeat as fast as possible :
Note that I have seen this behaviour multiple times, on different servers.
Last logs :