sni / lmd

Livestatus Multitool Daemon - Create livestatus federation from multiple sources
https://labs.consol.de/omd/packages/lmd/
GNU General Public License v3.0
42 stars 31 forks source link

fatal error: concurrent map read and map write #16

Closed jlopezzarza closed 6 years ago

jlopezzarza commented 7 years ago

Hi there,

Let me begin by thanking you for this tool, pretty awesome!

We have some Shinken installations deployed and we use LMD as a cache to avoid overloading livestatus. In this particular case, the setup is shinken-2.4.3 with over 5k hosts and 20k services, with all components, including LMD, installed on the same server.

On a few occasions, lmd goes offline and exits with the following error:

fatal error: concurrent map read and map write

goroutine 456549 [running]:
runtime.throw(0x8a03f5, 0x21)
    /root/.gvm/gos/go1.7.4/src/runtime/panic.go:566 +0x95 fp=0xc42c441bf0 sp=0xc42c441bd0
runtime.mapaccess1_faststr(0x81b260, 0xc420167dd0, 0xc42eb92224, 0x8, 0x1)
    /root/.gvm/gos/go1.7.4/src/runtime/hashmap_fast.go:201 +0x4f3 fp=0xc42c441c50 sp=0xc42c441bf0
main.(*Peer).BuildLocalResponseData(0xc4201be2d0, 0xc42649d4a0, 0xc4269add00, 0x0, 0x0, 0x0)
    /var/omnibus/src/src/github.com/sni/lmd/lmd/peer.go:1707 +0x14e fp=0xc42c441d58 sp=0xc42c441c50
main.(*Response).BuildLocalResponse.func1(0xc4201be2d0, 0xc42649d4a0, 0xc4269add00, 0xc42eb922a8, 0xc4201be2d0, 0xc42eb922d0)
    /var/omnibus/src/src/github.com/sni/lmd/lmd/response.go:516 +0x152 fp=0xc42c441f80 sp=0xc42c441d58
runtime.goexit()
    /root/.gvm/gos/go1.7.4/src/runtime/asm_amd64.s:2086 +0x1 fp=0xc42c441f88 sp=0xc42c441f80
created by main.(*Response).BuildLocalResponse
    /var/omnibus/src/src/github.com/sni/lmd/lmd/response.go:540 +0x27a

goroutine 1 [select, 1532 minutes]:
main.mainLoop(0xc4201bc360, 0x8d4500)
    /var/omnibus/src/src/github.com/sni/lmd/lmd/main.go:217 +0x9cb
main.main()
    /var/omnibus/src/src/github.com/sni/lmd/lmd/main.go:149 +0x5b

goroutine 17 [syscall, 1532 minutes, locked to thread]:
runtime.goexit()
    /root/.gvm/gos/go1.7.4/src/runtime/asm_amd64.s:2086 +0x1

goroutine 33 [select]:
main.LocalListenerLivestatus(0xc4201d21c0, 0x88d080, 0x3, 0xc420244090, 0xd, 0xc420244f00, 0xc4201bc780)
    /var/omnibus/src/src/github.com/sni/lmd/lmd/listener.go:227 +0x73d
main.LocalListener(0xc4201d21c0, 0xc420244090, 0xd, 0xc420244f00, 0xc420244f10, 0xc4201bc780)
    /var/omnibus/src/src/github.com/sni/lmd/lmd/listener.go:173 +0x2e1
main.mainLoop.func1(0xc4201d21c0, 0xc420244f00, 0xc420244f10, 0xc4201bc780, 0xc420244090, 0xd)
    /var/omnibus/src/src/github.com/sni/lmd/lmd/main.go:206 +0x7f
created by main.mainLoop
    /var/omnibus/src/src/github.com/sni/lmd/lmd/main.go:207 +0x708

goroutine 6 [syscall, 1532 minutes]:
os/signal.signal_recv(0x0)
    /root/.gvm/gos/go1.7.4/src/runtime/sigqueue.go:116 +0x157
os/signal.loop()
    /root/.gvm/gos/go1.7.4/src/os/signal/signal_unix.go:22 +0x22
created by os/signal.init.1
    /root/.gvm/gos/go1.7.4/src/os/signal/signal_unix.go:28 +0x41

goroutine 18 [select, 1532 minutes, locked to thread]:
runtime.gopark(0x8d4d40, 0x0, 0x88f45e, 0x6, 0x18, 0x2)
    /root/.gvm/gos/go1.7.4/src/runtime/proc.go:259 +0x13a
runtime.selectgoImpl(0xc42004af30, 0x0, 0x18)
    /root/.gvm/gos/go1.7.4/src/runtime/select.go:423 +0x11d9
runtime.selectgo(0xc42004af30)
    /root/.gvm/gos/go1.7.4/src/runtime/select.go:238 +0x1c
runtime.ensureSigM.func1()
    /root/.gvm/gos/go1.7.4/src/runtime/signal1_unix.go:304 +0x2f3
runtime.goexit()
    /root/.gvm/gos/go1.7.4/src/runtime/asm_amd64.s:2086 +0x1

goroutine 34 [IO wait, 1532 minutes]:
net.runtime_pollWait(0x7f565aff3e40, 0x72, 0x0)
    /root/.gvm/gos/go1.7.4/src/runtime/netpoll.go:160 +0x59
net.(*pollDesc).wait(0xc42017baa0, 0x72, 0xc42005dc38, 0xc420010038)
    /root/.gvm/gos/go1.7.4/src/net/fd_poll_runtime.go:73 +0x38
net.(*pollDesc).waitRead(0xc42017baa0, 0xa75440, 0xc420010038)
    /root/.gvm/gos/go1.7.4/src/net/fd_poll_runtime.go:78 +0x34
net.(*netFD).accept(0xc42017ba40, 0x0, 0xa73e00, 0xc4202820c0)
    /root/.gvm/gos/go1.7.4/src/net/fd_unix.go:419 +0x238
net.(*UnixListener).accept(0xc420282060, 0x4820de, 0xc42005dcf8, 0x40c060)
    /root/.gvm/gos/go1.7.4/src/net/unixsock_posix.go:158 +0x32
net.(*UnixListener).Accept(0xc420282060, 0x8d44c8, 0xc4201bc780, 0x88d8e2, 0x4)
    /root/.gvm/gos/go1.7.4/src/net/unixsock.go:229 +0x49
main.LocalListenerLivestatus(0xc4201d21c0, 0x88d8e2, 0x4, 0xc420234320, 0x1a, 0xc420244f00, 0xc4201bc780)
    /var/omnibus/src/src/github.com/sni/lmd/lmd/listener.go:210 +0x3d5
main.LocalListener(0xc4201d21c0, 0xc420234320, 0x1a, 0xc420244f00, 0xc420244f10, 0xc4201bc780)
    /var/omnibus/src/src/github.com/sni/lmd/lmd/listener.go:180 +0x3a6
main.mainLoop.func1(0xc4201d21c0, 0xc420244f00, 0xc420244f10, 0xc4201bc780, 0xc420234320, 0x1a)
    /var/omnibus/src/src/github.com/sni/lmd/lmd/main.go:206 +0x7f
created by main.mainLoop
    /var/omnibus/src/src/github.com/sni/lmd/lmd/main.go:207 +0x708

goroutine 50 [chan receive, 1532 minutes]:
main.LocalListenerLivestatus.func1(0xc4201bc780, 0x88d080, 0x3, 0xc420244090, 0xd, 0xa78080, 0xc42015c010)
    /var/omnibus/src/src/github.com/sni/lmd/lmd/listener.go:201 +0x4d
created by main.LocalListenerLivestatus
    /var/omnibus/src/src/github.com/sni/lmd/lmd/listener.go:205 +0x3b0

goroutine 9 [chan receive, 1532 minutes]:
main.LocalListenerLivestatus.func1(0xc4201bc780, 0x88d8e2, 0x4, 0xc420234320, 0x1a, 0xa78100, 0xc420282060)
    /var/omnibus/src/src/github.com/sni/lmd/lmd/listener.go:201 +0x4d
created by main.LocalListenerLivestatus
    /var/omnibus/src/src/github.com/sni/lmd/lmd/listener.go:205 +0x3b0

goroutine 35 [runnable]:
runtime.Caller(0x2, 0x4ae0b9, 0xc42a673540, 0xc42a6735a9, 0x19, 0x0)
    /root/.gvm/gos/go1.7.4/src/runtime/extern.go:178 +0x82
main.(*LoggingLock).RLockN(0xc420167e60, 0x2)
    /var/omnibus/src/src/github.com/sni/lmd/lmd/logginglock.go:47 +0x40
main.(*Peer).StatusGet(0xc4201be2d0, 0x890d23, 0x8, 0x67, 0xffffffffffffffff)
    /var/omnibus/src/src/github.com/sni/lmd/lmd/peer.go:363 +0x3c
main.(*Peer).GetConnection(0xc4201be2d0, 0xc42515ea50, 0xc42515ea50, 0x6019b9, 0xc4201661b0, 0xc420232820, 0x0)
    /var/omnibus/src/src/github.com/sni/lmd/lmd/peer.go:996 +0xa5
main.(*Peer).query(0xc4201be2d0, 0xc42515f530, 0x0, 0x0, 0x0, 0x0, 0x0)
    /var/omnibus/src/src/github.com/sni/lmd/lmd/peer.go:800 +0x7f
main.(*Peer).Query(0xc4201be2d0, 0xc42515f530, 0xc42515edf8, 0xc4201f4b60, 0x2, 0x2, 0x2)
    /var/omnibus/src/src/github.com/sni/lmd/lmd/peer.go:937 +0x39
main.(*Peer).UpdateDeltaCommentsOrDowntimes(0xc4201be2d0, 0x891c00, 0x9, 0x0, 0x0)
    /var/omnibus/src/src/github.com/sni/lmd/lmd/peer.go:710 +0x1b8
main.(*Peer).UpdateDeltaTables(0xc4201be2d0, 0x89241d)
    /var/omnibus/src/src/github.com/sni/lmd/lmd/peer.go:500 +0xb59
main.(*Peer).periodicUpdate(0xc4201be2d0, 0xc42515fdab, 0xc42515fdb0)
    /var/omnibus/src/src/github.com/sni/lmd/lmd/peer.go:328 +0x381
main.(*Peer).updateLoop(0xc4201be2d0)
    /var/omnibus/src/src/github.com/sni/lmd/lmd/peer.go:273 +0x20f
main.(*Peer).Start.func1(0xc4201be2d0)
    /var/omnibus/src/src/github.com/sni/lmd/lmd/peer.go:188 +0x4f
created by main.(*Peer).Start
    /var/omnibus/src/src/github.com/sni/lmd/lmd/peer.go:193 +0x1b9

goroutine 456548 [semacquire]:
sync.runtime_Semacquire(0xc42eb922dc)
    /root/.gvm/gos/go1.7.4/src/runtime/sema.go:47 +0x30
sync.(*WaitGroup).Wait(0xc42eb922d0)
    /root/.gvm/gos/go1.7.4/src/sync/waitgroup.go:131 +0x97
main.(*Response).BuildLocalResponse(0xc42649d4a0, 0xc42eb922c0, 0x1, 0x1, 0xc4269add00, 0xc42eb922c0, 0x0)
    /var/omnibus/src/src/github.com/sni/lmd/lmd/response.go:543 +0x4a1
main.NewResponse(0xc422bc5540, 0x892dbe, 0xb, 0x0)
    /var/omnibus/src/src/github.com/sni/lmd/lmd/response.go:98 +0x7ca
main.(*Request).GetResponse(0xc422bc5540, 0x2ec3ac50, 0xa9aae0, 0x0)
    /var/omnibus/src/src/github.com/sni/lmd/lmd/request.go:304 +0x176
main.ProcessRequests(0xc42391d648, 0x1, 0x1, 0xa7ae80, 0xc42391d640, 0xc4269ad860, 0x14, 0x0, 0x0, 0x0)
    /var/omnibus/src/src/github.com/sni/lmd/lmd/listener.go:92 +0x163
main.QueryServer(0xa7ae80, 0xc42391d640, 0xc42f5c2640, 0x0)
    /var/omnibus/src/src/github.com/sni/lmd/lmd/listener.go:45 +0x58f
main.LocalListenerLivestatus.func2(0xc42649d380, 0xa7ae80, 0xc42391d640)
    /var/omnibus/src/src/github.com/sni/lmd/lmd/listener.go:225 +0x55
created by main.LocalListenerLivestatus
    /var/omnibus/src/src/github.com/sni/lmd/lmd/listener.go:226 +0x47b

Any clue about the error?

Thanks in advance.

sni commented 7 years ago

You are running the latest git head? Just to make sure we are not hunting already solved bugs.

jlopezzarza commented 7 years ago

I'm using the version tagged as v1.1.2

sni commented 7 years ago

That's the latest version anyway.

sni commented 6 years ago

Could you try today's release? Should be better now.

jlopezzarza commented 6 years ago

I'm sorry, but we have been forced to roll back that version in our production environments because it goes offline without any panic or trace, and the process stops answering queries...

I hope to get some time to test it further, and I'll keep you posted.

sni commented 6 years ago

Without a trace or error I cannot do anything.