logv / sybil

columnar storage + NoSQL OLAP engine | https://logv.org
https://logv.org
Other
305 stars 25 forks source link

fatal error: concurrent map iteration and map write #100

Closed alum closed 5 years ago

alum commented 5 years ago

I have compiled sybil with Go 1.11.2 on OS X. Sometimes when running queries I get "fatal error: concurrent map iteration and map write". It seems to me that the problem is that the iteration over t.KeyTable in makeRecordSlab (record_slab.go:39) is missing an RLock/RUnlock combo.

The table has a large number of columns (3000+), but only two are queried. The amount of data is pretty small, around 40K records in total.

Full query / stack trace:

$ sybil query -dir /tmp/sybil -table mytable -time -time-bucket 3600 -str-filter profile_id:eq:18ae867d-a3c2-43cd-909d-09e478f63ccd -int-filter time:gt:1560250098

fatal error: concurrent map iteration and map write

goroutine 25 [running]:
runtime.throw(0x11dad10, 0x26)
    /usr/local/Cellar/go/1.11.2/libexec/src/runtime/panic.go:608 +0x72 fp=0xc0011107a0 sp=0xc001110770 pc=0x10295a2
runtime.mapiternext(0xc001110a28)
    /usr/local/Cellar/go/1.11.2/libexec/src/runtime/map.go:790 +0x525 fp=0xc001110828 sp=0xc0011107a0 pc=0x100df95
github.com/logv/sybil/src/lib.(*TableBlock).makeRecordSlab(0xc00045c090, 0xc00082a180, 0x2000, 0xc0000da0c0, 0xc0007a3d70, 0x9b2a8225573c4100, 0x10, 0x1, 0x0)
    /Users/filip/go/src/github.com/logv/sybil/src/lib/record_slab.go:39 +0xb7 fp=0xc001110a98 sp=0xc001110828 pc=0x11494f7
github.com/logv/sybil/src/lib.(*TableBlock).allocateRecords(0xc00045c090, 0xc00082a180, 0xc000002000, 0xc0000da0c0, 0xc0007a3d70, 0x7ffeefbff600, 0x0, 0x0, 0x0)
    /Users/filip/go/src/github.com/logv/sybil/src/lib/record_slab.go:20 +0xa7 fp=0xc001110af8 sp=0xc001110a98 pc=0x1149267
github.com/logv/sybil/src/lib.(*Table).LoadBlockFromDir(0xc0000d2000, 0xc00107bad0, 0x21, 0xc00082a180, 0x0, 0x0)
    /Users/filip/go/src/github.com/logv/sybil/src/lib/table_block_io.go:221 +0x287 fp=0xc001110d08 sp=0xc001110af8 pc=0x1151567
github.com/logv/sybil/src/lib.(*Table).LoadAndQueryRecords.func1(0xc000012220, 0xc0000d2000, 0xc00107bad0, 0x21, 0xc0004be500, 0xc0006ca480, 0xc00082a180, 0xc0006ca458, 0xc0006ca488, 0xc0003cd780, ...)
    /Users/filip/go/src/github.com/logv/sybil/src/lib/table_query.go:127 +0xa10 fp=0xc001110f58 sp=0xc001110d08 pc=0x1160a30
runtime.goexit()
    /usr/local/Cellar/go/1.11.2/libexec/src/runtime/asm_amd64.s:1333 +0x1 fp=0xc001110f60 sp=0xc001110f58 pc=0x1053b51
created by github.com/logv/sybil/src/lib.(*Table).LoadAndQueryRecords
    /Users/filip/go/src/github.com/logv/sybil/src/lib/table_query.go:105 +0x8a3

goroutine 1 [semacquire]:
sync.runtime_Semacquire(0xc000012228)
    /usr/local/Cellar/go/1.11.2/libexec/src/runtime/sema.go:56 +0x39
sync.(*WaitGroup).Wait(0xc000012220)
    /usr/local/Cellar/go/1.11.2/libexec/src/sync/waitgroup.go:130 +0x64
github.com/logv/sybil/src/lib.(*Table).LoadAndQueryRecords(0xc0000d2000, 0xc00082a180, 0xc0004be500, 0xc0004be600)
    /Users/filip/go/src/github.com/logv/sybil/src/lib/table_query.go:297 +0x10b4
github.com/logv/sybil/src/cmd.runQueryCmdLine()
    /Users/filip/go/src/github.com/logv/sybil/src/cmd/cmd_query.go:281 +0x15eb
github.com/logv/sybil/src/cmd.RunQueryCmdLine()
    /Users/filip/go/src/github.com/logv/sybil/src/cmd/cmd_query.go:74 +0x2f
main.main()
    /Users/filip/go/src/github.com/logv/sybil/main.go:95 +0xcf

goroutine 19 [semacquire]:
sync.runtime_SemacquireMutex(0xc0000b2024, 0xc0000b2001)
    /usr/local/Cellar/go/1.11.2/libexec/src/runtime/sema.go:71 +0x3d
sync.(*Mutex).Lock(0xc0000b2020)
    /usr/local/Cellar/go/1.11.2/libexec/src/sync/mutex.go:134 +0xff
sync.(*RWMutex).Lock(0xc0000b2020)
    /usr/local/Cellar/go/1.11.2/libexec/src/sync/rwmutex.go:93 +0x2d
github.com/logv/sybil/src/lib.(*Table).get_key_id(0xc0000d2000, 0xc0006b5ec0, 0x26, 0x0)
    /Users/filip/go/src/github.com/logv/sybil/src/lib/table.go:139 +0xc4
github.com/logv/sybil/src/lib.(*Table).ShouldLoadBlockFromDir(0xc0000d2000, 0xc00107b9b0, 0x21, 0xc0004be500, 0x0)
    /Users/filip/go/src/github.com/logv/sybil/src/lib/table_block_io.go:125 +0x2a8
github.com/logv/sybil/src/lib.(*Table).LoadAndQueryRecords.func1(0xc000012220, 0xc0000d2000, 0xc00107b9b0, 0x21, 0xc0004be500, 0xc0006ca480, 0xc00082a180, 0xc0006ca458, 0xc0006ca488, 0xc0003cd780, ...)
    /Users/filip/go/src/github.com/logv/sybil/src/lib/table_query.go:110 +0xb9
created by github.com/logv/sybil/src/lib.(*Table).LoadAndQueryRecords
    /Users/filip/go/src/github.com/logv/sybil/src/lib/table_query.go:105 +0x8a3

goroutine 20 [runnable]:
sync.runtime_SemacquireMutex(0xc0000b202c, 0xc00104a700)
    /usr/local/Cellar/go/1.11.2/libexec/src/runtime/sema.go:71 +0x3d
sync.(*RWMutex).RLock(0xc0000b2020)
    /usr/local/Cellar/go/1.11.2/libexec/src/sync/rwmutex.go:50 +0x4e
github.com/logv/sybil/src/lib.(*Table).get_key_id(0xc0000d2000, 0xc0006485d0, 0x26, 0x0)
    /Users/filip/go/src/github.com/logv/sybil/src/lib/table.go:132 +0x3d
github.com/logv/sybil/src/lib.(*Table).ShouldLoadBlockFromDir(0xc0000d2000, 0xc00107b9e0, 0x21, 0xc0004be500, 0x0)
    /Users/filip/go/src/github.com/logv/sybil/src/lib/table_block_io.go:125 +0x2a8
github.com/logv/sybil/src/lib.(*Table).LoadAndQueryRecords.func1(0xc000012220, 0xc0000d2000, 0xc00107b9e0, 0x21, 0xc0004be500, 0xc0006ca480, 0xc00082a180, 0xc0006ca458, 0xc0006ca488, 0xc0003cd780, ...)
    /Users/filip/go/src/github.com/logv/sybil/src/lib/table_query.go:110 +0xb9
created by github.com/logv/sybil/src/lib.(*Table).LoadAndQueryRecords
    /Users/filip/go/src/github.com/logv/sybil/src/lib/table_query.go:105 +0x8a3

goroutine 21 [semacquire]:
sync.runtime_SemacquireMutex(0xc0000b202c, 0xc001059c00)
    /usr/local/Cellar/go/1.11.2/libexec/src/runtime/sema.go:71 +0x3d
sync.(*RWMutex).RLock(0xc0000b2020)
    /usr/local/Cellar/go/1.11.2/libexec/src/sync/rwmutex.go:50 +0x4e
github.com/logv/sybil/src/lib.(*Table).get_key_id(0xc0000d2000, 0xc0006f5aa0, 0x26, 0x0)
    /Users/filip/go/src/github.com/logv/sybil/src/lib/table.go:132 +0x3d
github.com/logv/sybil/src/lib.(*Table).ShouldLoadBlockFromDir(0xc0000d2000, 0xc00107ba10, 0x21, 0xc0004be500, 0x0)
    /Users/filip/go/src/github.com/logv/sybil/src/lib/table_block_io.go:125 +0x2a8
github.com/logv/sybil/src/lib.(*Table).LoadAndQueryRecords.func1(0xc000012220, 0xc0000d2000, 0xc00107ba10, 0x21, 0xc0004be500, 0xc0006ca480, 0xc00082a180, 0xc0006ca458, 0xc0006ca488, 0xc0003cd780, ...)
    /Users/filip/go/src/github.com/logv/sybil/src/lib/table_query.go:110 +0xb9
created by github.com/logv/sybil/src/lib.(*Table).LoadAndQueryRecords
    /Users/filip/go/src/github.com/logv/sybil/src/lib/table_query.go:105 +0x8a3

goroutine 22 [semacquire]:
sync.runtime_SemacquireMutex(0xc0000b202c, 0xc001048800)
    /usr/local/Cellar/go/1.11.2/libexec/src/runtime/sema.go:71 +0x3d
sync.(*RWMutex).RLock(0xc0000b2020)
    /usr/local/Cellar/go/1.11.2/libexec/src/sync/rwmutex.go:50 +0x4e
github.com/logv/sybil/src/lib.(*Table).get_key_id(0xc0000d2000, 0xc0002bd260, 0x26, 0x0)
    /Users/filip/go/src/github.com/logv/sybil/src/lib/table.go:132 +0x3d
github.com/logv/sybil/src/lib.(*Table).ShouldLoadBlockFromDir(0xc0000d2000, 0xc00107ba40, 0x21, 0xc0004be500, 0x0)
    /Users/filip/go/src/github.com/logv/sybil/src/lib/table_block_io.go:125 +0x2a8
github.com/logv/sybil/src/lib.(*Table).LoadAndQueryRecords.func1(0xc000012220, 0xc0000d2000, 0xc00107ba40, 0x21, 0xc0004be500, 0xc0006ca480, 0xc00082a180, 0xc0006ca458, 0xc0006ca488, 0xc0003cd780, ...)
    /Users/filip/go/src/github.com/logv/sybil/src/lib/table_query.go:110 +0xb9
created by github.com/logv/sybil/src/lib.(*Table).LoadAndQueryRecords
    /Users/filip/go/src/github.com/logv/sybil/src/lib/table_query.go:105 +0x8a3

goroutine 23 [semacquire]:
sync.runtime_SemacquireMutex(0xc0000b2024, 0xc0000b2000)
    /usr/local/Cellar/go/1.11.2/libexec/src/runtime/sema.go:71 +0x3d
sync.(*Mutex).Lock(0xc0000b2020)
    /usr/local/Cellar/go/1.11.2/libexec/src/sync/mutex.go:134 +0xff
sync.(*RWMutex).Lock(0xc0000b2020)
    /usr/local/Cellar/go/1.11.2/libexec/src/sync/rwmutex.go:93 +0x2d
github.com/logv/sybil/src/lib.(*Table).get_key_id(0xc0000d2000, 0xc00011fd40, 0x26, 0x0)
    /Users/filip/go/src/github.com/logv/sybil/src/lib/table.go:139 +0xc4
github.com/logv/sybil/src/lib.(*Table).ShouldLoadBlockFromDir(0xc0000d2000, 0xc00107ba70, 0x21, 0xc0004be500, 0x0)
    /Users/filip/go/src/github.com/logv/sybil/src/lib/table_block_io.go:125 +0x2a8
github.com/logv/sybil/src/lib.(*Table).LoadAndQueryRecords.func1(0xc000012220, 0xc0000d2000, 0xc00107ba70, 0x21, 0xc0004be500, 0xc0006ca480, 0xc00082a180, 0xc0006ca458, 0xc0006ca488, 0xc0003cd780, ...)
    /Users/filip/go/src/github.com/logv/sybil/src/lib/table_query.go:110 +0xb9
created by github.com/logv/sybil/src/lib.(*Table).LoadAndQueryRecords
    /Users/filip/go/src/github.com/logv/sybil/src/lib/table_query.go:105 +0x8a3

goroutine 24 [runnable]:
sync.runtime_Semrelease(0xc0000b202c, 0x11df100)
    /usr/local/Cellar/go/1.11.2/libexec/src/runtime/sema.go:65 +0x43
sync.(*RWMutex).Unlock(0xc0000b2020)
    /usr/local/Cellar/go/1.11.2/libexec/src/sync/rwmutex.go:128 +0x5b
github.com/logv/sybil/src/lib.(*Table).get_key_id(0xc0000d2000, 0xc0008758c0, 0x26, 0x149a)
    /Users/filip/go/src/github.com/logv/sybil/src/lib/table.go:149 +0x24c
github.com/logv/sybil/src/lib.(*Table).ShouldLoadBlockFromDir(0xc0000d2000, 0xc00107baa0, 0x21, 0xc0004be500, 0x0)
    /Users/filip/go/src/github.com/logv/sybil/src/lib/table_block_io.go:125 +0x2a8
github.com/logv/sybil/src/lib.(*Table).LoadAndQueryRecords.func1(0xc000012220, 0xc0000d2000, 0xc00107baa0, 0x21, 0xc0004be500, 0xc0006ca480, 0xc00082a180, 0xc0006ca458, 0xc0006ca488, 0xc0003cd780, ...)
    /Users/filip/go/src/github.com/logv/sybil/src/lib/table_query.go:110 +0xb9
created by github.com/logv/sybil/src/lib.(*Table).LoadAndQueryRecords
    /Users/filip/go/src/github.com/logv/sybil/src/lib/table_query.go:105 +0x8a3

okayzed commented 5 years ago

Thanks for opening the issue! I'll investigate promptly - I was out of town this week, sorry for the delay in responding.

okayzed commented 5 years ago

I pushed a commit that wraps those lines in the relevant mutex (t.string_id_m). I generally run tests with the '-race' flag, but it seems that my tests do not create the conditions needed to trigger this race, so I will look into how to do that so I can locate more potential problem areas.

Thanks for reporting, and let me know if this fixes it for you.

(PS: 3000 columns? wow!)

alum commented 5 years ago

Thanks! (it works)