Closed: DasSkelett closed this issue 3 years ago.
Since the benchmark is only for go-json, could you add encoding/json and compare?
Yup, here's the complete output running both benchmarks. encoding/json completes fine, but go-json first encounters weird parsing errors and later crashes with a fatal out-of-memory error.
```
$ go test -bench=Benchmark_Decode_SlowReader -run=Benchmark_Decode_SlowReader -benchmem
goos: linux
goarch: amd64
pkg: benchmark
cpu: AMD Ryzen 5 3600 6-Core Processor
Benchmark_Decode_SlowReader_EncodingJson/chunksize_16384-12 4496 436685 ns/op 101904 B/op 208 allocs/op
Benchmark_Decode_SlowReader_EncodingJson/chunksize_4096-12 3704 441658 ns/op 97808 B/op 211 allocs/op
Benchmark_Decode_SlowReader_EncodingJson/chunksize_1024-12 2619 451213 ns/op 97808 B/op 230 allocs/op
Benchmark_Decode_SlowReader_EncodingJson/chunksize_256-12 3276 410263 ns/op 97808 B/op 312 allocs/op
Benchmark_Decode_SlowReader_EncodingJson/chunksize_64-12 2256 473940 ns/op 97792 B/op 641 allocs/op
Benchmark_Decode_SlowReader_GoJson/chunksize_16384-12 10000 109227 ns/op 65415 B/op 79 allocs/op
Benchmark_Decode_SlowReader_GoJson/chunksize_4096-12 6176 188928 ns/op 557069 B/op 87 allocs/op
--- FAIL: Benchmark_Decode_SlowReader_GoJson/chunksize_1024
decode_test.go:473: expected colon after object key
--- FAIL: Benchmark_Decode_SlowReader_GoJson/chunksize_256
decode_test.go:473: expected colon after object key
fatal error: runtime: out of memory
runtime stack:
runtime.throw(0x86bcd9, 0x16)
/usr/lib/go-1.16/src/runtime/panic.go:1117 +0x72
runtime.sysMap(0xc804000000, 0x800000000, 0xb16210)
/usr/lib/go-1.16/src/runtime/mem_linux.go:169 +0xc6
runtime.(*mheap).sysAlloc(0xafbb00, 0x800000000, 0x42dc97, 0xafbb08)
/usr/lib/go-1.16/src/runtime/malloc.go:729 +0x1e5
runtime.(*mheap).grow(0xafbb00, 0x400000, 0x0)
/usr/lib/go-1.16/src/runtime/mheap.go:1346 +0x85
runtime.(*mheap).allocSpan(0xafbb00, 0x400000, 0x460100, 0x7fb4dc0d0620)
/usr/lib/go-1.16/src/runtime/mheap.go:1173 +0x609
runtime.(*mheap).alloc.func1()
/usr/lib/go-1.16/src/runtime/mheap.go:910 +0x59
runtime.systemstack(0x0)
/usr/lib/go-1.16/src/runtime/asm_amd64.s:379 +0x66
runtime.mstart()
/usr/lib/go-1.16/src/runtime/proc.go:1246
goroutine 60 [running]:
runtime.systemstack_switch()
/usr/lib/go-1.16/src/runtime/asm_amd64.s:339 fp=0xc000059a78 sp=0xc000059a70 pc=0x470520
runtime.(*mheap).alloc(0xafbb00, 0x400000, 0x101, 0x40)
/usr/lib/go-1.16/src/runtime/mheap.go:904 +0x85 fp=0xc000059ac8 sp=0xc000059a78 pc=0x429945
runtime.(*mcache).allocLarge(0x7fb505e43b38, 0x800000000, 0x7f0101, 0x40f901)
/usr/lib/go-1.16/src/runtime/mcache.go:224 +0x97 fp=0xc000059b20 sp=0xc000059ac8 pc=0x419fb7
runtime.mallocgc(0x800000000, 0x7fd220, 0xc000059c01, 0x419e13)
/usr/lib/go-1.16/src/runtime/malloc.go:1078 +0x925 fp=0xc000059ba8 sp=0xc000059b20 pc=0x40fd45
runtime.makeslice(0x7fd220, 0x800000000, 0x800000000, 0x7fb4dc07b438)
/usr/lib/go-1.16/src/runtime/slice.go:98 +0x6c fp=0xc000059bd8 sp=0xc000059ba8 pc=0x4520cc
github.com/goccy/go-json.(*stream).readBuf(...)
/home/user/git/go-json/decode_stream.go:66
github.com/goccy/go-json.(*stream).read(0xc0001000f0, 0xc402100066)
/home/user/git/go-json/decode_stream.go:76 +0x67 fp=0xc000059c38 sp=0xc000059bd8 pc=0x723147
github.com/goccy/go-json.decodeKeyByBitmapUint8Stream(0xc000166120, 0xc0001000f0, 0x3, 0xc00162e0f0, 0x0, 0x0, 0x0)
/home/user/git/go-json/decode_struct.go:367 +0x3c6 fp=0xc000059c90 sp=0xc000059c38 pc=0x7298c6
github.com/goccy/go-json.(*structDecoder).decodeStream(0xc000166120, 0xc0001000f0, 0x3, 0xc00162e0f0, 0x5, 0xc000059dd8)
/home/user/git/go-json/decode_struct.go:518 +0x19a fp=0xc000059d00 sp=0xc000059c90 pc=0x72a3da
github.com/goccy/go-json.(*ptrDecoder).decodeStream(0xc000076c00, 0xc0001000f0, 0x2, 0xc000138070, 0x0, 0x0)
/home/user/git/go-json/decode_ptr.go:54 +0xaf fp=0xc000059d40 sp=0xc000059d00 pc=0x7215ef
github.com/goccy/go-json.(*sliceDecoder).decodeStream(0xc00061a000, 0xc0001000f0, 0x2, 0xc00006a020, 0x5, 0x0)
/home/user/git/go-json/decode_slice.go:131 +0x4b0 fp=0xc000059de8 sp=0xc000059d40 pc=0x7222d0
github.com/goccy/go-json.(*structDecoder).decodeStream(0xc000166090, 0xc0001000f0, 0x1, 0xc00006a020, 0x0, 0x40fed8)
/home/user/git/go-json/decode_struct.go:536 +0x291 fp=0xc000059e58 sp=0xc000059de8 pc=0x72a4d1
github.com/goccy/go-json.(*Decoder).Decode(0xc000059ef8, 0x81c220, 0xc00006a020, 0xc0000956f8, 0x48da86)
/home/user/git/go-json/decode.go:164 +0xed fp=0xc000059eb8 sp=0xc000059e58 pc=0x71162d
benchmark.Benchmark_Decode_SlowReader_GoJson.func1(0xc000148480)
/home/user/git/go-json/benchmarks/decode_test.go:471 +0x148 fp=0xc000059f30 sp=0xc000059eb8 pc=0x7d7568
testing.(*B).runN(0xc000148480, 0x1)
/usr/lib/go-1.16/src/testing/benchmark.go:192 +0xeb fp=0xc000059fa0 sp=0xc000059f30 pc=0x4e2c0b
testing.(*B).run1.func1(0xc000148480)
/usr/lib/go-1.16/src/testing/benchmark.go:232 +0x57 fp=0xc000059fd8 sp=0xc000059fa0 pc=0x4ef4d7
runtime.goexit()
/usr/lib/go-1.16/src/runtime/asm_amd64.s:1371 +0x1 fp=0xc000059fe0 sp=0xc000059fd8 pc=0x472361
created by testing.(*B).run1
/usr/lib/go-1.16/src/testing/benchmark.go:225 +0x7f
goroutine 1 [chan receive]:
testing.(*B).run1(0xc000148900, 0xc000148900)
/usr/lib/go-1.16/src/testing/benchmark.go:234 +0xa5
testing.(*B).Run(0xc000148240, 0x872125, 0x22, 0x87e670, 0x4e2800)
/usr/lib/go-1.16/src/testing/benchmark.go:667 +0x373
testing.runBenchmarks.func1(0xc000148240)
/usr/lib/go-1.16/src/testing/benchmark.go:541 +0x78
testing.(*B).runN(0xc000148240, 0x1)
/usr/lib/go-1.16/src/testing/benchmark.go:192 +0xeb
testing.runBenchmarks(0x866980, 0x9, 0xc00000e588, 0xade200, 0x93, 0x93, 0xae4140)
/usr/lib/go-1.16/src/testing/benchmark.go:550 +0x3d1
testing.(*M).Run(0xc000138180, 0x0)
/usr/lib/go-1.16/src/testing/testing.go:1424 +0x51d
main.main()
_testmain.go:335 +0x138
goroutine 56 [chan receive]:
testing.(*B).run1(0xc000148480, 0xc000148480)
/usr/lib/go-1.16/src/testing/benchmark.go:234 +0xa5
testing.(*B).Run(0xc000148900, 0xc0018a4030, 0xc, 0xc00162e240, 0x0)
/usr/lib/go-1.16/src/testing/benchmark.go:667 +0x373
benchmark.Benchmark_Decode_SlowReader_GoJson(0xc000148900)
/home/user/git/go-json/benchmarks/decode_test.go:466 +0xb3
testing.(*B).runN(0xc000148900, 0x1)
/usr/lib/go-1.16/src/testing/benchmark.go:192 +0xeb
testing.(*B).run1.func1(0xc000148900)
/usr/lib/go-1.16/src/testing/benchmark.go:232 +0x57
created by testing.(*B).run1
/usr/lib/go-1.16/src/testing/benchmark.go:225 +0x7f
exit status 2
FAIL benchmark 12.032s
```
In theory, I imagine something like the following as a fix. It does get rid of the OOM situation, but all benchmarks with a chunk size of <= 4k still fail with `expected colon after object key`, which I have yet to find the cause for.
```diff
diff --git a/decode_stream.go b/decode_stream.go
index b1c99f9..76e78c6 100644
--- a/decode_stream.go
+++ b/decode_stream.go
@@ -18,6 +18,7 @@ type stream struct {
 	offset                int64
 	cursor                int64
 	allRead               bool
+	filledBuffer          bool
 	useNumber             bool
 	disallowUnknownFields bool
 }
@@ -60,10 +61,12 @@ func (s *stream) reset() {
 }
 
 func (s *stream) readBuf() []byte {
-	s.bufSize *= 2
-	remainBuf := s.buf
-	s.buf = make([]byte, s.bufSize)
-	copy(s.buf, remainBuf)
+	if s.filledBuffer {
+		s.bufSize *= 2
+		remainBuf := s.buf
+		s.buf = make([]byte, s.bufSize)
+		copy(s.buf, remainBuf)
+	}
 	return s.buf[s.cursor:]
 }
 
@@ -76,6 +79,12 @@ func (s *stream) read() bool {
 	buf[last] = nul
 	n, err := s.r.Read(buf[:last])
 	s.length = s.cursor + int64(n)
+	if n == (len(buf) - 1) {
+		s.filledBuffer = true
+	} else {
+		// Didn't fill the buffer, slow reader. Don't grow it the next time.
+		s.filledBuffer = false
+	}
 	if err == io.EOF {
 		s.allRead = true
 	} else if err != nil {
```
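The idea behind the flag is to grow the buffer only when the previous `Read()` actually managed to fill it, so with a slow reader the buffer stays roughly proportional to the data buffered so far instead of doubling on every refill.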
Okay, I also found the cause for the `expected colon after object key` error.

In `decodeKeyByBitmapUint8Stream()`, `decodeKeyByBitmapUint16Stream()`, and maybe many other places, when the decoder reaches the end of the buffered data and the `case nul:` branch triggers, it basically skips a character.
https://github.com/goccy/go-json/blob/ac159966aa3585a6e5791286c06911d95bc2a694/decode_struct.go#L347-L372
Let's assume we are at the beginning of a new iteration of the loop. The cursor is incremented once, so it is now at position 7, and the char at this position is the first `nul` at the end of the buffered data. The switch falls into the `nul` case, which reads new data into the buffer from the stream, and the cursor stays at the same position (7) (`s.cursor = cursor; _, cursor, _ = s.stat()` is basically a no-op with regard to the value of the `cursor` variable). We loop into the next iteration, and the cursor gets incremented by one to 8. The code never actually looked at the real char at position 7: it only ever saw the `nul` as a sign of the end of the buffered data, not the char that got read into that position afterwards, which happens to be a `"` in many cases during the benchmark.
Inserting a `cursor--` in line 371 fixed the benchmark for me:
```go
	case nul:
		s.cursor = cursor
		if !s.read() {
			return nil, "", errUnexpectedEndOfJSON("string", s.totalOffset())
		}
		buf, cursor, p = s.stat()
		cursor--
```
However, it looks like there are many other places that would need this fix as well.

Shall I open a new issue for this? You can basically use the above benchmark code for reproduction, but I can try to craft something smaller and more specific to this char-skip problem.
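For reference, a minimal reproduction could look roughly like the sketch below (untested; `testing/iotest.OneByteReader` hands the decoder one byte per `Read()`, so the stream buffer has to be refilled in the middle of object keys, and with the current `readBuf()` it also doubles the buffer on every one-byte read):

```go
package main

import (
	"bytes"
	"fmt"
	"testing/iotest"

	"github.com/goccy/go-json"
)

type payload struct {
	ID   int    `json:"id"`
	Name string `json:"name"`
}

func main() {
	data := []byte(`{"id": 1, "name": "foo"}`)

	// OneByteReader forces a buffer refill after every single byte, so the
	// decoder repeatedly hits the nul sentinel inside object keys.
	dec := json.NewDecoder(iotest.OneByteReader(bytes.NewReader(data)))

	var v payload
	if err := dec.Decode(&v); err != nil {
		fmt.Println("decode error:", err) // e.g. "expected colon after object key"
		return
	}
	fmt.Printf("decoded: %+v\n", v)
}
```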
Thank you for the detailed information! I'll fix this.
Background
I wrote a Prometheus exporter that downloads a medium-sized JSON file (~1.5 MiB) from the web and analyzes its contents to produce some metrics. It calls go-json's streaming `Decoder` (https://gitlab.com/DasSkelett/NetKAN-Status-Exporter/-/blob/main/collector.go#L285), where the reader is an `http.Get()` response body (https://gitlab.com/DasSkelett/NetKAN-Status-Exporter/-/blob/main/collector.go#L266).

When trying out go-json to see if it can speed up parsing a bit, I discovered inexplicably high memory consumption, orders of magnitude higher than, for example, `encoding/json`: 200-400 MiB on a server, and 4-16 GiB on my PC, which is a bit much for a file of 1.5 MiB.

This is very weird behaviour, especially because it differs between machines and the memory consumption isn't always equally high. Furthermore, it was impossible to reproduce when reading the file from the local disk or from a local webserver. Eventually, after taking a deep look into go-json's source code and trying to understand its behaviour, I think I found the cause.
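The call pattern is roughly the following sketch (identifiers are illustrative, not the exporter's actual code):

```go
package main

import (
	"log"
	"net/http"

	"github.com/goccy/go-json"
)

// module is a hypothetical payload type standing in for the exporter's real one.
type module struct {
	Name   string `json:"name"`
	Frozen bool   `json:"frozen"`
}

func fetchStatus(url string) (map[string]module, error) {
	resp, err := http.Get(url)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	// The response body is an io.Reader backed by the network connection,
	// so individual Read calls may only return a few KiB at a time.
	var status map[string]module
	if err := json.NewDecoder(resp.Body).Decode(&status); err != nil {
		return nil, err
	}
	return status, nil
}

func main() {
	status, err := fetchStatus("https://example.com/status.json")
	if err != nil {
		log.Fatal(err)
	}
	log.Printf("got %d modules", len(status))
}
```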
Problem
Whenever the decoder comes to the end of the current data in the buffer, it calls `s.read()`. `s.read()` then doubles the buffer size (in `s.readBuf()`) and asks the `io.Reader` source to read more data into the buffer.

This is where the problem is: the convention for `io.Reader` is that it should not block and wait until it has enough data to fill the buffer, but instead dump everything it currently has and signal via `n` that it didn't fill the buffer (https://golang.org/pkg/io/#Reader).
This means that go-json gets to the end of the new data in the stream pretty quickly, calls `s.read()` again, which doubles the buffer and asks the reader to fill it, which again only returns a few bytes, and so on. In essence, if the reader is slower than go-json, the buffer inflates massively but never gets filled to the top.

This is especially the case if the `io.Reader` reads directly from the internet over a "slow" connection, which explains why the effects weren't as bad when running on a server with a Gigabit connection.
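To make the growth concrete, here is a toy simulation of that loop (the 512-byte initial buffer size and the ~1 KiB per `Read()` are assumptions for illustration). It reaches a multi-GiB buffer after about two dozen refills, while only a few dozen KiB of the 1.5 MiB body have actually arrived:

```go
package main

import "fmt"

func main() {
	const perRead = 1024         // assume the slow reader delivers ~1 KiB per Read
	const limit = int64(8) << 30 // stop once the buffer passes 8 GiB
	bufSize := int64(512)        // assumed initial buffer size
	delivered := int64(0)

	for refills := 1; bufSize < limit; refills++ {
		bufSize *= 2         // readBuf() doubles the buffer on every refill...
		delivered += perRead // ...even though only ~1 KiB of new data arrived
		fmt.Printf("refill %2d: buffer %11d B, data delivered %6d B\n",
			refills, bufSize, delivered)
	}
}
```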
s.read()
needs to remember whethers.r.Read()
managed to fill the buffer completely the last time. It it did, double it, if it didn't keep the buffer the same size.Benchmarking
Here are some pprof outputs.
```
(pprof) top
Showing nodes accounting for 4GB, 99.69% of 4.01GB total
Dropped 5 nodes (cum <= 0.02GB)
Showing top 10 nodes out of 14
      flat  flat%   sum%        cum   cum%
       4GB 99.69% 99.69%        4GB 99.69%  github.com/goccy/go-json.(*stream).readBuf (inline)
         0     0% 99.69%        4GB 99.69%  github.com/goccy/go-json.(*Decoder).Decode
         0     0% 99.69%        4GB 99.69%  github.com/goccy/go-json.(*mapDecoder).decodeStream
         0     0% 99.69%        4GB 99.69%  github.com/goccy/go-json.(*stream).read
         0     0% 99.69%     1.50GB 37.38%  github.com/goccy/go-json.(*stringDecoder).decodeStream
         0     0% 99.69%     1.50GB 37.38%  github.com/goccy/go-json.(*stringDecoder).decodeStreamByte
         0     0% 99.69%        4GB 99.69%  github.com/goccy/go-json.(*structDecoder).decodeStream
         0     0% 99.69%     2.50GB 62.29%  github.com/goccy/go-json.decodeKeyByBitmapUint16Stream
         0     0% 99.69%     1.49GB 37.19%  github.com/goccy/go-json.stringBytes
         0     0% 99.69%        4GB 99.69%  github.com/prometheus/client_golang/prometheus.(*Registry).Gather.func1

(pprof) list read
Total: 4.01GB
ROUTINE ======================== github.com/goccy/go-json.(*stream).read in /home/user/go/pkg/mod/github.com/goccy/go-json@v0.4.13/decode_stream.go
         0        4GB (flat, cum) 99.69% of Total
         .          .     69:
         .          .     70:func (s *stream) read() bool {
         .          .     71:	if s.allRead {
         .          .     72:		return false
         .          .     73:	}
         .        4GB     74:	buf := s.readBuf()
         .          .     75:	last := len(buf) - 1
         .          .     76:	buf[last] = nul
         .          .     77:	n, err := s.r.Read(buf[:last])
         .          .     78:	s.length = s.cursor + int64(n)
         .          .     79:	if err == io.EOF {
ROUTINE ======================== github.com/goccy/go-json.(*stream).readBuf in /home/user/go/pkg/mod/github.com/goccy/go-json@v0.4.13/decode_stream.go
       4GB        4GB (flat, cum) 99.69% of Total
         .          .     60:}
         .          .     61:
         .          .     62:func (s *stream) readBuf() []byte {
         .          .     63:	s.bufSize *= 2
         .          .     64:	remainBuf := s.buf
       4GB        4GB     65:	s.buf = make([]byte, s.bufSize)
         .          .     66:	copy(s.buf, remainBuf)
         .          .     67:	return s.buf[s.cursor:]
         .          .     68:}
         .          .     69:
         .          .     70:func (s *stream) read() bool {
```

![profile001](https://user-images.githubusercontent.com/28812678/116545610-f02c8900-a8f0-11eb-9045-040376f944e1.png)

And here is a benchmark I've put at the bottom of `benchmark/decode_test.go`, which simulates such a slow reader. It isn't perfect (it does repeated byte-array allocations as well, and I also seem to be hitting some other bugs), but when it gets to the chunk size of 64 bytes it reliably crashes with an out-of-memory error and `github.com/goccy/go-json.(*stream).readBuf(...)` in the stack trace.
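The benchmark is shaped roughly like the following sketch (the actual code in `benchmark/decode_test.go` uses a much larger payload and differs in details):

```go
package benchmark_test

import (
	"fmt"
	"io"
	"testing"

	gojson "github.com/goccy/go-json"
)

// chunkReader returns at most chunk bytes per Read call, simulating a slow
// network reader that never fills the decoder's buffer in one go.
type chunkReader struct {
	data  []byte
	pos   int
	chunk int
}

func (c *chunkReader) Read(p []byte) (int, error) {
	if c.pos >= len(c.data) {
		return 0, io.EOF
	}
	n := c.chunk
	if n > len(p) {
		n = len(p)
	}
	if rest := len(c.data) - c.pos; n > rest {
		n = rest
	}
	copy(p, c.data[c.pos:c.pos+n])
	c.pos += n
	return n, nil
}

type record struct {
	ID   int    `json:"id"`
	Name string `json:"name"`
}

// fixture is a small placeholder payload; the real benchmark decodes ~1.5 MiB.
var fixture = []byte(`[{"id": 1, "name": "a"}, {"id": 2, "name": "b"}]`)

func Benchmark_Decode_SlowReader_GoJson(b *testing.B) {
	for _, size := range []int{16384, 4096, 1024, 256, 64} {
		b.Run(fmt.Sprintf("chunksize_%d", size), func(b *testing.B) {
			b.ReportAllocs()
			for i := 0; i < b.N; i++ {
				var v []record
				dec := gojson.NewDecoder(&chunkReader{data: fixture, chunk: size})
				if err := dec.Decode(&v); err != nil {
					b.Fatal(err)
				}
			}
		})
	}
}
```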
You can put the following after the `s.r.Read()` line in `s.read()` to see how the buffer keeps doubling and doubling, even though it isn't necessary.
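The exact snippet isn't included here; a hypothetical debug print of that kind could look like this (it needs an extra "fmt" import in `decode_stream.go`):

```go
	n, err := s.r.Read(buf[:last])
	// Hypothetical debug print (not the original snippet): show how much the
	// reader delivered versus how large the buffer has already grown.
	fmt.Printf("stream.read: n=%d bufSize=%d length=%d cursor=%d\n",
		n, s.bufSize, s.length, s.cursor)
```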