Open myaaaaaaaaa opened 4 months ago
A more easily reproducible test case below:
package main_test
import (
"runtime"
"strings"
"testing"
"github.com/itchyny/gojq"
)
func TestMemoryLeak(t *testing.T) {
// These should output 10 values.
testCases := []string{
`range(50000) | select((.%5000) == 0)`,
`range(50000) | if ((.%5000) == 0) then . else empty end`,
}
for _, tc := range testCases {
t.Run(strings.ReplaceAll(tc, " ", ""), func(t *testing.T) {
query, err := gojq.Parse(tc)
if err != nil {
t.Fatal(err)
}
iter := query.Run(nil)
memUsage := [10]float32{}
for i := range memUsage {
var memStats runtime.MemStats
runtime.GC()
runtime.ReadMemStats(&memStats)
memUsage[i] = float32(memStats.HeapAlloc)
if _, ok := iter.Next(); !ok {
t.Fatal("unexpected end of iterator")
}
}
if _, ok := iter.Next(); ok {
t.Fatal("unexpected extra result from iterator")
}
// Ignore the first and last values (likely outliers) just in case.
// This formula may need adjusting.
if memUsage[1]*1.2 < memUsage[8] { // if true {
t.Error("Possible memory leak detected:")
for _, m := range memUsage {
t.Errorf("%.1f kB", m/1024)
}
}
})
}
}
outputs:
=== RUN TestMemoryLeak
=== RUN TestMemoryLeak/range(50000)|select((.%5000)==0)
hello_test.go:40: Possible memory leak detected:
hello_test.go:42: 247.1 kB
hello_test.go:42: 247.2 kB
hello_test.go:42: 623.4 kB
hello_test.go:42: 1001.7 kB
hello_test.go:42: 1603.9 kB
hello_test.go:42: 1758.2 kB
hello_test.go:42: 1912.4 kB
hello_test.go:42: 2962.7 kB
hello_test.go:42: 3116.9 kB
hello_test.go:42: 3271.2 kB
=== RUN TestMemoryLeak/range(50000)|if((.%5000)==0)then.elseemptyend
--- FAIL: TestMemoryLeak (0.06s)
--- FAIL: TestMemoryLeak/range(50000)|select((.%5000)==0) (0.04s)
--- PASS: TestMemoryLeak/range(50000)|if((.%5000)==0)then.elseemptyend (0.02s)
FAIL
exit status 1
FAIL hello 0.063s
Hey! haven't looked closer but it sounds similar to some digging i did for https://github.com/itchyny/gojq/issues/86
Thanks for the pointers! pprof highlights these lines in env.Next()
for me:
. 222: case opcallrec:
. 223: pc, callpc, index = code.v.(int), -1, env.scopes.index
. 224: goto loop
. 225: case oppushpc:
63.50MB 226: env.push([2]int{code.v.(int), env.scopes.index})
. 227: case opcallpc:
. 228: xs := env.pop().([2]int)
. 229: pc, callpc, index = xs[0], pc, xs[1]
. 230: goto loop
. 231: case opscope:
. 232: xs := code.v.([3]int)
. 233: var saveindex, outerindex int
. 234: if index == env.scopes.index {
. 235: if callpc >= 0 {
. 236: saveindex = index
. 237: } else {
. 238: callpc, saveindex = env.popscope()
. 239: }
. 240: } else {
. 241: saveindex, _ = env.scopes.save()
. 242: env.scopes.index = index
. 243: }
. 244: if outerindex = index; outerindex >= 0 {
. 245: if s := env.scopes.data[outerindex].value; s.id == xs[0] {
. 246: outerindex = s.outerindex
. 247: }
. 248: }
. 249: env.scopes.push(scope{xs[0], env.offset, callpc, saveindex, outerindex})
. 250: env.offset += xs[1]
. 251: if env.offset > len(env.values) {
319.82MB 252: vs := make([]any, env.offset*2)
. 253: copy(vs, env.values)
. 254: env.values = vs
. 255: }
Benchmark code for reproduction:
package main_test
import (
"strings"
"testing"
"github.com/itchyny/gojq"
)
func BenchmarkSelect(b *testing.B) {
benchCases := []string{
`range(.) | select(false)`,
`range(.) | if (false) then . else empty end`,
}
for _, bc := range benchCases {
query, err := gojq.Parse(bc)
if err != nil {
b.Fatal(err)
}
b.Run(strings.ReplaceAll(bc, " ", ""), func(b *testing.B) {
iter := query.Run(b.N)
for {
_, ok := iter.Next()
if !ok {
break
}
}
})
}
}
Running the following script will result in unbounded memory growth:
However, manually inlining the body of
select()
will avoid triggering this issue: