google / zoekt

Fast trigram based code search
1.69k stars 113 forks source link

zoekt cannot handle '[\s\S]a' #82

Closed ijt closed 5 years ago

ijt commented 5 years ago
[ ~/src/github.com/sourcegraph/sourcegraph ] zoekt '[\s\S]a'
2019/05/01 15:10:26 crashed shard: shard(/Users/issactrotts/.zoekt/sourcegraph_v15.00000.zoekt): regexp: Compile(`(?i)[^]a`): error parsing regexp: missing closing ]: `[^]a`, goroutine 55 [running]:
runtime/debug.Stack(0xc000724240, 0xc005ee5fc0, 0x3c)
        /usr/local/Cellar/go/1.12/libexec/src/runtime/debug/stack.go:24 +0x9d
github.com/google/zoekt/shards.searchOneShard.func1(0x14b92c0, 0xc000724240, 0xc0000a62a0)
        /Users/issactrotts/src/github.com/google/zoekt/shards/shards.go:228 +0x9c
panic(0x139c300, 0xc000216190)
        /usr/local/Cellar/go/1.12/libexec/src/runtime/panic.go:522 +0x1b5
regexp.MustCompile(0xc005b8fa08, 0x8, 0x4)
        /usr/local/Cellar/go/1.12/libexec/src/regexp/regexp.go:272 +0x159
github.com/google/zoekt.(*indexData).newMatchTree(0xc00003b800, 0x14b1940, 0xc000216150, 0xc0065ed990, 0x8, 0x178b8b8, 0xc000214100)
        /Users/issactrotts/src/github.com/google/zoekt/matchtree.go:479 +0x88e
github.com/google/zoekt.(*indexData).newMatchTree(0xc00003b800, 0x14b1920, 0xc000214100, 0x14b1920, 0xc000214100, 0xc0065edec0, 0x0)
        /Users/issactrotts/src/github.com/google/zoekt/matchtree.go:501 +0x11e6
github.com/google/zoekt.(*indexData).Search(0xc00003b800, 0x14b8dc0, 0xc0000a8280, 0x14b1940, 0xc00004c5b0, 0xc001190000, 0x0, 0x0, 0x0)
        /Users/issactrotts/src/github.com/google/zoekt/eval.go:120 +0x387
github.com/google/zoekt/shards.searchOneShard(0x14b8dc0, 0xc0000a8280, 0x14b92c0, 0xc000724240, 0x14b1940, 0xc00004c5b0, 0xc001190000, 0xc0000a62a0)
        /Users/issactrotts/src/github.com/google/zoekt/shards/shards.go:236 +0xbd
github.com/google/zoekt/shards.(*shardedSearcher).Search.func2(0xc0000a6360, 0xc00004cc40, 0x14b1940, 0xc00004c5b0, 0xc001190000, 0xc0000a62a0)
        /Users/issactrotts/src/github.com/google/zoekt/shards/shards.go:170 +0x8f
created by github.com/google/zoekt/shards.(*shardedSearcher).Search
        /Users/issactrotts/src/github.com/google/zoekt/shards/shards.go:168 +0x5c0
2019/05/01 15:10:26 crashed shard: shard(/Users/issactrotts/.zoekt/a_v15.00000.zoekt): regexp: Compile(`(?i)[^]a`): error parsing regexp: missing closing ]: `[^]a`, goroutine 58 [running]:
runtime/debug.Stack(0xc000724200, 0xc0011900c0, 0x32)
        /usr/local/Cellar/go/1.12/libexec/src/runtime/debug/stack.go:24 +0x9d
github.com/google/zoekt/shards.searchOneShard.func1(0x14b92c0, 0xc000724200, 0xc0000a62a0)
        /Users/issactrotts/src/github.com/google/zoekt/shards/shards.go:228 +0x9c
panic(0x139c300, 0xc00004cca0)
        /usr/local/Cellar/go/1.12/libexec/src/runtime/panic.go:522 +0x1b5
regexp.MustCompile(0xc0010c6c48, 0x8, 0x4)
        /usr/local/Cellar/go/1.12/libexec/src/regexp/regexp.go:272 +0x159
github.com/google/zoekt.(*indexData).newMatchTree(0xc000062800, 0x14b1940, 0xc00004cc70, 0xc000049990, 0x8, 0x178b8b8, 0xc0007242c0)
        /Users/issactrotts/src/github.com/google/zoekt/matchtree.go:479 +0x88e
github.com/google/zoekt.(*indexData).newMatchTree(0xc000062800, 0x14b1920, 0xc0007242c0, 0x14b1920, 0xc0007242c0, 0xc000049ec0, 0x0)
        /Users/issactrotts/src/github.com/google/zoekt/matchtree.go:501 +0x11e6
github.com/google/zoekt.(*indexData).Search(0xc000062800, 0x14b8dc0, 0xc0000a8280, 0x14b1940, 0xc00004c5b0, 0xc001190000, 0x0, 0x0, 0x0)
        /Users/issactrotts/src/github.com/google/zoekt/eval.go:120 +0x387
github.com/google/zoekt/shards.searchOneShard(0x14b8dc0, 0xc0000a8280, 0x14b92c0, 0xc000724200, 0x14b1940, 0xc00004c5b0, 0xc001190000, 0xc0000a62a0)
        /Users/issactrotts/src/github.com/google/zoekt/shards/shards.go:236 +0xbd
github.com/google/zoekt/shards.(*shardedSearcher).Search.func2(0xc0000a6360, 0xc00004cc40, 0x14b1940, 0xc00004c5b0, 0xc001190000, 0xc0000a62a0)
        /Users/issactrotts/src/github.com/google/zoekt/shards/shards.go:170 +0x8f
created by github.com/google/zoekt/shards.(*shardedSearcher).Search
        /Users/issactrotts/src/github.com/google/zoekt/shards/shards.go:168 +0x5c0
2019/05/01 15:10:26 crashed shard: shard(/Users/issactrotts/.zoekt/sourcegraph_v15.00001.zoekt): regexp: Compile(`(?i)[^]a`): error parsing regexp: missing closing ]: `[^]a`, goroutine 57 [running]:
runtime/debug.Stack(0xc000724260, 0xc0065fa080, 0x3c)
        /usr/local/Cellar/go/1.12/libexec/src/runtime/debug/stack.go:24 +0x9d
github.com/google/zoekt/shards.searchOneShard.func1(0x14b92c0, 0xc000724260, 0xc0000a62a0)
        /Users/issactrotts/src/github.com/google/zoekt/shards/shards.go:228 +0x9c
panic(0x139c300, 0xc006600040)
        /usr/local/Cellar/go/1.12/libexec/src/runtime/panic.go:522 +0x1b5
regexp.MustCompile(0xc006606020, 0x8, 0x4)
        /usr/local/Cellar/go/1.12/libexec/src/regexp/regexp.go:272 +0x159
github.com/google/zoekt.(*indexData).newMatchTree(0xc00003bc00, 0x14b1940, 0xc006600010, 0x2000, 0x8, 0x1881248, 0xc00000e120)
        /Users/issactrotts/src/github.com/google/zoekt/matchtree.go:479 +0x88e
github.com/google/zoekt.(*indexData).newMatchTree(0xc00003bc00, 0x14b1920, 0xc00000e120, 0x14b1920, 0xc00000e120, 0xc0065f9ec0, 0x0)
        /Users/issactrotts/src/github.com/google/zoekt/matchtree.go:501 +0x11e6
github.com/google/zoekt.(*indexData).Search(0xc00003bc00, 0x14b8dc0, 0xc0000a8280, 0x14b1940, 0xc00004c5b0, 0xc001190000, 0x0, 0x0, 0x0)
        /Users/issactrotts/src/github.com/google/zoekt/eval.go:120 +0x387
github.com/google/zoekt/shards.searchOneShard(0x14b8dc0, 0xc0000a8280, 0x14b92c0, 0xc000724260, 0x14b1940, 0xc00004c5b0, 0xc001190000, 0xc0000a62a0)
        /Users/issactrotts/src/github.com/google/zoekt/shards/shards.go:236 +0xbd
github.com/google/zoekt/shards.(*shardedSearcher).Search.func2(0xc0000a6360, 0xc00004cc40, 0x14b1940, 0xc00004c5b0, 0xc001190000, 0xc0000a62a0)
        /Users/issactrotts/src/github.com/google/zoekt/shards/shards.go:170 +0x8f
created by github.com/google/zoekt/shards.(*shardedSearcher).Search
        /Users/issactrotts/src/github.com/google/zoekt/shards/shards.go:168 +0x5c0
2019/05/01 15:10:26 crashed shard: shard(/Users/issactrotts/.zoekt/sourcegraph_v15.00002.zoekt): regexp: Compile(`(?i)[^]a`): error parsing regexp: missing closing ]: `[^]a`, goroutine 56 [running]:
runtime/debug.Stack(0xc000724220, 0xc00118c1c0, 0x3c)
        /usr/local/Cellar/go/1.12/libexec/src/runtime/debug/stack.go:24 +0x9d
github.com/google/zoekt/shards.searchOneShard.func1(0x14b92c0, 0xc000724220, 0xc0000a62a0)
        /Users/issactrotts/src/github.com/google/zoekt/shards/shards.go:228 +0x9c
panic(0x139c300, 0xc00008a1f0)
        /usr/local/Cellar/go/1.12/libexec/src/runtime/panic.go:522 +0x1b5
regexp.MustCompile(0xc001198020, 0x8, 0x4)
        /usr/local/Cellar/go/1.12/libexec/src/regexp/regexp.go:272 +0x159
github.com/google/zoekt.(*indexData).newMatchTree(0xc00003b400, 0x14b1940, 0xc00008a1c0, 0xc0065f1990, 0x8, 0x178b8b8, 0xc0000940c0)
        /Users/issactrotts/src/github.com/google/zoekt/matchtree.go:479 +0x88e
github.com/google/zoekt.(*indexData).newMatchTree(0xc00003b400, 0x14b1920, 0xc0000940c0, 0x14b1920, 0xc0000940c0, 0xc0065f1ec0, 0x0)
        /Users/issactrotts/src/github.com/google/zoekt/matchtree.go:501 +0x11e6
github.com/google/zoekt.(*indexData).Search(0xc00003b400, 0x14b8dc0, 0xc0000a8280, 0x14b1940, 0xc00004c5b0, 0xc001190000, 0x0, 0x0, 0x0)
        /Users/issactrotts/src/github.com/google/zoekt/eval.go:120 +0x387
github.com/google/zoekt/shards.searchOneShard(0x14b8dc0, 0xc0000a8280, 0x14b92c0, 0xc000724220, 0x14b1940, 0xc00004c5b0, 0xc001190000, 0xc0000a62a0)
        /Users/issactrotts/src/github.com/google/zoekt/shards/shards.go:236 +0xbd
github.com/google/zoekt/shards.(*shardedSearcher).Search.func2(0xc0000a6360, 0xc00004cc40, 0x14b1940, 0xc00004c5b0, 0xc001190000, 0xc0000a62a0)
        /Users/issactrotts/src/github.com/google/zoekt/shards/shards.go:170 +0x8f
created by github.com/google/zoekt/shards.(*shardedSearcher).Search
        /Users/issactrotts/src/github.com/google/zoekt/shards/shards.go:168 +0x5c0
ijt commented 5 years ago

Adding this test to parse_test.go might be useful:

// Test_regexpQuery checks that regexpQuery does not turn a pattern accepted by
// the standard regexp package into something that no longer compiles.
func Test_regexpQuery(t *testing.T) {
	// Every entry must be a valid regexp on its own.
	validPatterns := []string{
		`[\s\S]b`,
	}

	// checkPattern runs the full set of assertions for a single pattern.
	checkPattern := func(t *testing.T, pattern string) {
		// Precondition: the input itself has to compile.
		if _, err := regexp.Compile(pattern); err != nil {
			t.Fatal(err)
		}

		// regexpQuery must accept the pattern ...
		q, err := regexpQuery(pattern, false, false)
		if err != nil {
			t.Fatal(err)
		}

		// ... and the string form of its result must compile as well.
		if _, err := regexp.Compile(q.String()); err != nil {
			t.Errorf("compiling stringified version of regexpQuery() output: %v", err)
		}
	}

	t.Run("does not make valid regexps not compile", func(t *testing.T) {
		for _, pattern := range validPatterns {
			t.Run(pattern, func(t *testing.T) {
				checkPattern(t, pattern)
			})
		}
	})
}
hanwen commented 5 years ago

I think it's actually a bug in the regexp library.

\s matches whitespace and \S matches non-whitespace, so [\s\S] matches every character. It should translate to '.', not to '[^]', which is not a valid regexp.

ijt commented 5 years ago

You're right!

package main

import (
    "regexp/syntax"
    "testing"
)

// TestRegexpParse verifies that the string form of a parsed regexp is itself a
// valid regexp with the same structure as the original parse.
//
// String is not required to reproduce the input text verbatim, so its output
// is not compared with the pattern. Instead the output is parsed a second time
// with the same flags and the two syntax trees are compared. The pattern uses
// `[\s\S]`, a character class covering every rune, which is the case that was
// printed as the unparsable `[^]`.
func TestRegexpParse(t *testing.T) {
	flags := syntax.PerlX
	pat := `[\s\S]b`

	rx, err := syntax.Parse(pat, flags)
	if err != nil {
		t.Fatal(err)
	}

	// String must produce something Parse accepts; `[^]b` does not qualify.
	printed := rx.String()
	rx2, err := syntax.Parse(printed, flags)
	if err != nil {
		t.Fatalf("syntax.Parse(%q).String() = %q, which does not parse: %v", pat, printed, err)
	}

	// The round trip must also preserve the structure of the expression.
	if !rx.Equal(rx2) {
		t.Errorf("syntax.Parse(%q).String() = %q, which parses to a different regexp", pat, printed)
	}
}
[ ~/src/github.com/ijt/rx ] go test ./...
--- FAIL: TestRegexpParse (0.00s)
    main_test.go:16: syntax.Parse() =  [^]b , want  [\s\S]b
FAIL
FAIL    github.com/ijt/rx   0.006s

I'll file it upstream.

ijt commented 5 years ago

Oh, looks like you already filed it: https://github.com/golang/go/issues/31807.

hanwen commented 5 years ago

This has been fixed upstream in Go.