blevesearch / bleve

A modern text/numeric/geo-spatial/vector indexing library for go
Apache License 2.0
9.83k stars 669 forks source link

html search error #1894

Open mywander opened 8 months ago

mywander commented 8 months ago

I index HTML content, but searching returns unexpected results: `msg := []struct { Id string Body string }{{Id: "1", Body: "

You trusted bbs server all proxies
"}, {Id: "2", Body: "
this is NOT bbs server safe
"}}

tmpIndexPath := createTmpIndexPath(t)
defer cleanupTmpIndexPath(t, tmpIndexPath)
idxMapping := bleve.NewIndexMapping()
idxMapping.DefaultAnalyzer = "en"
if err := idxMapping.AddCustomAnalyzer("custom-html", map[string]interface{}{
    "type":         custom.Name,
    "tokenizer":    "unicode",
    "char_filters": []interface{}{html_char_filter.Name},
}); err != nil {
    t.Fatal(err)
}

fm := mapping.NewTextFieldMapping()
fm.Analyzer = "custom-html"

idxMapping.DefaultMapping.AddFieldMappingsAt("Body", fm)
idx, err := bleve.New(tmpIndexPath, idxMapping)
if err != nil {
    t.Fatal(err)
}

defer func() {
    err = idx.Close()
    if err != nil {
        t.Fatal(err)
    }
}()
for _, v := range msg {
    idx.Index(v.Id, v)
}

keywords := []string{"bbs", "server"}
for _, v := range keywords {
    query := bleve.NewQueryStringQuery(v)
    searchRequest := bleve.NewSearchRequest(query)
    searchResult, err := idx.Search(searchRequest)
    if err != nil {
        panic(err)
    }
    if searchResult.Hits.Len() > 0 {
        fmt.Println("Search ", v, " found ", searchResult.Hits.Len())
    } else {
        fmt.Println("Search ", v, " Not found!")
    }
}`

Searching for "bbs" returns no results, but searching for "server" returns 2 results.

CascadingRadium commented 8 months ago

Hello,

The problem you're encountering is that the query string you're executing doesn't explicitly name a field, so it defaults to searching the composite `_all` field. As a result, the query text is analyzed with the default analyzer, which you've set to "en", rather than with the "custom-html" analyzer that was used to index "Body". To resolve this, prefix the search term with the "Body" field in your query string (for example, `Body:bbs`). The query will then use the custom analyzer you've configured for the "Body" field in the default mapping.

Here's the updated code:

// Build a throwaway index whose "Body" field uses a custom analyzer
// (unicode tokenizer + HTML char filter) while the index-wide default
// analyzer is "en", then run field-scoped query-string searches against it.
tmpIndexPath := createTmpIndexPath(t)
defer cleanupTmpIndexPath(t, tmpIndexPath)

idxMapping := NewIndexMapping()
idxMapping.DefaultAnalyzer = "en"
if err := idxMapping.AddCustomAnalyzer("custom-html", map[string]interface{}{
    "type":         custom.Name,
    "tokenizer":    "unicode",
    "char_filters": []interface{}{html_char_filter.Name},
}); err != nil {
    t.Fatal(err)
}

// Documents to index; each one is indexed under its Id.
msg := []struct {
    Id   string
    Body string
}{
    {
        Id:   "1",
        Body: "You trusted bbs server all proxies",
    },
    {
        Id:   "2",
        Body: "this is NOT bbs server safe",
    },
}

// Attach the custom analyzer to the "Body" field of the default mapping.
fm := mapping.NewTextFieldMapping()
fm.Analyzer = "custom-html"
idxMapping.DefaultMapping.AddFieldMappingsAt("Body", fm)

idx, err := New(tmpIndexPath, idxMapping)
if err != nil {
    t.Fatal(err)
}
defer func() {
    // Scope the close error locally instead of overwriting the outer err.
    if err := idx.Close(); err != nil {
        t.Fatal(err)
    }
}()

for _, v := range msg {
    // Fail fast on indexing errors; otherwise a failed write would surface
    // later as a misleading "Not found!" search result.
    if err := idx.Index(v.Id, v); err != nil {
        t.Fatal(err)
    }
}

// Each keyword names the "Body" field explicitly ("Body:<term>").
keywords := []string{"Body:bbs", "Body:server"}
for _, v := range keywords {
    query := NewQueryStringQuery(v)
    searchRequest := NewSearchRequest(query)
    searchResult, err := idx.Search(searchRequest)
    if err != nil {
        // Report through the test harness, consistent with the other
        // failure paths above, rather than panicking.
        t.Fatal(err)
    }
    if searchResult.Hits.Len() > 0 {
        fmt.Println("Search "+v+" found ", searchResult.Hits.Len())
    } else {
        fmt.Println("Search " + v + " Not found!")
    }
}