olivernn / lunr.js

A bit like Solr, but much smaller and not as bright
http://lunrjs.com
MIT License
8.89k stars 548 forks source link

Unable to search bigram #406

Open AshokChava opened 5 years ago

AshokChava commented 5 years ago

I have created a custom custom tokenizer to create a index with bigrams and also created a another tokenizer to convert the search term to bigrams. But not able to find the results though there is a matching bigram available in inverted index. Not sure what I am doing wrong. please suggest

`function createLunrIndex(jsonObj){ var pipeline = new lunr.Pipeline var bigram = function (token, idx, tokens) { if(tokens[idx + 1]!==undefined){ return token + " " + tokens[idx + 1] } }

lunr.Pipeline.registerFunction(bigram, 'bigramtokenizer')
return lunr(function () {
    //this.tokenizer(bigram)
    this.pipeline.add(bigram)
    this.searchPipeline.add(bigram)
    this.ref('id')
    this.field('top_terms',{boost:100})
    this.field('terms',{boost:50})
    this.field('title')
    this.field('description')
    this.field('url')
    //this.metadataWhitelist = ['position']
    jsonObj.items.forEach(function (item) {
            this.add(item)
        }, this)

});

}`

using the following code to search `function lunrSearch(lunrIndex, directory, keyword) {

customTokenizer= function (obj, metadata) {
    var str = obj.toString().trim().toLowerCase().split(" "),
len = str.length,
    tokens = []
    var tokenMetadata = lunr.utils.clone(metadata) || {}
    tokenMetadata["index"] = tokens.length
    for(i=0;i<str.length-1;i++){

            tokens.push(
            new lunr.Token (
                (str[i]+" "+str[i+1]),
        tokenMetadata
      )
    )
    }
    return tokens;
}

var cMaxResult=5
var queryResult = lunrIndex.query(function(query) {

  customTokenizer(keyword).forEach(function(token) {
        query.term("['"+token.toString()+"']", { fields: [ "top_terms","terms","title", "description", "url"]}) // search other fields

    });
});

return queryResult;

} `

olivernn commented 5 years ago

Could you put together something on jsfiddle or similar that demonstrates the problem? That will make it much easier to dig into what is going wrong.