mchaput / whoosh

Pure-Python full-text search library
Other
569 stars 69 forks source link

Exact matching does not work #29

Open rafikg opened 2 years ago

rafikg commented 2 years ago

Hi @mchaput

I want to search for a phrase in the index:

Here is a minimal working example (MWE):

import os
from whoosh.index import create_in
from whoosh.fields import Schema, TEXT

from whoosh.qparser import QueryParser
from whoosh import scoring
from whoosh.index import open_dir
from whoosh.query import Phrase
import sys

def createSearchableData(list_docs):
    """Index every text in *list_docs* into the ``indexdir`` directory.

    Ensures the ``indexdir`` directory exists, calls ``create_in`` on it with
    a schema holding a single stored ``textdata`` TEXT field, and adds one
    document per item of *list_docs*.

    Args:
        list_docs: Iterable of strings; each one becomes the ``textdata``
            value of its own document.
    """
    schema = Schema(textdata=TEXT(stored=True))

    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs("indexdir", exist_ok=True)

    ix = create_in("indexdir", schema)

    # Used as a context manager, the writer commits on normal exit and
    # cancels if an exception escapes, so the writer is always released.
    with ix.writer() as writer:
        for text in list_docs:
            writer.add_document(textdata=text)

# Build the index from a single document. The backslash continuations keep
# the text as one string literal (including the leading spaces of each
# continued line).
createSearchableData(['we are looking for a Java Developer in CA area. \
 Java developer should have a strong knowledge in java programming. \
  He/she must be able to work as GUI developer'])

ix = open_dir("indexdir")

# Reported behaviour of the two phrase queries (the second assignment
# overrides the first, so only the lowercase query is actually run):
query_txt = Phrase("textdata", [u"Java", u"developer"])  # returns empty results
query_txt = Phrase("textdata", [u"java", u"developer"])  # returns the doc, but...

# ...judging by the printed fragments, the searcher does not seem to use
# exact matching as described in the Phrase class documentation: `java` is
# highlighted in "java programming" and `developer` is highlighted in
# "GUI developer".

# query = QueryParser("content", ix.schema).parse(query_txt)

# Open the searcher in a `with` block so it is closed even if the search or
# highlighting raises; highlights are collected while it is still open.
with ix.searcher(weighting=scoring.Frequency) as searcher:
    results = searcher.search(query_txt, limit=10)
    fragments = [
        hit.highlights(fieldname="textdata", top=10) for hit in results
    ]

# Fixed: the original printed the undefined name `frragments` (NameError).
print(fragments)
  1. Why does the searcher work only with a lowercase query?

  2. Why does it not match the phrase exactly?