import os
from whoosh.index import create_in
from whoosh.fields import Schema, TEXT
from whoosh.qparser import QueryParser
from whoosh import scoring
from whoosh.index import open_dir
from whoosh.query import Phrase
import sys
def createSearchableData(list_docs, index_dir="indexdir"):
    """Create a Whoosh index holding one document per text in *list_docs*.

    Each text is stored in a single stored TEXT field named ``textdata``.

    Args:
        list_docs: Iterable of unicode strings; each becomes one document.
        index_dir: Directory that holds the index files. It is created if
            missing. Defaults to ``"indexdir"``.

    Raises:
        OSError: If the index directory cannot be created.
    """
    schema = Schema(textdata=TEXT(stored=True))
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(index_dir, exist_ok=True)
    # create_in() starts a new index in the directory (per the Whoosh docs
    # this replaces any index already stored there).
    ix = create_in(index_dir, schema)
    # As a context manager the writer commits on a clean exit and cancels
    # (releasing the write lock) if adding a document raises.
    with ix.writer() as writer:
        for text in list_docs:
            writer.add_document(textdata=text)
# Minimal working example: index one job description, run a phrase query
# against it and print the highlighted fragments of every hit.
createSearchableData([
    'we are looking for a Java Developer in CA area. '
    'Java developer should have a strong knowledge in java programming. '
    'He/she must be able to work as GUI developer'
])
ix = open_dir("indexdir")

# Observed: Phrase("textdata", [u"Java", u"developer"]) returns no results,
# while the lower-case terms below do match.
# NOTE(review): Phrase takes its words as already-analyzed index terms, and
# the default TEXT analyzer presumably lower-cases tokens at index time, so
# the query terms have to be lower-cased by the caller -- confirm against
# the Whoosh analysis documentation.
query_txt = Phrase("textdata", [u"java", u"developer"])

# Observed: the printed fragments also mark `java` in "java programming" and
# `developer` in "GUI developer", not only the adjacent phrase.
# NOTE(review): highlights() appears to mark every occurrence of the query's
# terms in the stored text rather than only the matched phrase positions --
# confirm against the Whoosh highlighting documentation.
with ix.searcher(weighting=scoring.Frequency) as searcher:
    results = searcher.search(query_txt, limit=10)
    # Highlights must be built while the searcher is still open.
    fragments = [
        hit.highlights(fieldname="textdata", top=10) for hit in results
    ]
print(fragments)
Hi @mchaput
I want to search for an exact phrase in the index.
Here is a minimal working example (MWE):
Why does the searcher only work with a lower-case query?
Why does it not match the phrase exactly?