You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
import os
from whoosh.index import create_in
from whoosh.fields import Schema, TEXT
from whoosh.qparser import QueryParser
from whoosh import scoring
from whoosh.index import open_dir
from whoosh.query import Phrase
import sys
def createSearchableData(list_docs, index_dir="indexdir"):
    """Create a new Whoosh index holding one document per input text.

    Args:
        list_docs: Iterable of text strings; each one is added as a
            separate document in the stored ``textdata`` field.
        index_dir: Directory in which the index is created. It is made
            if it does not exist yet. Defaults to ``"indexdir"``.
    """
    schema = Schema(textdata=TEXT(stored=True))
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(index_dir, exist_ok=True)
    ix = create_in(index_dir, schema)
    # Used as a context manager, the writer commits on a clean exit and
    # cancels if an exception escapes, so the index write lock is always
    # released.
    with ix.writer() as writer:
        for text in list_docs:
            writer.add_document(textdata=text)
# Index one sample document, then run a phrase query against it and print
# the highlighted fragments of every hit.
createSearchableData(['we are looking for a Java Developer in CA area. \
Java developer should have a strong knowledge in java programming. \
He/she must be able to work as GUI developer'])
ix = open_dir("indexdir")

# Phrase() is given its terms verbatim; they are not run through the field's
# analyzer. The terms therefore have to be lowercase here:
# Phrase("textdata", [u"Java", u"developer"]) returns no results against this
# index, presumably because the TEXT field's analyzer lowercases tokens at
# indexing time (see the Whoosh analysis documentation).
query_txt = Phrase("textdata", [u"java", u"developer"])

# The searcher holds open index files, so close it deterministically.
# Highlights must be computed while the searcher is still open.
with ix.searcher(weighting=scoring.Frequency) as searcher:
    results = searcher.search(query_txt, limit=10)
    # NOTE(review): highlights() appears to mark every occurrence of the
    # individual query terms in the stored text, so fragments can show
    # `java` in "java programming" or `developer` in "GUI developer" even
    # though the document was selected by the phrase match -- confirm
    # against the Whoosh highlighting documentation.
    fragments = [
        hit.highlights(fieldname="textdata", top=10) for hit in results
    ]
print(fragments)
Why does the searcher work only with a lowercase query?
Why does it not match the phrase exactly?
The text was updated successfully, but these errors were encountered:
Hi @mchaput
I want to search for a phrase in the index:
Here is an MWE:
Why does the searcher work only with a lowercase query?
Why does it not match the phrase exactly?
The text was updated successfully, but these errors were encountered: