# -*- coding: UTF-8 -*-
from whoosh.index import create_in
from whoosh.fields import *
from whoosh.analysis import RegexAnalyzer
from whoosh.qparser import QueryParser
import os
# Tokenizer pattern: every CJK ideograph in U+4E00..U+9FA5 becomes its own
# token; anything else is split into "word" runs that may contain single
# interior dots (e.g. "a.b.c").
# Written as a plain unicode literal with escaped backslashes instead of the
# Python-2-only ur"..." prefix; the resulting pattern text is identical.
analyzer = RegexAnalyzer(
    u"([\u4e00-\u9fa5])|"
    u"(\\w+(\\.?\\w+)*)"
)

# title and path use the default analysis; only content uses the custom
# analyzer. All three fields are stored so they can be read back from a hit.
schema = Schema(
    title=TEXT(stored=True),
    path=ID(stored=True),
    content=TEXT(stored=True, analyzer=analyzer),
)

# create_in() needs an existing directory to create the index in.
if not os.path.exists("indexdir"):
    os.mkdir("indexdir")
ix = create_in("indexdir", schema)

# Used as a context manager, the writer commits when the block finishes and
# cancels (releasing the index write lock) if adding a document raises.
with ix.writer() as writer:
    writer.add_document(
        title=u"First document",
        path=u"/a",
        content=u"This is the first document we’ve added!",
    )
    writer.add_document(
        title=u"Second document",
        path=u"/b",
        content=u"The second one 你 中文测试中文 is even more interesting!",
    )
# Use "with" so the searcher handle is released as early as possible; the colon plus indentation plays the role of braces.
#with ix.searcher() as searcher:
# results = searcher.find("content", u"first")
# print results[0]
# results = searcher.find("content", u"你")
# print results[0]
# results = searcher.find("content", u"测试")
# print results[0]
# #print results[0].decode('UTF-8').encode('GBK')
# found = results.scored_length()
# if results.has_exact_length():
# print("Scored", found, "of exactly", len(results), "documents")
# else:
# low = results.estimated_min_length()
# high = results.estimated_length()
# print("Scored", found, "of between", low, "and", high, "documents")
# Parse the user query against the "content" field of the index schema.
qp = QueryParser("content", schema=ix.schema)
q = qp.parse(u"first")

# Use a with-block so the searcher handle is released as early as possible.
with ix.searcher() as searcher:
    results = searcher.search(q)
    if results.is_empty():
        print("not found")
    else:
        # Print each piece of information available on the first hit.
        # Single-argument print(...) behaves the same on Python 2 and 3.
        top_hit = results[0]
        print(top_hit)
        print(top_hit.rank)
        print(top_hit.docnum)
        print(top_hit.keys())
        print(top_hit.score)
        print(top_hit["title"])
        print(top_hit["path"])
        print(top_hit["content"])
# (Web page footer captured along with the listing; not part of the script: 浙公网安备 33010602011771号)