jieba 库分词之聊斋
由于《聊斋志异》没有固定的主角,角色也只有称呼,因此后面的非名词不再排除。
import jieba
from collections import Counter

# Word-frequency ranking: segment the text with jieba, count every token
# whose length is not 1, drop the excluded words, and print the 20 most
# frequent ones.

# Raw string: "\s" is not a valid escape sequence, so the non-raw literal only
# worked by accident and triggers a warning on current Python versions.
# The context manager closes the file handle as soon as the text is read.
with open(r"D:\spyder_lianxi\聊斋志异.txt", "r", encoding="utf-8") as f:
    txt = f.read()

# Words that are removed from the ranking.
excludes = {
    "于是", "没有", "一个", "已经", "不能", "什么", "起来", "知道",
    "可以", "出来", "忽然", "一天", "不敢", "这样", "回家", "高兴",
    "看见", "十一", "有个", "一样", "回来", "只是", "不知",
}

# Segment the whole text into a list of tokens.
words = jieba.lcut(txt)

# Skip single-character tokens and excluded words while counting. Filtering
# here (instead of deleting keys afterwards) cannot raise KeyError when an
# excluded word never occurs in the text.
counts = Counter(
    word for word in words
    if len(word) != 1 and word not in excludes
)

# (word, count) pairs ordered by descending count.
items = counts.most_common()

# Slicing tolerates texts that yield fewer than 20 distinct words.
for word, count in items[:20]:
    print("{0:<10}{1:>5}".format(word, count))

浙公网安备 33010602011771号