红楼梦

import jieba
# Frequent words that are not of interest for the report (function words,
# generic forms of address, and so on). They are dropped from the tally after
# counting.
excludes = {
    "什么", "一个", "我们", "你们", "如今", "说道", "知道", "姑娘",
    "起来", "这里", "出来", "众人", "那里", "自己",
    "太太", "一面", "只见", "两个", "没有", "怎么", "不是", "不知", "这个", "听见",
    "这样", "进来", "咱们", "就是", "东西", "告诉", "回来", "只是", "大家",
    "老爷", "只得", "这些", "他们", "丫头", "不敢", "出去", "所以", "薛姨妈", "不过",
    "不好", "姐姐", "的话", "一时", "鸳鸯", "过来", "不能", "心里", "二爷",
    "如此", "银子", "今日", "二人", "答应", "她们", "那么", "几个", "还有", "只管", "说话",
    "那边", "一回", "这么",
}

# Alternate spellings that are folded into a single canonical key before
# counting.
# NOTE(review): "王夫人" is folded into "王熙凤" and "奶奶" into "贾母" exactly as
# the original script did. These look like they may refer to different people
# -- confirm before trusting the merged counts.
_ALIASES = {
    "凤姐": "王熙凤",
    "王夫人": "王熙凤",
    "凤姐儿": "王熙凤",
    "老太太": "贾母",
    "奶奶": "贾母",
}


def count_words(words, excluded=None):
    """Return a dict mapping each counted word to its number of occurrences.

    Args:
        words: Iterable of string tokens (for example the list returned by
            ``jieba.lcut``).
        excluded: Collection of words to remove from the result. Defaults to
            the module-level ``excludes`` set.

    Returns:
        A dict of ``{word: count}``. Tokens of length 1 are ignored, aliases
        listed in ``_ALIASES`` are counted under their canonical key, and
        every word in ``excluded`` is removed.
    """
    if excluded is None:
        excluded = excludes

    counts = {}
    for word in words:
        if len(word) == 1:
            continue
        key = _ALIASES.get(word, word)
        counts[key] = counts.get(key, 0) + 1

    # pop() with a default instead of ``del``: an excluded word that never
    # occurs in the text must not abort the run with KeyError.
    for word in excluded:
        counts.pop(word, None)
    return counts


def top_items(counts, limit=20):
    """Return up to ``limit`` ``(word, count)`` pairs, highest count first.

    Slicing (rather than indexing ``range(limit)``) keeps this safe when fewer
    than ``limit`` distinct words are available. Ties keep their insertion
    order because the sort is stable.
    """
    ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)
    return ranked[:limit]


def main(path='D:/红楼梦/红楼梦.txt', limit=20):
    """Read the text at ``path``, segment it with jieba and print the top words."""
    # ``with`` guarantees the file handle is closed even if reading fails.
    with open(path, "r", encoding='utf-8') as source:
        txt = source.read()

    words = jieba.lcut(txt)
    for word, count in top_items(count_words(words), limit):
        print("{0:<10}{1:>5}".format(word, count))


if __name__ == "__main__":
    main()

 

posted @ 2021-11-14 11:57  徐匡奕达  阅读(40)  评论(0)    收藏  举报