西游记相关的分词,出现次数最高的20个

 1 print("西游记人物出场次数:")
 2 import jieba
 3 import time
 4 start = time.perf_counter()
 5 txt = open("西游记.txt","r",encoding="utf-8").read()
 6 excludes={"一个","那里","怎么","我们","不知","两个","甚么","只见","不是","原来","不敢","闻言","如何"}
 7 rds = jieba.lcut(txt)
 8 counts = {}
 9 for word in words:
10     if len(word) == 1:
11         continue
12     elif word == "行者" or word == "大圣" or word == "老孙":
13         rword  = "悟空"
14     elif word == "师父" or word == "三藏" or word == "长老":
15         rword = "唐僧"
16     elif word == "和尚" or word == "呆子":
17         rword = "沙僧"
18     else:
19         rword = word
20     counts[rword] = counts.get(rword,0)+1
21 for word in excludes:
22     del counts[word]
23 items = list(counts.items())
24 items.sort(key = lambda x:x[1],reverse=True)
25 for i in range(9):
26     word,count=items[i]
27     print("{0:<10}{1:>5}次".format(word,count))
28 dur = time.perf_counter()-start
29 print("运行时间为{:.2f}s".format(dur))

 

posted @ 2021-11-13 23:50  熊能能  阅读(290)  评论(0)    收藏  举报