# Print the 20 most frequent multi-character words of 西游记.txt, with several
# spellings of the same character merged under one canonical name.
import jieba

# Raw string: the backslashes of the Windows path must stay literal and must
# not be parsed as (invalid) escape sequences. The `with` block closes the
# file as soon as it has been read.
with open(r'D:\桌面\西游记.txt', "r", encoding='utf-8') as f:
    txt = f.read()

# Words that are removed from the ranking after counting.
excludes = {
    "什么", "一个", "那里", "怎么", "我们", "不知", "两个", "甚么",
    "不是", "只见", "原来", "不敢", "如何", "这个", "不曾", "闻言",
    "正是", "只是", "出来", "一声", "真个", "不得", "这里",
    "今日", "那个", "不见",
}

# Alternative spellings -> the name they are counted under.
aliases = {
    "行者": "孙悟空",
    "大圣": "孙悟空",
    "老孙": "孙悟空",
    "悟空": "孙悟空",
    "孙行者": "孙悟空",
    "师父": "唐僧",
    "三藏": "唐僧",
    "长老": "唐僧",
    "呆子": "八戒",
    "那怪": "妖精",
    "小妖": "妖精",
}

# jieba.lcut segments the text and returns the tokens as a list.
words = jieba.lcut(txt)
counts = {}
for word in words:
    # Single-character tokens are not counted.
    if len(word) == 1:
        continue
    # Fall back to the token itself when it has no alias.
    rword = aliases.get(word, word)
    counts[rword] = counts.get(rword, 0) + 1

for word in excludes:
    # pop() with a default instead of `del`: an excluded word may simply not
    # occur in the text, which must not raise KeyError.
    counts.pop(word, None)

items = list(counts.items())
items.sort(key=lambda x: x[1], reverse=True)
# Slice instead of indexing range(20) so that a text with fewer than 20
# distinct words prints what it has instead of raising IndexError.
for word, count in items[:20]:
    print("{0:<10}{1:>5}".format(word, count))
# ![]()  -- empty Markdown image placeholder; not Python code