西游记

计数

点击查看代码
#Journey to the west.py
import jieba
# Count the most frequent multi-character words in the novel, merging the
# alternate names of the main characters, and print the top 20.

# Read the whole text up front; the context manager guarantees the file
# handle is closed even if reading fails.
with open("西游记.txt", "r", encoding='utf-8') as f:
    txt = f.read()
words = jieba.lcut(txt)

# Alternate names that are folded into one canonical name per character.
aliases = {
    "行者": "孙悟空",
    "大圣": "孙悟空",
    "八戒": "猪八戒",
    "呆子": "猪八戒",
    "师父": "唐僧",
    "三藏": "唐僧",
    "三师弟": "悟净",
    "沙僧": "悟净",
}

counts = {}
for word in words:
    # Single-character tokens are excluded from the tally.
    if len(word) == 1:
        continue
    rword = aliases.get(word, word)
    counts[rword] = counts.get(rword, 0) + 1

# Sort (word, count) pairs by count, highest first.
items = list(counts.items())
items.sort(key=lambda x: x[1], reverse=True)

# Slice rather than index over range(20): a text with fewer than 20 distinct
# words prints what is available instead of raising IndexError.
for word, count in items[:20]:
    print("{0:<10}{1:>5}".format(word, count))
    
posted @ 2025-06-23 11:41  昔桯  阅读(6)  评论(0)    收藏  举报