# Count how often each character's name appears in "Journey to the West" (西游记).
import jieba
from collections import Counter
# Character alias table: each key is an alternate name and each value is the
# single name under which that alias should be counted.
name_mapping = {
    # Aliases counted as 孙悟空
    "孙猴子": "孙悟空",
    "猴王": "孙悟空",
    "齐天大圣": "孙悟空",
    "大圣": "孙悟空",
    "行者": "孙悟空",
    "美猴王": "孙悟空",
    # Aliases counted as 唐三藏
    "唐僧": "唐三藏",
    "三藏": "唐三藏",
    "玄奘": "唐三藏",
    # Aliases counted as 猪刚鬣
    "猪八戒": "猪刚鬣",
    "八戒": "猪刚鬣",
    "天蓬": "猪刚鬣",
    # Aliases counted as 沙和尚
    "沙僧": "沙和尚",
    "悟净": "沙和尚",
    # Remaining single-alias entries
    "白龙马": "小白龙",
    "如来": "如来佛",
}
def tokenize(text):
    """Segment *text* with jieba and keep only tokens longer than one character.

    Args:
        text: The full text to segment.

    Returns:
        A list of tokens, in the order jieba produced them, excluding every
        token whose length is 1 or less.
    """
    return [word for word in jieba.lcut(text) if len(word) > 1]


def merge_aliases(words, mapping):
    """Replace each word that is a key of *mapping* with its mapped value.

    Args:
        words: Iterable of tokens.
        mapping: Dict from alias to the name it should be counted under.

    Returns:
        A new list of the same length; words not present in *mapping* are
        passed through unchanged.
    """
    return [mapping.get(word, word) for word in words]


def top_words(words, limit=20):
    """Return the *limit* most frequent words as ``(word, count)`` pairs.

    Args:
        words: Iterable of tokens to count.
        limit: Maximum number of pairs to return (default 20).

    Returns:
        A list of ``(word, count)`` tuples ordered from most to least frequent.
    """
    return Counter(words).most_common(limit)


def main():
    """Read 'xiyouji.txt', count word frequencies and print the top 20.

    Aliases are merged through the module-level ``name_mapping`` table before
    counting, so every alias of a character contributes to one entry.
    """
    # Read the text of "Journey to the West".
    with open('xiyouji.txt', 'r', encoding='utf-8') as f:
        text = f.read()

    # Segment and filter (only tokens longer than one character are kept).
    words = tokenize(text)

    # Merge character aliases.
    processed_words = merge_aliases(words, name_mapping)

    # Count word frequencies and print the top 20.
    print("《西游记》高频词Top20(已合并人物别名):")
    for word, count in top_words(processed_words, 20):
        print(f"{word}: {count}次")


# Run the analysis only when executed as a script, not on import.
if __name__ == "__main__":
    main()


# 浙公网安备 33010602011771号  (appears to be web-page footer text captured with the script; not part of the program)