# 西游记人物名字出现次数统计

import jieba
from collections import Counter

# 人物别名映射表

# Alias table: maps each alternative name of a character to the single
# canonical name used for that character. Built from (canonical, aliases)
# groups so that all names of one character sit on one line.
name_mapping = {
    alias: canonical
    for canonical, aliases in (
        ("孙悟空", ("孙猴子", "猴王", "齐天大圣", "大圣", "行者", "美猴王")),
        ("唐三藏", ("唐僧", "三藏", "玄奘")),
        ("猪刚鬣", ("猪八戒", "八戒", "天蓬")),
        ("沙和尚", ("沙僧", "悟净")),
        ("小白龙", ("白龙马",)),
        ("如来佛", ("如来",)),
    )
    for alias in aliases
}

# 读取《西游记》文本

# Read the whole novel into memory as one string. The path is relative, so it
# is resolved against the current working directory; the file is decoded as
# UTF-8. The `with` block closes the file handle as soon as the read finishes.
with open('xiyouji.txt', 'r', encoding='utf-8') as f:
    text = f.read()

# 分词并过滤(只保留长度大于1的词)

# Segment the text with jieba and keep only tokens longer than one character.
words = list(filter(lambda token: len(token) > 1, jieba.lcut(text)))

# 合并人物别名

# Fold character aliases into their canonical names; any token that is not a
# known alias passes through unchanged.
processed_words = [
    name_mapping[token] if token in name_mapping else token
    for token in words
]

# 统计词频并输出Top20

# Count every token after alias merging and print the 20 most frequent ones,
# one "<token>: <count>次" line each, most frequent first.
print("《西游记》高频词Top20(已合并人物别名):")
for word, count in Counter(processed_words).most_common(20):
    print(f"{word}: {count}次")

# posted @ 2025-06-20 20:02  kk/  阅读(131)  评论(0)    收藏  举报