《红楼梦》词频统计

import jieba
from collections import Counter

# Tokens to exclude from the frequency count (punctuation and very common
# function words). Every entry here is a single character, so the length
# filter in count_words() already removes them; the set is kept so that
# multi-character stopwords can be added later.
STOPWORDS = frozenset(
    {",", "。", "“", "”", "!", "?", "的", "了", "在", "是", "我", "你", "他"}
)


def count_words(words, stopwords=STOPWORDS):
    """Count how often each meaningful token occurs in *words*.

    A token is counted only if it is longer than one character and is not
    contained in *stopwords*.

    Args:
        words: Iterable of string tokens (e.g. the output of ``jieba.lcut``).
        stopwords: Container of tokens to ignore; defaults to ``STOPWORDS``.

    Returns:
        A ``collections.Counter`` mapping each kept token to its frequency.
    """
    return Counter(
        token for token in words if len(token) > 1 and token not in stopwords
    )


def main(path: str = "hongloumeng.txt", top_n: int = 20) -> None:
    """Print the *top_n* most frequent words of the text stored at *path*.

    Args:
        path: UTF-8 encoded text file to analyse.
        top_n: Number of highest-frequency words to print.
    """
    # Read the whole text of the novel.
    with open(path, "r", encoding="utf-8") as f:
        text = f.read()

    # Segment the text into words with jieba.
    words = jieba.lcut(text)

    # Drop single-character tokens and stopwords, then count frequencies.
    word_counts = count_words(words)

    # Output the most frequent words, one "word: count" pair per line.
    for word, count in word_counts.most_common(top_n):
        print(f"{word}: {count}")


if __name__ == "__main__":
    main()

posted @ 2025-06-23 14:50  无聊了多少人  阅读(10)  评论(0)    收藏  举报