# 红楼梦 (Dream of the Red Chamber): word-frequency analysis
import jieba
from collections import Counter
# Tokens that never count as words: punctuation and very common function
# words. Every entry here is a single character, so the length filter in
# count_words() already drops them; the set is kept so that multi-character
# stopwords can be added without touching the filtering logic.
STOPWORDS = frozenset(
    [",", "。", "“", "”", "!", "?", "的", "了", "在", "是", "我", "你", "他"]
)


def count_words(words, stopwords=STOPWORDS):
    """Count multi-character tokens, skipping stopwords and blank tokens.

    Args:
        words: Iterable of string tokens (for example the list returned by
            ``jieba.lcut``).
        stopwords: Container of tokens to exclude from the count.

    Returns:
        A ``Counter`` mapping each kept token to its number of occurrences.
    """
    return Counter(
        word
        for word in words
        # strip() rejects whitespace-only tokens such as a CR-LF pair, which
        # are longer than one character and would otherwise be counted.
        if len(word) > 1 and word.strip() and word not in stopwords
    )


def main(path="hongloumeng.txt", top_n=20):
    """Print the ``top_n`` most frequent words of the UTF-8 text at ``path``.

    Args:
        path: Path of the text file to analyse.
        top_n: How many of the most frequent words to print.
    """
    # Read the whole text of the novel.
    with open(path, "r", encoding="utf-8") as f:
        text = f.read()

    # Segment the text into words with jieba.
    words = jieba.lcut(text)

    # Print the most frequent words, one "word: count" pair per line.
    for word, count in count_words(words).most_common(top_n):
        print(f"{word}: {count}")


if __name__ == "__main__":
    main()

# 浙公网安备 33010602011771号  (website filing-number footer; not part of the script)