import jieba
from collections import Counter
import re

# Report the 20 most frequent multi-character tokens in xiyouji.txt.

# Matches when a token's first character is not a word character.
# `\W` covers both punctuation and whitespace, so multi-character whitespace
# tokens (for example a "\r\n" line ending) are rejected along with
# punctuation instead of being counted as words.
_LEADING_NON_WORD = re.compile(r'\W')

with open('xiyouji.txt', 'r', encoding='utf-8') as f:
    text = f.read()

# Segment the full text into a list of tokens.
words = jieba.lcut(text)

# Keep tokens longer than one character that begin with a word character;
# single-character tokens are dropped entirely.
filtered_words = [
    word
    for word in words
    if len(word) > 1 and not _LEADING_NON_WORD.match(word)
]

word_counts = Counter(filtered_words)

# most_common(20) yields (token, count) pairs, highest count first.
top_20_words = word_counts.most_common(20)

for word, count in top_20_words:
    print(f"{word}: {count}")

# posted on 2025-06-17 08:00  雨水啊  阅读(3)  评论(0)    收藏  举报