1 import jieba
2
3 # 读取文本文件
4 path = "红楼梦.txt"
5 file = open(path, "r", encoding="utf-8")
6 text = file.read()
7 file.close()
8
9 # 使用jieba分词
10 words = jieba.lcut(text)
11
12 # 统计词频
13 counts = {}
14 for word in words:
15 # 过滤掉长度为1的词语
16 if len(word) == 1:
17 continue
18 # 更新字典中的词频
19 counts[word] = counts.get(word, 0) + 1
20
21 # 对字典中的键值对进行排序
22 items = list(counts.items())
23 items.sort(key=lambda x: x[1], reverse=True)
24
25 # 输出前20个高频词语
26 for i in range(20):
27 word, count = items[i]
28 print(f"{word:<10}{count:>5}")
![]()