jieba分词-红楼梦-次数前20

from collections import Counter

import jieba

 3 # 读取文本文件
 4 path = "红楼梦.txt"
 5 file = open(path, "r", encoding="utf-8")
 6 text = file.read()
 7 file.close()
 8 
 9 # 使用jieba分词
10 words = jieba.lcut(text)
11 
12 # 统计词频
13 counts = {}
14 for word in words:
15     # 过滤掉长度为1的词语
16     if len(word) == 1:
17         continue
18     # 更新字典中的词频
19     counts[word] = counts.get(word, 0) + 1
20 
21 # 对字典中的键值对进行排序
22 items = list(counts.items())
23 items.sort(key=lambda x: x[1], reverse=True)
24 
25 # 输出前20个高频词语
26 for i in range(20):
27     word, count = items[i]
28     print(f"{word:<10}{count:>5}")

 

posted @ 2023-12-28 23:36  累了睡大觉  阅读(51)  评论(0)    收藏  举报