jieba 分词

jieba 分词：统计《聊斋》文本中出现次数最多的前 20 个词语（忽略单字词）：

import jieba

# Word-frequency report: tokenize the text file with jieba, count every
# token that is not a single character, and print the 20 most frequent.

# Tokens mapped to one canonical name so they are counted together.
# NOTE(review): the empty-string key is carried over from the original
# condition `word == ""`; it looks like a placeholder for a missing
# synonym -- confirm which word was intended.
aliases = {
    "举孝廉": "刘孝廉",
    "王公大人": "刘孝廉",
    "厉鬼": "群鬼",
    "": "群鬼",
}

with open("C:\\Users\\86133\\Desktop\\liaozhai.txt", "r", encoding='utf_8') as f:
    words = jieba.lcut(f.read())

counts = {}
for word in words:
    # Single-character tokens are excluded from the statistics.
    if len(word) == 1:
        continue
    rword = aliases.get(word, word)
    counts[rword] = counts.get(rword, 0) + 1

# Highest count first; the sort is stable, so words with equal counts keep
# the order in which they were first inserted into `counts`.
items = sorted(counts.items(), key=lambda x: x[1], reverse=True)

# Slice rather than index with range(20): a text with fewer than 20 distinct
# words prints what is available instead of raising IndexError.
for word, count in items[:20]:
    print("{0:<10}{1:>5}".format(word, count))

 

posted @ 2023-12-28 21:11  LXxx007  阅读(6)  评论(0)    收藏  举报