jieba 分词
jieba 分词:统计 liaozhai.txt 中出现次数最多的前 20 个词语:
# Word-frequency report: segment a UTF-8 text file with jieba, merge a few
# aliases into one canonical name each, and print the most frequent words.
from collections import Counter

import jieba

# Path of the UTF-8 text file to analyse.
TEXT_PATH = "C:\\Users\\86133\\Desktop\\liaozhai.txt"

# Tokens that are tallied under a shared canonical name.
ALIASES = {
    "举孝廉": "刘孝廉",
    "王公大人": "刘孝廉",
    "厉鬼": "群鬼",
    "鬼": "群鬼",
}

# Maximum number of rows in the printed report.
TOP_N = 20

with open(TEXT_PATH, "r", encoding='utf_8') as f:
    words = jieba.lcut(f.read())

counts = Counter()
for word in words:
    # Resolve aliases BEFORE the single-character filter: "鬼" is one
    # character long, so filtering first would make its alias unreachable.
    rword = ALIASES.get(word)
    if rword is None:
        if len(word) == 1:
            # Skip single-character tokens that have no alias.
            continue
        rword = word
    counts[rword] += 1

# most_common() returns at most TOP_N entries, so a text with fewer than
# TOP_N distinct words no longer raises IndexError. Ties keep
# first-encountered order, matching a stable descending sort.
for word, count in counts.most_common(TOP_N):
    print("{0:<10}{1:>5}".format(word, count))

浙公网安备 33010602011771号