利用第三方库jieba进行中文分词
# -*- coding:utf-8 -*-
"""Print the most frequent words of a Chinese text file.

The text is segmented with the third-party ``jieba`` library; every token
that occurs at least ``MIN_COUNT`` times and is not a stop word is printed
as ``word:count``, in ascending order of count.
"""
from collections import Counter
from operator import itemgetter

# File read by ``main`` when no other path is given.
INPUT_PATH = '2018.txt'

# Minimum number of occurrences a token needs to be reported.
MIN_COUNT = 20

# Tokens that are never reported (punctuation and common function words).
# A frozenset gives O(1) membership tests and removes the duplicated entries
# of the original list without changing which tokens are excluded.
# NOTE(review): the punctuation entries are ASCII as found in the source;
# confirm whether full-width forms were intended for Chinese text.
STOP_WORDS = frozenset([
    '\n', '(', ')', ',', '。', '', '、', '“', '”', '较', '好', '了', ';',
    '把', '上', '对', '等', '与', '为', '的', '在', '和', '新', '以',
])


def frequent_words(tokens, min_count=MIN_COUNT, stop_words=STOP_WORDS):
    """Return ``(word, count)`` pairs for the frequent tokens.

    Args:
        tokens: Iterable of string tokens (e.g. the result of ``jieba.lcut``).
        min_count: A token is kept only if it occurs at least this often.
        stop_words: Container of tokens to exclude regardless of frequency.

    Returns:
        A list of ``(word, count)`` tuples sorted by ascending count. Tokens
        with equal counts keep the order in which they first appeared.
    """
    counts = Counter(tokens)
    kept = [
        (word, count)
        for word, count in counts.items()
        if count >= min_count and word not in stop_words
    ]
    # sorted() is stable, so ties stay in first-seen (Counter insertion) order.
    return sorted(kept, key=itemgetter(1))


def main(path=INPUT_PATH):
    """Segment the text in *path* and print each frequent word as ``word:count``."""
    # Imported here so the counting helper stays importable without jieba.
    import jieba

    # The context manager closes the file even if reading fails.
    with open(path, 'r', encoding="utf-8") as fp:
        text = fp.read()

    for word, count in frequent_words(jieba.lcut(text)):
        print("{}:{}".format(word, count))


if __name__ == "__main__":
    main()


浙公网安备 33010602011771号