# 利用第三方库jieba进行中文分词 (Chinese word segmentation using the third-party library jieba)

# -*- coding:utf-8 -*-
from collections import Counter

import jieba
# Segment the text of 2018.txt with jieba, count how often each token occurs,
# and print every token that occurs at least 20 times as "token:count",
# ordered from least to most frequent.

# Read the whole file up front; the context manager closes the handle even
# if read() raises (for example on a decoding error).
with open('2018.txt', 'r', encoding="utf-8") as fp:
    text = fp.read()

# Token sequence produced by jieba for the full text.
ls = jieba.lcut(text)

# Tokens that must never appear in the report.
# NOTE(review): the original list held '\n' followed by many empty-string
# entries; they look like punctuation marks that were lost when the script
# was copied. Restore the intended characters here if they are known.
special_char = {'\n', ''}

# Occurrence count per token; Counter keeps first-seen order like the
# original hand-rolled dict did.
d = Counter(ls)

# Keep only frequent tokens that are not in the exclusion set.
res = {k: n for k, n in d.items() if n >= 20 and k not in special_char}

# sorted() is stable, so tokens with equal counts stay in first-seen order.
for item in sorted(res, key=res.__getitem__):
    print("{}:{}".format(item, res[item]))

 

# posted @ 2019-06-20 17:48  n0page404  阅读(118)  评论(0)    收藏  举报