中文词频统计

import jieba

from collections import Counter


def count_words(tokens):
    """Count how often each token occurs, ignoring single-character tokens.

    Args:
        tokens: Any iterable of strings (it is consumed exactly once).

    Returns:
        A ``Counter`` mapping each token whose length is not 1 to its number
        of occurrences. Keys appear in first-seen order.
    """
    # One pass over the tokens instead of calling list.count() per token,
    # which made the original quadratic in the number of tokens.
    return Counter(token for token in tokens if len(token) != 1)


def top_words(counts, limit=20):
    """Return up to ``limit`` ``(word, count)`` pairs, most frequent first.

    Args:
        counts: Mapping of word -> occurrence count.
        limit: Maximum number of pairs to return.

    Returns:
        A list of ``(word, count)`` tuples sorted by descending count. The
        sort is stable, so words with equal counts keep the order in which
        they appear in ``counts``.
    """
    ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)
    # Slicing handles the "fewer than `limit` words" case without a bounds check.
    return ranked[:limit]


def main(path='pingfandeshijie', limit=20):
    """Segment the text file at ``path`` with jieba and print the top words.

    Args:
        path: Path of the UTF-8 encoded text file to analyse.
        limit: How many of the most frequent words to print.
    """
    # The context manager closes the file even if reading raises.
    with open(path, 'r', encoding='utf-8') as file:
        text = file.read()

    counts = count_words(jieba.cut(text))
    for entry in top_words(counts, limit):
        print(entry)


if __name__ == '__main__':
    main()

 

posted on 2018-03-28 21:53  181冯荣彬  阅读(85)  评论(0)  编辑  收藏  举报

导航