期末综合大作业:词频统计

# Read the whole input text into memory. The `with` block guarantees the
# file handle is closed even if read() raises (e.g. on a decoding error).
with open('ya.txt', 'r', encoding='utf-8') as yaFile:
    yaText = yaFile.read()
print(yaText)

# Turn every separator character into a single space so the text can be
# broken into words afterwards.
replacelist = [',', "'", '\n']
separator_table = str.maketrans({ch: ' ' for ch in replacelist})
yaText = yaText.translate(separator_table)
print(yaText)

# Token list. split() with no argument splits on any run of whitespace, so
# consecutive spaces do not produce empty-string "words" that would then be
# counted as the most frequent token.
yalist = yaText.split()
print(yalist)
# Distinct words that occur in the text
yaSet = set(yalist)
print(yaSet)
# Count occurrences of each word in one pass over the token list.
# (Calling yalist.count() once per distinct word rescans the whole list
# each time.) Keys end up in first-appearance order.
yaDict = {}
for word in yalist:
    yaDict[word] = yaDict.get(word, 0) + 1

print(yaDict)
for d, count in yaDict.items():
    print(d, count)
# Sort the (word, count) pairs by count, most frequent first
wordCountList = list(yaDict.items())
print(wordCountList)
wordCountList.sort(key=lambda x: x[1], reverse=True)
print(wordCountList)
# Print the top 20. Slicing copes with texts that have fewer than 20
# distinct words, where indexing over range(20) would raise IndexError.
for entry in wordCountList[:20]:
    print(entry)
# Write one "count word" line per entry to the output file.
# NOTE(review): mode 'a' appends, so re-running the script adds another copy
# of the results to yaCount.txt -- confirm whether 'w' was intended.
with open('yaCount.txt', mode='a', encoding='utf-8') as yaCountFile:
    for word, count in wordCountList:
        # The count is an int and must be converted on its own; adding it
        # directly to a str raises TypeError.
        yaCountFile.write(str(count) + ' ' + word + '\n')

 

posted @ 2018-06-18 19:41  梁晓兵  阅读(146)  评论(0)  编辑  收藏  举报