代码改变世界

期末综合大作业:词频统计

2018-06-20 21:37  lulululululululu  阅读(200)  评论(0)  编辑  收藏  举报
#1. Read the whole source text into memory and echo it.
# The context manager closes the file even if read() raises.
with open('the.txt', mode="r", encoding='utf-8') as theFile:
    theText = theFile.read()
print(theText)

#2. Turn punctuation marks and line breaks into spaces so that they
#   act as word separators in the next step.
replaceList = [',','.',"'",'\n']
# Map every listed character to a single space and apply the whole
# mapping to the text in one pass.
separatorTable = str.maketrans({mark: ' ' for mark in replaceList})
theText = theText.translate(separatorTable)
print(theText)

#3. Break the text into a list of words.
# split() with no argument splits on runs of whitespace, so consecutive
# separators (e.g. the two spaces left behind by ", ") do not produce
# empty-string "words" that would later be counted.
theList = theText.split()
print(theList)

#4. Collect the distinct words and count how often each one occurs.
from collections import Counter

theSet = set(theList)
print(theSet)

# Counter tallies every word in a single pass over the list, instead of
# rescanning the whole list once per distinct word. It is converted back
# to a plain dict so that it prints as an ordinary mapping.
theDict = dict(Counter(theList))

print(theDict)
for word, count in theDict.items():
    print(word, count)

#5. Convert the word -> count mapping into a list of (word, count)
#   pairs and order it from the most to the least frequent word.
wordCountList = [(word, count) for word, count in theDict.items()]
print(wordCountList)
# Sort in place on the count (second element of each pair), descending.
wordCountList.sort(
    key=lambda pair: pair[1],
    reverse=True,
)
print(wordCountList)

#6. Show the 20 most frequent words, one (word, count) pair per line.
# Slicing (rather than indexing 0..19) also works when the text has
# fewer than 20 distinct words.
for entry in wordCountList[:20]:
    print(entry)

#7. Save the result as "<count> <word>" lines, most frequent first.
# NOTE(review): mode 'a' appends, so every run of the script adds another
# full copy of the counts to theCount.txt — confirm whether 'w' (overwrite)
# was intended.
# The context manager closes the file even if a write fails.
with open('theCount.txt', mode='a', encoding='utf-8') as theCountFile:
    for word, count in wordCountList:
        theCountFile.write(str(count) + ' ' + word + '\n')