期末综合大作业:词频统计

#1. Load the whole text of j.txt into memory and echo it.
# The with-statement closes the file even if read() raises.
with open('j.txt', mode='r', encoding='utf-8') as jFile:
    jText = jFile.read()
print(jText)

#2. Blank out separator characters so they act as word boundaries.
replaceList = [',', "'", '-', '\n']
# One translation table maps every listed character to a single space.
jText = jText.translate(str.maketrans({ch: ' ' for ch in replaceList}))
print(jText)

#3. Break the text into a list of words.
# split() with no argument splits on runs of whitespace and never yields
# empty strings; split(' ') produced a '' token for every doubled space
# (e.g. after ", " became two spaces), and '' was then counted as a word.
jList = jText.split()
print(jList)

#4. Count how many times each distinct word occurs.
from collections import Counter

jSet = set(jList)
print(jSet)

# Counter tallies in one pass; calling jList.count() per distinct word
# rescanned the whole list each time.
jDict = dict(Counter(jList))

print(jDict)
for word, count in jDict.items():
    print(word, count)

#5. Turn the counts into (word, count) pairs ordered by count, highest first.
wordCountList = [(word, count) for word, count in jDict.items()]
print(wordCountList)
# sorted() is stable, so words with equal counts keep their relative order.
wordCountList = sorted(wordCountList, key=lambda pair: pair[1], reverse=True)
print(wordCountList)

#6. Show the 20 most frequent words.
# Slicing stops at the end of the list, so fewer than 20 distinct words
# no longer raises IndexError.
for entry in wordCountList[:20]:
    print(entry)

#7. Save every "count word" line to ljjCount.txt.
# NOTE(review): mode='a' appends, so each run adds another copy of the
# results to the file -- confirm whether 'w' was intended.
# The with-statement closes the file even if a write fails.
with open('ljjCount.txt', mode='a', encoding='utf-8') as jCountFile:
    jCountFile.writelines(
        '{} {}\n'.format(count, word) for word, count in wordCountList
    )

posted @ 2018-06-20 20:43  刘海茵  阅读(153)  评论(0)  编辑  收藏  举报