# 词频统计 (word frequency count)

# 1. Read the whole source text into memory.
# The context manager closes the file even if reading or decoding fails.
with open('adc.txt', mode='r', encoding='utf-8') as adcFile:
    adcText = adcFile.read()
print(adcText)

# 2. Blank out punctuation and line breaks so that words end up separated
#    by spaces only.
replaceList = [',', '.', "'", '\n']
# One translation table maps every separator to a space in a single pass.
separatorTable = str.maketrans({mark: ' ' for mark in replaceList})
adcText = adcText.translate(separatorTable)
print(adcText)

# 3. Split the text into a list of words.
# split() with no argument splits on any run of whitespace and drops empty
# strings. split(' ') would emit a '' for every doubled space (for example
# where ", " was turned into two spaces), and '' would then be counted as
# a word in the frequency table.
adcList = adcText.split()
print(adcList)

# 4. Count how many times each word occurs.
from collections import Counter

# The set of distinct words is still built and shown, as before.
adcSet = set(adcList)
print(adcSet)

# Counter tallies every word in a single pass over the list; calling
# adcList.count(word) once per distinct word rescans the whole list each
# time. The result is converted to a plain dict so it prints as one.
adcDict = dict(Counter(adcList))

print(adcDict)
for word, count in adcDict.items():
    print(word, count)

# 5. Turn the counts into (word, count) pairs and order them from the most
#    frequent word to the least frequent.
wordCountList = list(adcDict.items())
print(wordCountList)
# The sort is stable, so words with equal counts keep their relative order.
wordCountList = sorted(wordCountList, key=lambda pair: pair[1], reverse=True)
print(wordCountList)

# 6. Show the 20 most frequent words, one (word, count) pair per line.
# The slice stops at the end of the list, so a text with fewer than 20
# distinct words prints what it has instead of raising IndexError.
for wordCount in wordCountList[:20]:
    print(wordCount)

# 7. Save the full ranking, one "count word" pair per line.
# The context manager closes the file even if a write fails.
# NOTE(review): mode 'a' appends, so every run adds another copy of the
# ranking to adcCount.txt; switch to 'w' if overwriting is intended.
with open('adcCount.txt', mode='a', encoding='utf-8') as adcCountFile:
    adcCountFile.writelines(
        '{} {}\n'.format(count, word) for word, count in wordCountList
    )

 

# posted @ 2018-06-20 20:32  HaoVlikeyou  阅读(149)  评论(0)    收藏  举报