期末综合大作业:词频统计
# Word-frequency count: read a text file, count how often each word occurs,
# print the results and append "<count> <word>" lines to an output file.
from collections import Counter

# Characters treated as word separators; each one is replaced by a space
# before the text is split into words.
SEPARATORS = (',', '.', "'", '\n')


def tokenize(text, separators=SEPARATORS):
    """Return the words of *text* in order of appearance.

    Every character in *separators* is replaced by a space and the result is
    split on single spaces. Empty tokens (produced by adjacent separators or
    repeated spaces, e.g. ", ") are dropped so that '' is never counted as a
    word.
    """
    for sep in separators:
        text = text.replace(sep, ' ')
    return [word for word in text.split(' ') if word]


def count_words(words):
    """Return ``(word, count)`` pairs sorted by descending count.

    Words with equal counts keep the order in which they were first seen, so
    the ranking is the same on every run.
    """
    return Counter(words).most_common()


def main(input_path="ge.txt", output_path="geCount.txt", top_n=20):
    """Count the words in *input_path* and report the result.

    Prints the raw text, the list of words, every word with its count
    (most frequent first) and finally the *top_n* most frequent
    ``(word, count)`` pairs. Then appends one ``"<count> <word>"`` line per
    distinct word to *output_path*.
    """
    with open(input_path, mode="r", encoding="utf-8") as src:
        text = src.read()
    print(text)

    words = tokenize(text)
    print(words)

    ranked = count_words(words)
    for word, count in ranked:
        print(word, count)

    # Slicing (rather than indexing 0..top_n-1) is safe when the text has
    # fewer than top_n distinct words.
    for entry in ranked[:top_n]:
        print(entry)

    # NOTE(review): append mode is kept from the original script, so running
    # the script repeatedly accumulates results in the output file -- confirm
    # whether overwrite ('w') is what is actually wanted.
    with open(output_path, mode="a", encoding="utf-8") as out:
        out.writelines(str(count) + ' ' + word + '\n' for word, count in ranked)


if __name__ == "__main__":
    main()