期末综合大作业:词频统计
# Word-frequency statistics for a plain-text book.
#
# The script reads the whole text, removes a fixed set of punctuation marks,
# counts how often each word occurs, prints the most frequent words and
# appends the complete "count word" listing to an output file.
from collections import Counter

SOURCE_PATH = 'Les Miserables悲惨世界.txt'
OUTPUT_PATH = 'fText.txt'
# Punctuation marks deleted before splitting. Only these six are removed;
# any other mark (e.g. '!' or ';') stays attached to its word.
PUNCTUATION = ('?', '.', ',', ':', '"', "'")
TOP_N = 20


def read_text(path, encoding='utf-8'):
    """Return the full contents of the text file at *path* as one string."""
    # The context manager closes the handle even if reading fails.
    with open(path, mode='r', encoding=encoding) as source:
        return source.read()


def strip_punctuation(text, marks=PUNCTUATION):
    """Return *text* with every occurrence of each string in *marks* removed."""
    for mark in marks:
        text = text.replace(mark, '')
    return text


def split_words(text):
    """Split *text* into a list of words, in order of appearance.

    Splitting happens on any run of whitespace, so words separated by
    newlines or tabs are not fused together and consecutive spaces do not
    produce empty "words".
    """
    return text.split()


def count_words(words):
    """Return a Counter mapping each word in *words* to its occurrence count."""
    # Single pass over the words instead of one list.count() scan per word.
    return Counter(words)


def rank_by_count(counts):
    """Return ``(word, count)`` pairs from *counts*, most frequent first.

    The sort is stable, so words with equal counts keep the iteration order
    of *counts* (first-appearance order when it came from count_words).
    """
    return sorted(counts.items(), key=lambda pair: pair[1], reverse=True)


def write_counts(ranked, path, encoding='utf-8'):
    """Append one ``"<count> <word>"`` line per pair in *ranked* to *path*.

    NOTE(review): the file is opened in append mode, so running the script
    repeatedly accumulates one listing per run; confirm this is intended.
    """
    with open(path, mode='a', encoding=encoding) as sink:
        sink.writelines(
            '{} {}\n'.format(count, word) for word, count in ranked
        )


def main(source_path=SOURCE_PATH, output_path=OUTPUT_PATH, top_n=TOP_N):
    """Run the word-frequency steps, printing the result of each one.

    Args:
        source_path: text file to analyse.
        output_path: file the full "count word" listing is appended to.
        top_n: how many of the most frequent words to print in step 6.
    """
    # Step 1: read the whole text from the file as one string.
    text = read_text(source_path)
    print(text)

    # Step 2: remove the punctuation marks.
    text = strip_punctuation(text)
    print(text)

    # Step 3: the sequence of words as they appear in the text.
    words = split_words(text)
    print(words)

    # Step 4: the set of distinct words and how often each one occurs.
    print(set(words))
    counts = count_words(words)
    print(dict(counts))
    for word, count in counts.items():
        print(word, count)

    # Step 5: (word, count) pairs, then the same pairs sorted by count.
    print(list(counts.items()))
    ranked = rank_by_count(counts)
    print(ranked)

    # Step 6: the most frequent words. Slicing copes with texts that have
    # fewer than top_n distinct words, where indexing would raise IndexError.
    for pair in ranked[:top_n]:
        print(pair)

    # Step 7: save the complete ranking.
    write_counts(ranked, output_path)


if __name__ == '__main__':
    main()