"""Count word frequencies in a text file and report the most common words.

Steps: read the source text, turn punctuation into word separators, split the
text into words, count each distinct word, print the ranking, and append the
full ranking to an output file.
"""
from collections import Counter

# Characters that are treated as word separators in addition to whitespace.
PUNCTUATION = (',', '.', "'", '\n')


def strip_punctuation(text, separators=PUNCTUATION):
    """Return *text* with every character in *separators* replaced by a space.

    A space (not the empty string) is substituted so that the words on either
    side of a punctuation mark or line break are not glued together.
    Each entry of *separators* must be a single character.
    """
    table = str.maketrans({char: ' ' for char in separators})
    return text.translate(table)


def split_words(text):
    """Return the sequence of words in *text*, in order of appearance.

    ``str.split()`` without arguments splits on runs of whitespace and never
    yields empty strings, so empty input produces an empty list.
    """
    return strip_punctuation(text).split()


def count_words(words):
    """Return ``(word, count)`` pairs sorted by descending count.

    Words with equal counts keep the order in which they were first seen.
    """
    return Counter(words).most_common()


def format_count_lines(word_counts):
    """Return one ``"<count> <word>\\n"`` line per ``(word, count)`` pair."""
    return ['{} {}\n'.format(count, word) for word, count in word_counts]


def main(source_path='we.tet', output_path='weCount.txt', top_n=20):
    """Run the word count on *source_path* and append results to *output_path*.

    Args:
        source_path: UTF-8 text file to analyse.
        output_path: File that receives one "<count> <word>" line per word.
        top_n: Number of most frequent words to print.

    Raises:
        OSError: If either file cannot be opened.
    """
    # 1. Read the whole file into a single string.
    # NOTE(review): 'we.tet' may be a typo for 'we.txt' - confirm the file name.
    with open(source_path, mode='r', encoding='utf-8') as source_file:
        text = source_file.read()
    print(text)

    # 2. Replace punctuation with spaces so word boundaries survive.
    print(strip_punctuation(text))

    # 3. List: the sequence of words as they occur.
    words = split_words(text)
    print(words)

    # 4. Set: which distinct words occur.
    print(set(words))

    # 5. Count every word and sort by frequency, most frequent first.
    word_counts = count_words(words)
    print(dict(word_counts))
    print(word_counts)

    # 6. Print the top entries; slicing copes with fewer than top_n words.
    for entry in word_counts[:top_n]:
        print(entry)

    # 7. Write the ranking to the output file.
    # NOTE(review): mode 'a' appends, so repeated runs accumulate duplicate
    # results - confirm whether 'w' (overwrite) is intended.
    with open(output_path, mode='a', encoding='utf-8') as count_file:
        count_file.writelines(format_count_lines(word_counts))


if __name__ == '__main__':
    main()