# wewqeqf  (stray text left over from the original post; commented out so the script can run)

#1 Read the whole source text into a single string.
# NOTE(review): the extension in 'we.tet' looks like a typo for 'we.txt' --
# confirm against the actual data file before renaming it.
# The with-statement closes the file even if read() raises.
with open('we.tet', mode='r', encoding='utf-8') as weFile:
    weText = weFile.read()  # entire file content as one str
print(weText)

#2 Remove punctuation. Whitespace is deliberately kept: it is the only thing
#  that separates one word from the next.
replaceList = [',', '.', "'"]
for c in replaceList:
    weText = weText.replace(c, '')
print(weText)

#3 Split the text into the sequence of words, in order of appearance.
# split() without an argument splits on any run of whitespace (spaces and
# newlines alike) and never yields empty strings. The original removed every
# space and then called split(''), which raises ValueError (empty separator).
weList = weText.split()
print(weList)

#4 Distinct words and the number of times each one occurs.
weSet = set(weList)  # set: which words occur at all
print(weSet)

# dict: word -> occurrence count, built in one pass over the word list
# instead of calling weList.count() once per distinct word.
weDict = {}
for word in weList:
    weDict[word] = weDict.get(word, 0) + 1
print(weDict)

#5 Sort by occurrence count, most frequent first.
# A dict has no sort method, so take its (word, count) pairs as a list.
wordCountList = list(weDict.items())
print(wordCountList)
# The keyword is `key` (the original `ket` raised TypeError); x[1] is the count.
wordCountList.sort(key=lambda x: x[1], reverse=True)
print(wordCountList)


#6 Print the top 20 entries.
# Slicing simply yields fewer items when there are fewer than 20 distinct
# words, whereas indexing with range(20) raised IndexError in that case.
for pair in wordCountList[:20]:
    print(pair)

#7 Write the results to a file, one "count word" record per line.
# mode='a' appends, so repeated runs accumulate records in weCount.txt.
with open('weCount.txt', mode='a', encoding='utf-8') as weCountFile:
    for word, count in wordCountList:
        # The whole record has to be built inside write(): the original closed
        # the call right after the count and then added a str to write()'s
        # int return value, which raises TypeError. A space separates the two
        # fields so the count and the word stay distinguishable.
        weCountFile.write(str(count) + ' ' + word + '\n')

  

# posted @ 2018-06-20 21:15  王佳涵  views(141)  comments(0)  bookmark  report