def getTxt():
    """Return the contents of ``zmy.txt`` lower-cased with punctuation blanked.

    The file is read from the current working directory using the platform
    default encoding. Every character listed in ``punctuation`` is replaced
    by a single space so that a later ``split()`` separates words cleanly.

    Returns:
        str: the normalised text.

    Raises:
        OSError: if ``zmy.txt`` cannot be opened or read.
    """
    # The context manager closes the file even if read() raises.
    with open("zmy.txt") as f:
        txt = f.read()
    txt = txt.lower()
    punctuation = '!"@#$%^&*()+,-./:;<=>?@[\\]_`~{|}'
    # str objects are immutable: the replacement result must be kept.
    # translate() applies every substitution in a single pass.
    table = str.maketrans(dict.fromkeys(punctuation, " "))
    return txt.translate(table)
# Tokenise the normalised English text on whitespace.
zmy = getTxt().split()
# Distinct words; kept as a module-level name although the counting
# below no longer needs it.
sunstrset = set(zmy)
# Count every word in one pass over the token list instead of calling
# zmy.count() once per distinct word (which rescans the whole list each time).
dic = {}
for token in zmy:
    dic[token] = dic.get(token, 0) + 1
# (word, count) pairs, in order of first occurrence in the text.
wcList = list(dic.items())
def tskeSecond(elem):
    """Return the item at index 1 of ``elem``.

    Used as a sort key for ``(word, count)`` pairs so that sorting is
    driven by the count.

    Args:
        elem: any indexable object with at least two items.

    Returns:
        The second item of ``elem``.

    Raises:
        IndexError: if ``elem`` has fewer than two items.
    """
    return elem[1]
# Rank the (word, count) pairs from most to least frequent, then show them.
# The sort is stable, so pairs with equal counts keep their relative order.
wcList = sorted(wcList, key=tskeSecond, reverse=True)
print(wcList)
import jieba
# Read the Chinese source text; the context manager closes the file
# even if read() raises.
with open("zhong.txt", "r", encoding="utf-8") as f:
    word = f.read()
# Drop the punctuation marks that should not end up as tokens.
for ch in ",。“”":
    word = word.replace(ch, "")
# jieba.cut returns an iterable of segments; materialise it so the
# tokens can be printed and traversed more than once.
word = list(jieba.cut(word))
print(word)
# Distinct tokens; kept as a module-level name although the counting
# below no longer needs it.
wordset = set(word)
# Count every token in one pass instead of calling word.count() once per
# distinct token (which rescans the whole list each time).
worddic = {}
for seg in word:
    worddic[seg] = worddic.get(seg, 0) + 1
print(worddic)
# (token, count) pairs ranked from most to least frequent.
wcList = list(worddic.items())
wcList.sort(key=lambda x: x[1], reverse=True)
print(wcList)