完整的中英文词频统计

def getTxt(path="zmy.txt", encoding=None):
    """Read a text file and return it lower-cased with punctuation blanked out.

    Every character of the punctuation set below is replaced by a single
    space, so that a later ``str.split()`` yields bare words.

    Args:
        path: File to read. Defaults to ``"zmy.txt"`` relative to the
            current working directory.
        encoding: Text encoding handed to ``open``. ``None`` means the
            platform default.

    Returns:
        The normalised file content as one string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    punctuation = '!"@#$%^&*()+,-./:;<=>?@[\\]_`~{|}'
    # `with` guarantees the handle is closed even if read() raises.
    with open(path, encoding=encoding) as f:
        txt = f.read().lower()
    # str is immutable: a replacement only takes effect if its result is
    # kept. translate() applies all substitutions in one pass and returns
    # the new string.
    return txt.translate(str.maketrans(punctuation, " " * len(punctuation)))

 
from collections import Counter

# Tokenise the normalised English text on whitespace.
zmy = getTxt().split()
# Distinct words; kept under its original name in case other code reads it.
sunstrset = set(zmy)

# Tally every word in a single pass over the list. Calling zmy.count() once
# per distinct word would rescan the whole list each time (quadratic).
# dict() keeps `dic` a plain dictionary.
dic = dict(Counter(zmy))

# (word, count) pairs, ready to be ranked.
wcList = list(dic.items())
 
def tskeSecond(elem):
    """Sort key: return the item at index 1 of ``elem``.

    For the (word, count) pairs this file sorts, that item is the count.
    """
    second = elem[1]
    return second
 
# Rank the (word, count) pairs by count, most frequent first. Slice
# assignment keeps the reordering in place on the same list object.
wcList[:] = sorted(wcList, key=tskeSecond, reverse=True)
print(wcList)


import jieba
# Read the Chinese source text; `with` closes the handle as soon as the
# content has been read instead of leaving it open for the rest of the run.
with open("zhong.txt", "r", encoding="utf-8") as zhong_file:
    word = zhong_file.read()

# Delete the listed full-width punctuation marks before segmentation.
word = word.translate(str.maketrans("", "", ",。“”"))

# Segment the text with jieba and materialise the result as a list so the
# tokens can be printed here and counted afterwards.
# NOTE(review): other punctuation and whitespace/newlines are not stripped
# above, so any such tokens jieba emits will be counted as words — confirm
# whether that is intended.
word = list(jieba.cut(word))
print(word)
  
  
from collections import Counter

# Distinct tokens; kept under its original name in case other code reads it.
wordset = set(word)

# Tally every token in a single pass. Calling word.count() once per distinct
# token would rescan the whole list each time (quadratic).
# dict() keeps the printed form a plain `{...}` rather than `Counter({...})`.
worddic = dict(Counter(word))
print(worddic)

# Rank the (token, count) pairs by count, most frequent first.
wcList = list(worddic.items())
wcList.sort(key=lambda pair: pair[1], reverse=True)
print(wcList)

 

 

 

posted on 2018-10-19 22:04  扁儿  阅读(120)  评论(0)  编辑  收藏  举报

导航