# 中英文词频统计 (Chinese and English word-frequency statistics)
from collections import Counter

# Tokens left out of the English frequency report. 'm' is listed because
# apostrophes are replaced by spaces during tokenizing (so "i'm" yields "m").
# NOTE(review): 'net' looks like it may be a typo for 'not' -- confirm before
# changing; it is kept as-is here.
exclude = {'a', 'the', 'i', 'you', 'in', 'do', 'but', 'that', 'net', 'if',
           'm', 'it'}


def tokenize_english(text):
    """Split *text* into lowercase whitespace-separated tokens.

    Commas and apostrophes are replaced by spaces before splitting; no other
    punctuation is touched.

    Args:
        text: The raw text to tokenize.

    Returns:
        A list of lowercase tokens in their original order.
    """
    return text.lower().replace(',', ' ').replace("'", ' ').split()


def count_english_words(tokens, excluded=None):
    """Count how often each non-excluded token occurs.

    Args:
        tokens: Iterable of tokens, e.g. the result of tokenize_english().
        excluded: Collection of tokens to leave out of the result. Defaults
            to the module-level ``exclude`` set.

    Returns:
        A dict mapping each remaining token to its count, ordered by first
        appearance in *tokens*.
    """
    if excluded is None:
        excluded = exclude
    # One pass over the tokens instead of a list.count() call per distinct
    # word.
    totals = Counter(tokens)
    return {word: n for word, n in totals.items() if word not in excluded}


def report_english_word_counts(path='111.txt'):
    """Print the word-frequency report for the UTF-8 text file at *path*.

    Prints, in this order: the token list, the set of counted words, one
    "word count" line per word, the list of (word, count) pairs, and the
    same list sorted by ascending count.

    Args:
        path: File to read. Defaults to '111.txt'.
    """
    # The context manager closes the file even if reading fails.
    with open(path, 'r', encoding='utf-8') as source:
        tokens = tokenize_english(source.read())
    print(tokens)

    counts = count_english_words(tokens)
    print(set(counts))

    for word, n in counts.items():
        print(word, n)

    pairs = list(counts.items())
    print(pairs)
    # Ascending by count; the sort is stable, so ties keep first-appearance
    # order.
    pairs.sort(key=lambda pair: pair[1])
    print(pairs)


if __name__ == "__main__":
    report_english_word_counts()
from collections import Counter


def rank_multichar_words(words):
    """Count the words in *words* and rank them by descending frequency.

    Words whose length is exactly 1 are skipped.

    Args:
        words: Iterable of word strings, e.g. the output of jieba.lcut().

    Returns:
        A list of (word, count) tuples sorted by count, highest first. Words
        with equal counts keep their first-appearance order because the sort
        is stable.
    """
    totals = Counter(word for word in words if len(word) != 1)
    return sorted(totals.items(), key=lambda pair: pair[1], reverse=True)


def report_chinese_word_counts(path='333.txt', top=7):
    """Print the ranked word counts for the UTF-8 text file at *path*.

    Prints the full ranked list first, then the leading *top* entries one per
    line. If fewer than *top* words were counted, only the available entries
    are printed.

    Args:
        path: File to read. Defaults to '333.txt'.
        top: Maximum number of leading entries to print individually.
            Defaults to 7.
    """
    # Imported here so rank_multichar_words() can be used without the
    # third-party jieba package installed.
    import jieba

    # The context manager closes the file even if reading fails.
    with open(path, 'r', encoding='utf-8') as source:
        text = source.read()

    ranked = rank_multichar_words(jieba.lcut(text))
    print(ranked)
    # Slicing avoids the IndexError that indexing 0..top-1 raises on a short
    # list.
    for entry in ranked[:top]:
        print(entry)


if __name__ == "__main__":
    report_chinese_word_counts()