Jieba分词之《西游记》
# Journey to the West: rank the most frequently mentioned names using jieba.

# Each alias (key) is tallied under one canonical name (value). Words that are
# not listed here are counted under their own spelling.
ALIASES = {
    "猴王": "猴哥",
    "孙悟空": "猴哥",
    "美猴王": "猴哥",
    "悟空": "猴哥",
    "行者": "猴哥",
    "孙行者": "猴哥",
    "大圣": "猴哥",
    "沙僧": "沙和尚",
    "沙悟净": "沙和尚",
    # "卷帘大将" is Sha Wujing's former heavenly title, not Bajie's.
    "卷帘大将": "沙和尚",
    "师父": "唐僧",
    "猪八戒": "八戒",
    # Bajie's former heavenly title.
    "天蓬元帅": "八戒",
}


def normalize_name(word):
    """Return the canonical name for *word*, or *word* itself if it has no alias."""
    return ALIASES.get(word, word)


def count_names(words):
    """Tally the words in *words*, merging character aliases.

    Single-character tokens are skipped. Returns a dict mapping each
    (normalized) word to its number of occurrences, in first-seen order.
    """
    counts = {}
    for word in words:
        if len(word) == 1:  # drop single characters
            continue
        name = normalize_name(word)
        counts[name] = counts.get(name, 0) + 1
    return counts


def top_items(counts, limit=20):
    """Return up to *limit* ``(word, count)`` pairs, highest count first.

    Slicing (rather than indexing a fixed range) keeps this safe when the
    text yields fewer than *limit* distinct words. Ties keep first-seen order
    because the sort is stable.
    """
    ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)
    return ranked[:limit]


def main(path="Jounery to the West.txt", limit=20):
    """Segment the novel at *path* and print the *limit* most frequent words."""
    # Imported here so the counting helpers above can be imported and reused
    # without the segmentation library being installed.
    import jieba

    # The context manager closes the file even if reading/decoding fails.
    with open(path, "r", encoding="gb18030") as book:
        txt = book.read()

    words = jieba.lcut(txt)  # accurate-mode segmentation
    counts = count_names(words)
    for word, count in top_items(counts, limit):
        print("{0:<10}{1:>5}".format(word, count))


if __name__ == "__main__":
    main()
文章下载来源:www.qinkan.net/book/563
运行结果

如果不需要除了名字以外的分词:
# Words to drop from the tally; replace the placeholder with the real entries.
exclude = {...}
for word in exclude:
    # pop() with a default tolerates words that never occurred in the text,
    # where del would raise KeyError.
    counts.pop(word, None)

浙公网安备 33010602011771号