百事可乐乐乐

  博客园 :: 首页 :: 博问 :: 闪存 :: 新随笔 :: 联系 :: 订阅 订阅 :: 管理 ::
#CalThreeKingdomsV2.py 
 import jieba excludes = {"将军","却说","荆州","二人","不可","不能","如此"} txt = open("threekingdoms.txt", "r", encoding='utf-8').read() words  = jieba.lcut(txt) counts = {} for word in words:     if len(word) == 1:         continue     elif word == "诸葛亮" or word == "孔明曰":         rword = "孔明"     elif word == "关公" or word == "云长":         rword = "关羽"     elif word == "玄德" or word == "玄德曰":         rword = "刘备"     elif word == "孟德" or word == "丞相":         rword = "曹操"     else:         rword = word     counts[rword] = counts.get(rword,0) + 1 for word in excludes:     del counts[word] items = list(counts.items()) items.sort(key=lambda x:x[1], reverse=True)  for i in range(10):     word, count = items[i]     print ("{0:<10}{1:>5}".format(word, count))

 

posted on 2020-04-22 14:30  煎饼果子*  阅读(135)  评论(0)    收藏  举报