jieba分词

import jieba
# Alias groups: each inner list holds every name one character goes by.
# The first entry of a group is the canonical name; it is the one shown in
# the report and the one registered with jieba as a custom word.
bieming = [["聂小倩", "小倩"], ["宁采臣"], ["燕赤霞"], ["黑山老妖"], ["辛十四娘"]]

# Canonical names, derived from the alias groups so the two can never drift
# apart when a character is added or removed.
mydict = [group[0] for group in bieming if group]


def count_words(words):
    """Count occurrences of each token, ignoring tokens of length 1.

    Args:
        words: iterable of string tokens (e.g. the result of ``jieba.lcut``).

    Returns:
        dict mapping each token whose length is not 1 to its occurrence count.
    """
    counts = {}
    for word in words:
        # Tokens of length 1 are skipped: only multi-character tokens are
        # tallied.
        if len(word) != 1:
            counts[word] = counts.get(word, 0) + 1
    return counts


def rank_characters(counts, alias_groups):
    """Sum the counts of every alias per character and rank the characters.

    Args:
        counts: dict mapping token -> occurrence count.
        alias_groups: list of alias lists; the first alias of each group is
            used as the character's display name. Empty groups are ignored.

    Returns:
        List of ``[name, total]`` pairs sorted by ``total`` in descending
        order. Characters with equal totals keep their ``alias_groups`` order
        (the sort is stable).
    """
    totals = [
        [group[0], sum(counts.get(alias, 0) for alias in group)]
        for group in alias_groups
        if group
    ]
    totals.sort(key=lambda pair: pair[1], reverse=True)
    return totals


def main():
    """Tokenize the novel with jieba and print each character's appearances."""
    # Register the canonical names so jieba keeps each one as a single token.
    for name in mydict:
        jieba.add_word(name)

    # The context manager closes the file even if reading raises.
    with open('聊斋志异.txt', "r", encoding='utf-8') as source:
        txt = source.read()

    counts = count_words(jieba.lcut(txt))

    # One line per character: name left-aligned in 10 columns, total
    # right-aligned in 5.
    for word, count in rank_characters(counts, bieming):
        print("{0:<10}{1:>5}".format(word, count))


if __name__ == "__main__":
    main()

 

posted @ 2020-11-14 20:21  fangxiaolog  阅读(93)  评论(0)    收藏  举报