# 《聊斋志异》jieba分词 (jieba word segmentation of "Strange Tales from a Chinese Studio")

from collections import Counter

import jieba


def takeSecond(elem):
    """Return the item at index 1 of *elem*.

    Intended as a ``key=`` function for sorting ``(word, count)`` pairs by
    their count.
    """
    second = elem[1]
    return second


def main(path=r"D:\聊斋\1234.txt", top_n=40, encoding="gbk"):
    """Print the most frequent jieba tokens found in a text file.

    The file is read in full and segmented with ``jieba.lcut``. Every token
    that is not one of the listed punctuation marks (or a single space) is
    counted, and the most frequent tokens are printed one per line: the token
    left-aligned in 10 columns followed by its count right-aligned in 5.

    Args:
        path: Location of the text file to analyse. The default is the
            same path the script originally hard-coded.
        top_n: Maximum number of tokens to print. Fewer lines are printed
            when the text contains fewer distinct tokens.
        encoding: Encoding used to decode the file.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid in ``encoding``.
    """
    # Same tokens the original list excluded; a frozenset makes each
    # membership test O(1) instead of a linear scan per token.
    skipped = frozenset(['，', '。', '：', '“', '”', ' ', '？', '！', '；', '.'])

    # The context manager closes the file even if read() raises.
    with open(path, "r", encoding=encoding) as source:
        text = source.read()

    counts = Counter(word for word in jieba.lcut(text) if word not in skipped)

    # most_common() returns at most as many entries as there are distinct
    # tokens, so a short text no longer triggers an IndexError. Ties keep
    # first-seen order, matching the original stable descending sort.
    for word, count in counts.most_common(top_n):
        print("{0:<10}{1:>5}".format(word, count))
 
 
# Run the analysis only when executed as a script, so importing this module
# does not trigger file I/O as a side effect.
if __name__ == "__main__":
    main()

posted @ 2020-11-15 11:41 小伟往往 | 阅读(81) | 评论(0) | 收藏 | 举报

刷新页面返回顶部

小伟往往

《聊斋志异》jieba分词

公告