《聊斋志异》jieba分词

import jieba


def takeSecond(elem):
    """Return the item stored at index 1 of *elem*.

    Suitable as a ``key`` function when ordering pairs by their second
    field.
    """
    second = elem[1]
    return second


def main(path=r"D:\聊斋\1234.txt", top_n=40):
    """Print the most frequent jieba tokens found in a GBK-encoded text file.

    The file is read in full, segmented with ``jieba.lcut``, and the
    resulting tokens are tallied, skipping a small set of ignored tokens.
    The most frequent tokens are then printed one per line, with the token
    left-aligned in 10 columns and its count right-aligned in 5.

    Args:
        path: Location of the text file to analyse. The default is the
            path this script originally hard-coded.
        top_n: Maximum number of tokens to print. Fewer lines are printed
            when the text contains fewer distinct tokens.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file content is not valid GBK.
    """
    # Imported locally so this function does not depend on edits elsewhere
    # in the file.
    from collections import Counter

    # Tokens excluded from the tally: empty string, single space, full stop.
    # NOTE(review): the original list repeated '' many times, which looks
    # like punctuation characters lost in transit -- confirm the intended
    # stop list with the author.
    ignored = {"", " ", "."}

    # The context manager closes the file even if reading/decoding fails.
    with open(path, "r", encoding="gbk") as handle:
        text = handle.read()

    counts = Counter(
        word for word in jieba.lcut(text) if word not in ignored
    )

    # most_common() returns at most top_n entries, so short texts no longer
    # raise IndexError the way a fixed range(40) index loop did.
    for word, count in counts.most_common(top_n):
        print(f"{word:<10}{count:>5}")
 
 
# Run the report only when executed as a script, so that importing this
# module does not trigger file I/O and printing.
if __name__ == "__main__":
    main()

posted @ 2020-11-15 11:41  小伟往往  阅读(81)  评论(0)    收藏  举报