jieba词频分析

关键代码:

def jb(str1):
    """Count multi-character word frequencies in *str1* using jieba.

    The text is segmented with ``jieba.lcut``; tokens of length 1 are
    ignored. The remaining tokens are counted and ranked by descending
    frequency, and at most the 100 most frequent entries are visited.

    The per-entry output / database insert is currently disabled (left as
    comments below), so the only observable side effect is closing the
    module-level ``cursor``. Returns ``None``.

    NOTE(review): ``jieba`` and ``cursor`` are not defined in this snippet;
    they are assumed to be provided at module level -- confirm in the caller.
    """
    words = jieba.lcut(str1)
    counts = {}
    for word in words:
        if len(word) == 1:  # skip single-character tokens
            continue
        # get() falls back to 0 for a word not seen yet, so the first
        # occurrence is stored as 1.
        counts[word] = counts.get(word, 0) + 1

    # Rank by occurrence count, highest first. The sort is stable, so words
    # with equal counts keep their first-seen order.
    items = sorted(counts.items(), key=lambda x: x[1], reverse=True)

    # Slice instead of indexing range(100): texts with fewer than 100
    # distinct words previously raised IndexError here.
    for entry in items[:100]:
        word, count = entry
        # Disabled output / persistence step kept for reference:
        # print("{0:<10}{1:>5}".format(word, count))
        # print(word," ",count)
        # sql1="insert into all_num values ('"+word+"','"+str(count)+"')"
        # cursor.execute(sql1)
        # db.commit();
        # NOTE(review): if the insert is re-enabled, prefer a parameterized
        # query over string concatenation.
    cursor.close()

 

posted @ 2021-12-07 22:16  新古董  阅读(62)  评论(0)    收藏  举报