jieba词频分析
关键代码:
def jb(str1):
    """Segment ``str1`` with jieba and rank its words by frequency.

    Tokens of length 1 are ignored. The remaining tokens are counted and
    ordered from most to least frequent, and at most the first 100 entries
    are walked. The database insert for each entry is currently disabled
    (left as comments below), so the only remaining side effect is closing
    the module-level ``cursor``.

    Args:
        str1: The text to analyse.

    Returns:
        None.
    """
    words = jieba.lcut(str1)

    counts = {}
    for word in words:
        # Skip single-character tokens.
        if len(word) == 1:
            continue
        # get(word, 0) yields 0 for a word not seen yet, so the first
        # occurrence is stored as 1.
        counts[word] = counts.get(word, 0) + 1

    # Order by occurrence count, highest first.
    items = sorted(counts.items(), key=lambda x: x[1], reverse=True)

    # Slice instead of indexing range(100): texts with fewer than 100
    # distinct words previously raised IndexError here.
    for word, count in items[:100]:
        # Persistence step, disabled in the original.
        # NOTE(review): if re-enabled, prefer a parameterized query over
        # string concatenation; the placeholder style depends on the DB
        # driver in use.
        # print("{0:<10}{1:>5}".format(word, count))
        # print(word," ",count)
        # sql1="insert into all_num values ('"+word+"','"+str(count)+"')"
        # cursor.execute(sql1)
        # db.commit();
        pass

    # `cursor` is not defined in this function; it is expected to exist at
    # module level.
    cursor.close()

浙公网安备 33010602011771号