Python作业:jieba库
运用jieba库统计词频，并对词频进行排序
from collections import Counter


def top_words(words, limit=15):
    """Return the most frequent multi-character tokens in *words*.

    Args:
        words: Iterable of string tokens, e.g. the list produced by
            ``jieba.lcut``.
        limit: Maximum number of ``(word, count)`` pairs to return.

    Returns:
        A list of ``(word, count)`` tuples ordered by descending count.
        Tokens with equal counts keep the order in which they were first
        seen. The list is shorter than ``limit`` when the input has fewer
        distinct multi-character tokens, so short texts no longer raise
        ``IndexError``.
    """
    # Tokens of length 1 are skipped, exactly as the original loop did.
    counts = Counter(word for word in words if len(word) != 1)
    return counts.most_common(limit)


def main():
    """Print the 15 most frequent words of ``文章.txt`` as an aligned table."""
    # Imported here so the pure counting helper above stays importable
    # (and testable) on machines where jieba is not installed.
    import jieba

    # NOTE(review): this script decodes the file as GBK (replacing undecodable
    # bytes), while the word-cloud script reads the same file name as UTF-8 --
    # confirm which encoding the file really uses.
    with open("文章.txt", "r", encoding="gbk", errors="replace") as article:
        txt = article.read()

    for word, count in top_words(jieba.lcut(txt)):
        print("{0:<10}{1:>5}".format(word, count))


if __name__ == "__main__":
    main()
运用wordcloud库生成词云
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import jieba


def create_word_cloud(filename):
    """Render a word cloud for a UTF-8 text file, save it and display it.

    The text is segmented with jieba in full mode, the tokens are joined
    with spaces and passed to ``WordCloud.generate``. The resulting image is
    written to ``img_book.png`` in the current directory and then shown in a
    matplotlib window.

    Args:
        filename: Path of the UTF-8 encoded text file to visualise.
    """
    # The original ignored ``filename`` and always opened '文章.txt'; the
    # argument is now honoured and the file handle is closed deterministically.
    with open(file=filename, encoding='utf-8') as source:
        text = source.read()

    # cut_all=True selects jieba's "full mode" segmentation.
    wordlist = jieba.cut(text, cut_all=True)
    wl = " ".join(wordlist)

    wc = WordCloud(
        background_color="black",
        max_words=2000,
        # NOTE(review): the font must contain CJK glyphs, otherwise Chinese
        # words render as empty boxes -- confirm this file exists on the
        # target machine.
        font_path='msyhl.ttf',
        height=1200,
        width=1600,
        max_font_size=100,
        random_state=100,
    )
    myword = wc.generate(wl)

    # Save before showing: plt.show() blocks until the window is closed, so
    # writing the PNG first guarantees it exists even if display fails.
    wc.to_file('img_book.png')

    plt.imshow(myword)
    plt.axis("off")
    plt.show()


if __name__ == '__main__':
    # Pass the real article path now that the parameter is actually used.
    create_word_cloud('文章.txt')


浙公网安备 33010602011771号