<<--B站..........欢迎来到DGX的博客..........GitHub-->>

我的B站

Python作业:jieba库

运用 jieba 库对文章分词并统计词频(忽略单字词),按出现次数从高到低排序,输出出现次数最多的前 15 个词

 1 import jieba
 2 txt = open("文章.txt","r",encoding='gbk',errors='replace').read()
 3 words  = jieba.lcut(txt)
 4 counts = {}
 5 for word in words:
 6     if len(word) == 1:
 7         continue
 8     else:
 9         counts[word] = counts.get(word,0) + 1
10         
11 items = list(counts.items())
12 items.sort(key=lambda x:x[1], reverse=True) 
13 for i in range(15):
14     word, count = items[i]
15     print ("{0:<10}{1:>5}".format(word, count))

词云

 1 from wordcloud import WordCloud
 2 import matplotlib.pyplot as plt
 3 import jieba
 4 def create_word_cloud(filename):
 5     text = open(file='文章.txt', encoding='utf-8').read()
 6     wordlist = jieba.cut(text, cut_all=True)
 7     wl = " ".join(wordlist)
 8     wc = WordCloud(
 9         background_color="black",
10         max_words=2000,
11         font_path='msyhl.ttf',
12         height=1200,
13         width=1600,
14         max_font_size=100,
15         random_state=100,
16         )
17     myword = wc.generate(wl)  
18     plt.imshow(myword)
19     plt.axis("off")
20     plt.show()
21     wc.to_file('img_book.png') 
22 if __name__ == '__main__':
23     create_word_cloud('mytext')

 

posted @ 2020-04-15 15:19  DG息  阅读(309)  评论(0)    收藏  举报