Python 作业( 运用Jieba库分词以及运用wordcloud库做词云图 )

Jieba库实例

(1)、运用 jieba 库对《三国演义》进行分词, 统计词频, 并按词频从高到低排序。

 

(2)、 根据得到的关键词, 做一个词云图

import jieba
import wordcloud as wc
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image

# Alternate names/titles that are counted under one canonical character name.
ALIASES = {
    "诸葛亮": "孔明",
    "孔明曰": "孔明",
    "关公": "关羽",
    "云长": "关羽",
    "玄德": "刘备",
    "玄德曰": "刘备",
    "孟德": "曹操",
    "丞相": "曹操",
}


def count_words(words, excludes=()):
    """Count word occurrences, merging aliases and dropping excluded words.

    Args:
        words: Iterable of already-segmented tokens.
        excludes: Words to remove from the result. Words that never
            occurred are ignored instead of raising ``KeyError``.

    Returns:
        A dict mapping each remaining word to its number of occurrences.
        Tokens of length 1 are skipped, and every key of ``ALIASES`` is
        counted under its mapped name.
    """
    counts = {}
    for word in words:
        if len(word) == 1:
            continue
        rword = ALIASES.get(word, word)
        counts[rword] = counts.get(rword, 0) + 1
    # Exclusion happens after alias merging, on the final keys.
    for word in excludes:
        counts.pop(word, None)
    return counts


def rank_by_frequency(counts):
    """Return ``(word, count)`` pairs sorted by count, highest first.

    The sort is stable, so words with equal counts keep the order in which
    they appear in ``counts``.
    """
    return sorted(counts.items(), key=lambda x: x[1], reverse=True)


if __name__ == "__main__":
    # Use a context manager so the file handle is closed right after reading.
    with open("三国演义.txt", "r", encoding="utf-8") as f:
        txt = f.read()
    excludes = {"将军","却说","荆州","二人","不可","不能","如此","主公","商议","如何","军士","左右","军马"\
        ,"引兵","次日","大喜","天下","东吴","于是","今日","不敢","魏兵","陛下","一人","都督","人马","不知"\
        ,"汉中","只见","众将","后主","蜀兵","上马","大叫","太守","此人","夫人","先主","后人","背后","城中"}
    words = jieba.lcut(txt)
    counts = count_words(words, excludes)
    items = rank_by_frequency(counts)

    # Slicing (rather than indexing up to a fixed range) stays safe when the
    # text yields fewer than 200 / 20 distinct words.
    text = ' '.join(word for word, _ in items[:200])

    # Print the 20 most frequent words with their counts.
    for word, count in items[:20]:
        print('{:<10}{:>5}'.format(word, count))

if __name__ == "__main__":
    # Path to the font file used for rendering; change it to the font you need.
    font = "C:\\WINDOWS\\FONTS\\MSYHL.TTC"
    # Likewise, this is the path to the background picture used as the mask.
    # The context manager closes the image file once its pixels are copied
    # into the array.
    with Image.open('C:\\Users\\Administrator\\Desktop\\tree.jpg') as mask_img:
        bg_pic = np.array(mask_img)

    cloud = wc.WordCloud(
        font_path=font,            # font to draw with
        background_color="white",  # background colour
        max_words=2000,            # maximum number of words shown
        mask=bg_pic,               # background picture used as mask
        max_font_size=100,         # largest font size
        random_state=42,
    )
    # Feed the counted frequencies directly. Passing a string in which every
    # word occurs exactly once would discard the counts computed earlier, so
    # word sizes would not reflect how often each word appears.
    mywc = cloud.generate_from_frequencies(dict(items[:200]))

    # Save before showing: plt.show() blocks until the window is closed.
    mywc.to_file('mywc.png')
    plt.imshow(mywc)
    plt.axis('off')
    plt.show()

 

posted @ 2020-04-07 15:03  L_Hjgg  阅读(524)  评论(0)  编辑  收藏  举报