Python 生成词云图

代码示例:


#!/usr/bin/python
#  coding: UTF-8
# coding:utf8
# wordcloud库默认为英文文本设置,默认字体也是英文
# jieba支持中文

import sys
from os import path
import os
from PIL import Image
import numpy as np
import matplotlib.pyplot
import jieba
import jieba.analyse
from wordcloud import WordCloud

# Select the Tk backend for matplotlib.
matplotlib.use('TkAgg')
# Directory part of this script's path; input files are resolved against it.
d=path.dirname(__file__)

# stopwords_path = 'stopwords\stopwords1893.txt' # stop-word list

# Add custom Chinese words to jieba's dictionary here.
jieba.add_word('路明非')

# Read the whole text to be analysed. The context manager closes the file
# handle, which the previous bare open(...).read() left open.
# NOTE(review): the file is read with the platform default encoding, as
# before; pass encoding="utf-8" explicitly if the CSV is known to be UTF-8.
with open(path.join(d,'doubancomment.csv')) as f_text:
    text=f_text.read()

def jiebaclearText(text):
    """Segment *text* with jieba and drop stop words and short tokens.

    The stop-word list is read from ``stopwords1893.txt`` in the script
    directory ``d``, one entry per line.

    Args:
        text: The raw text to segment.

    Returns:
        A single string containing the kept tokens separated by one space.
        A token is kept when, after stripping whitespace, it is longer than
        one character and is not in the stop-word list.
    """
    # NOTE(review): the stop-word file is read with the platform default
    # encoding, as before; pass encoding="utf-8" if it is known to be UTF-8.
    with open(path.join(d, 'stopwords1893.txt')) as f_stop:
        # A set gives O(1) membership tests; stripping each line lets entries
        # with stray surrounding whitespace still match the stripped tokens.
        stop_words = {line.strip() for line in f_stop}

    # Precise mode (cut_all=False). Tokens are consumed straight from the
    # generator instead of being joined with "/ " and split on "/" again.
    words = (token.strip() for token in jieba.cut(text, cut_all=False))
    return ' '.join(
        word for word in words
        if len(word) > 1 and word not in stop_words
    )

# Segment the input text and remove stop words before building the cloud.
text1 = jiebaclearText(text)
print(text1)

# Font and stop-word files are resolved against the current working
# directory; the mask image is resolved against the script directory `d`.
file_path = os.path.abspath('.')
print(file_path)

# Load the mask image and close the underlying file once it has been
# copied into a NumPy array.
with Image.open(path.join(d, "pyworld.png")) as mask_image:
    alice_mask = np.array(mask_image)

# A font with Chinese glyphs must be supplied (download it first), otherwise
# the Chinese words are rendered as garbled boxes.
font = file_path + '/huawen.ttf'

# WordCloud expects `stopwords` to be a set of words. Passing the raw file
# contents as one string made it iterate over individual characters, so the
# file is parsed into a set of non-empty lines here.
with open(file_path + "/stopwords/stopwords1893.txt") as f_stopwords:
    wc_stopwords = {line.strip() for line in f_stopwords if line.strip()}

wc = WordCloud(
    background_color="black",
    font_path=font,
    max_words=2000,
    mask=alice_mask,
    stopwords=wc_stopwords,
)
# Generate the word-cloud image.
wordcloud = wc.generate(text1)
image = wordcloud.to_image()
# Display the generated word-cloud image.
image.show()
posted @ 2019-08-08 14:42  fengyanhuaivy  阅读(531)  评论(0)  编辑  收藏  举报