Word Cloud Generation for Chinese and English Text

1. Chinese word segmentation

Import the required libraries (wordcloud, jieba, numpy, Pillow, and matplotlib, all installable with pip):

from wordcloud import WordCloud
import jieba
import numpy as np
from PIL import Image
from matplotlib import colors

Open the text file that the word cloud will be built from:

with open(r'ciyun2.txt', 'r', encoding='utf-8') as f:
    data = f.read()
wordList_jieba = jieba.lcut(data)      # Chinese word segmentation
data = ','.join(wordList_jieba)        # rejoin tokens so WordCloud can split them
font = r'C:\windows\Fonts\SIMLI.ttf'   # font for the word cloud; a Chinese-capable font is required
with open(r'chinesestopwords.txt', 'r', encoding='utf-8') as f1:  # load the stopword list to drop unwanted words
    all_words = f1.readlines()
stopwords = [word.strip() for word in all_words]
stopwords.extend(['一点','一种','包括','第二个','讲','特别','说'])  # add a few extra stopwords by hand
color_list = ['#FF0000', '#a41a1a']            # build a color list
colormap = colors.ListedColormap(color_list)   # turn it into a matplotlib colormap
py_mask = np.array(Image.open('picture.png'))  # image that defines the word cloud's shape
wc = WordCloud(scale=4, mask=py_mask, font_path=font, stopwords=set(stopwords),
               background_color='white', colormap=colormap)  # a larger scale gives a sharper image
wc.generate(data)
wc.to_file('ciyunone2(修改).jpg')
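
To preview the result without opening the saved file, here is a minimal sketch using matplotlib.pyplot (one extra import on top of the script above); a WordCloud object can be passed straight to imshow:

import matplotlib.pyplot as plt

plt.imshow(wc, interpolation='bilinear')  # the WordCloud object renders directly as an image array
plt.axis('off')                           # hide the axes so only the cloud is shown
plt.show()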

2. English word segmentation (the parameters are not explained again; they follow from the Chinese example)
import cv2
from wordcloud import WordCloud

img_mask = cv2.imread('hamlet.jpg')  # image that defines the word cloud's shape
with open('hamlet.txt', 'r', encoding='utf-8') as f:
    txt = f.read()
wc = WordCloud(width=800,
               height=600,
               max_words=150,
               max_font_size=80,
               mask=img_mask,
               background_color=None).generate(txt)  # None leaves no solid background fill
wc.to_file('hamlet_wordcloud.jpg')  # write to a new file so the mask image is not overwritten
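
If you would rather color the words from the mask image itself instead of a colormap, wordcloud provides ImageColorGenerator. A minimal sketch continuing from the code above (the output filename is illustrative; note that cv2 loads images as BGR, so convert to RGB before sampling colors):

from wordcloud import ImageColorGenerator

rgb_mask = cv2.cvtColor(img_mask, cv2.COLOR_BGR2RGB)  # cv2 reads BGR; convert so the sampled colors are true
image_colors = ImageColorGenerator(rgb_mask)          # color function that samples the mask image
wc.recolor(color_func=image_colors)                   # repaint the existing layout in place
wc.to_file('hamlet_recolored.png')                    # hypothetical output name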
