# 词云的基本绘制 (Basic word cloud plotting)
import jieba
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import re
from PIL import Image
import numpy as np
#读取文本内容
# Common Chinese function words that are filtered out before plotting.
_STOPWORDS = frozenset({
    "的", "了", "是", "我", "你", "他", "她", "它", "我们", "你们", "他们",
    "在", "和", "与", "就", "不", "对", "也", "而", "及", "从",
})

# Matches whitespace, non-word characters, underscores and the CJK
# symbols/punctuation block (U+3000-U+303F); compiled once at import time.
_NON_WORD_PATTERN = re.compile(r'[\s\W_]|[^\w\s]|[\u3000-\u303F]', re.S)


def _segment_text(text):
    """Return *text* segmented by jieba as one space-separated string.

    Whitespace and punctuation are removed first, then every token that is
    in ``_STOPWORDS`` is dropped.
    """
    cleaned = _NON_WORD_PATTERN.sub('', text.strip())
    # A custom dictionary could be loaded with jieba.load_userdict(...)
    # before cutting; the original script left that step disabled.
    return " ".join(
        word for word in jieba.cut(cleaned) if word not in _STOPWORDS
    )


def plot_wordcloud(file, *, mask_path="xing.jpg", font_path="msyh.ttc",
                   max_words=50):
    """Read a UTF-8 text file and display a word cloud of its contents.

    The text is stripped of whitespace/punctuation, segmented with jieba,
    filtered against a small stopword set, printed to stdout, and finally
    rendered with ``WordCloud`` and shown in a matplotlib window.

    Args:
        file: Path of the UTF-8 encoded text file to visualise.
        mask_path: Path of the image used as the word cloud shape (black
            areas show text, white areas stay empty). Pass ``None`` to
            draw without a mask, in which case the 800x400 canvas size
            applies.
        font_path: Path of the font used for rendering. A font with CJK
            glyphs is required to display Chinese; the default is a
            Windows Chinese font (the original author notes that
            'Arial Unicode MS' can be used on macOS).
        max_words: Maximum number of words shown in the cloud.
    """
    with open(file, encoding='utf-8') as f:
        text = f.read()

    # WordCloud expects a single string with words separated by spaces.
    text_cut = _segment_text(text)
    print(text_cut)

    mask = None
    if mask_path is not None:
        # Use a context manager so the image file handle is released as
        # soon as the pixel data has been copied into the array.
        with Image.open(mask_path) as mask_image:
            mask = np.array(mask_image)

    wordcloud = WordCloud(
        font_path=font_path,
        # NOTE(review): per the wordcloud docs, width/height are ignored
        # when a mask is supplied and the mask's shape is used instead.
        width=800,
        height=400,
        background_color='white',
        max_words=max_words,
        contour_color='red',  # colour of the mask outline
        contour_width=3,  # width of the mask outline
        mask=mask,
    ).generate(text_cut)

    # Show the rendered cloud without axes.
    plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis("off")
    plt.show()
if __name__ == "__main__":
    # Script entry point: plot the word cloud for the sample text file
    # located next to the working directory.
    sample_text_path = r'./text.txt'
    plot_wordcloud(sample_text_path)