Python 词云的绘制

词云的基本绘制

import jieba
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import re
from PIL import Image
import numpy as np
#读取文本内容
def plot_wordcloud(file, *, mask_path="xing.jpg", font_path="msyh.ttc", max_words=50):
    """Draw a word cloud for the Chinese text stored in ``file``.

    The file is read as UTF-8, punctuation is turned into separators, the
    text is segmented with jieba, a small set of function words is dropped,
    and the resulting cloud is displayed with ``plt.show()``.

    Args:
        file: Path of the UTF-8 encoded text file to visualise.
        mask_path: Path of an image used as the cloud's shape (white areas
            are left empty). Pass ``None`` to draw a plain 800x400 rectangle.
        font_path: Font file used to render the words; a font with CJK
            glyphs is required for Chinese text ("msyh.ttc" is a Windows
            font).
        max_words: Maximum number of words shown in the cloud.

    Raises:
        ValueError: If no words remain after cleaning and stop-word removal.
    """
    with open(file, encoding='utf-8') as f:
        text = f.read()

    # A custom jieba dictionary can be loaded here if needed, e.g.
    # jieba.load_userdict("chinese_dict.txt")
    stopwords = {
        "的", "了", "是", "我", "你", "他", "她", "它", "我们", "你们", "他们",
        "在", "和", "与", "就", "不", "对", "也", "而", "及", "从",
    }

    # Same character class the original pattern matched (non-word characters,
    # underscore, CJK symbols/punctuation), but each run is replaced by ONE
    # space instead of being deleted: deleting them glued adjacent sentences
    # and space-separated Latin words together, which corrupts segmentation.
    separators = re.compile(r'[\W_\u3000-\u303F]+')
    cleaned = separators.sub(' ', text).strip()

    # jieba yields the separator spaces as tokens too; drop them together
    # with the stop words.
    words = [
        word for word in jieba.cut(cleaned)
        if word.strip() and word not in stopwords
    ]
    if not words:
        raise ValueError(f"no words left to plot after cleaning {file!r}")

    # WordCloud.generate expects a single string of space-separated words.
    text_cut = " ".join(words)
    print(text_cut)

    # Custom cloud shape; the context manager closes the image file promptly.
    mask = None
    if mask_path is not None:
        with Image.open(mask_path) as mask_image:
            mask = np.array(mask_image)

    wordcloud = WordCloud(
        font_path=font_path,
        width=800,   # width/height only take effect when no mask is given
        height=400,
        background_color='white',
        max_words=max_words,
        contour_color='red',  # outline colour of the mask shape
        contour_width=3,      # outline width of the mask shape
        mask=mask,
    ).generate(text_cut)

    # Show the rendered cloud without axes.
    plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis("off")
    plt.show()

# Script entry point: plot the word cloud for the sample text file.
if __name__ == "__main__":
    input_path = r'./text.txt'
    plot_wordcloud(input_path)
posted @ 2025-05-27 17:00  CodeCraftsMan  阅读(34)  评论(0)    收藏  举报