# 好不容易写完的词云

from collections import Counter

import jieba
import matplotlib.pyplot as plt
import numpy as np
import wordcloud
from PIL import Image
  #文本处理
def f():
    """Segment '余罪.txt' into words and drop stop words.

    Stop words are read from '停用词.txt', one per line; the trailing newline
    and any surrounding spaces are stripped from each entry. Every line of
    '余罪.txt' is segmented with ``jieba.lcut``. Tokens that are exactly one
    character long, or that appear in the stop-word set, are discarded.

    Returns:
        list of str: the remaining tokens in document order, duplicates kept.
    """
    with open('停用词.txt', 'r', encoding='utf-8') as fx:
        # A set gives O(1) membership tests; the token loop below performs
        # one lookup per token of the whole text.
        stop_words = {line.strip('\n').strip(' ') for line in fx}

    words = []
    with open('余罪.txt', 'r', encoding='utf-8') as fo:
        # Iterate the file object directly. Calling fo.readline() inside
        # this loop (as the earlier version did) consumes a second line per
        # iteration, so only every other line would be processed.
        for line in fo:
            for token in jieba.lcut(line.strip('\n')):
                if len(token) != 1 and token not in stop_words:
                    words.append(token)
    return words
 
def g(n, top=15):
    """Print and return the most frequent words in *n*.

    Args:
        n: iterable of words (hashable items), e.g. a list of tokens.
        top: maximum number of entries to report; defaults to 15. Fewer
            entries are reported when *n* has fewer distinct words.

    Returns:
        list: ``[words, counts]`` -- two parallel lists ordered by descending
        frequency. Words with equal counts keep their first-seen order.
    """
    # most_common() simply returns fewer pairs when there are not `top`
    # distinct words, so short or empty input no longer raises IndexError.
    ranked = Counter(n).most_common(top)

    words = []
    counts = []
    for word, count in ranked:
        words.append(word)
        counts.append(count)
        # Word left-aligned and count right-aligned, each in a 10-char column.
        print('{: <10}{:>10}'.format(word, count))
    return [words, counts]
 
 
def k(n):
    """Render a word cloud from the words in *n* and save it to '词云.png'.

    Args:
        n: list of word strings; they are joined with single spaces and the
            resulting text is handed to ``WordCloud.generate``.

    The image '图.jpg' is loaded as the ``mask`` argument and 'SIMYOU.TTF' is
    used as the font. Nothing is returned; the result is written to disk.
    """
    text = ' '.join(n)
    shape_mask = np.array(Image.open('图.jpg'))  # template image for the cloud

    # Rendering options, gathered in one place.
    options = dict(
        font_path='SIMYOU.TTF',
        scale=20,
        max_words=6000,
        mask=shape_mask,
        height=800,
        width=800,
        background_color='white',
        repeat=False,
        mode='RGBA',
    )
    cloud = wordcloud.WordCloud(**options)

    # Fill the cloud from the text, then write the image file.
    cloud.generate(text).to_file('词云.png')
# Script driver: build the word list, print word-frequency stats, then
# render the word cloud from the same word list.
a=f()# build the filtered word list
b=g(a)# word-frequency statistics; returns two lists (words, counts)

k(a)

# posted @ 2021-04-27 23:21  llal  阅读(79)  评论(0)    收藏  举报