# 李尔王皮卡丘

from PIL import Image
import wordcloud 
import numpy as np
import matplotlib.pyplot as plt
import jieba
#文本处理
def f(stopwords_path='停用词.txt', text_path='李尔王.txt'):
    """Tokenize a text file with jieba and return the words kept for counting.

    Args:
        stopwords_path: UTF-8 file with one stopword per line.
        text_path: UTF-8 text file to tokenize.

    Returns:
        A list of tokens, in reading order, excluding tokens of length 1
        and tokens that appear in the stopword file.
    """
    with open(stopwords_path, 'r', encoding='utf-8') as stop_file:
        # Drop the newline first, then any spaces around the stopword.
        # A set gives constant-time membership tests in the loop below.
        stopwords = {line.strip('\n').strip(' ') for line in stop_file}

    words = []
    with open(text_path, 'r', encoding='utf-8') as text_file:
        # Iterate the file object only. Calling readline() inside this loop
        # would consume a second line per iteration and silently drop every
        # other line of the text.
        for line in text_file:
            for token in jieba.lcut(line.strip('\n')):
                if len(token) != 1 and token not in stopwords:
                    words.append(token)
    return words

def g(n, top=15):
    """Count word frequencies and print the most frequent entries.

    Args:
        n: List of words (any hashable items).
        top: Maximum number of entries to report. Defaults to 15.

    Returns:
        A two-element list ``[words, counts]`` holding the reported words
        and their frequencies, ordered from most to least frequent. Words
        with equal frequency keep their first-seen order. Both lists are
        shorter than ``top`` when the input has fewer distinct words.
    """
    counts = {}
    for word in n:
        counts[word] = counts.get(word, 0) + 1

    # sorted() is stable, so ties keep the insertion order of the dict.
    ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)

    words, freqs = [], []
    # Slice rather than index a fixed range: inputs with fewer than `top`
    # distinct words (including an empty list) must not raise IndexError.
    for word, freq in ranked[:max(top, 0)]:
        words.append(word)   # reported word
        freqs.append(freq)   # its frequency
        print('{: <10}{:>10}'.format(word, freq))
    return [words, freqs]

def h(n):  # n: [words, frequencies]
    """Show a red bar chart of word frequencies.

    ``n[0]`` supplies the tick labels and ``n[1]`` the bar heights; the
    chart is displayed with ``plt.show()`` and nothing is returned.
    """
    labels = n[0]
    heights = n[1]

    # Font setup: presumably SimHei is chosen so CJK tick labels render;
    # the unicode_minus switch is turned off alongside it.
    plt.rcParams['axes.unicode_minus'] = False
    plt.rcParams['font.sans-serif'] = ['simHei']

    positions = range(len(heights))
    plt.bar(positions, heights, tick_label=labels, fc='r')
    plt.show()

def k(n):  # n: list of words
    """Render the words as a word cloud shaped by an image mask.

    The words are joined with spaces, '皮卡丘.jpg' is loaded as the mask
    array, and the generated cloud is written to '词云2.png'.
    """
    text = ' '.join(n)
    template = np.array(Image.open('皮卡丘.jpg'))  # mask image as an array

    # NOTE(review): the wordcloud docs say width/height are ignored when a
    # mask is given; they are kept here to leave the call unchanged.
    options = {
        'font_path': 'SIMYOU.TTF',
        'scale': 13,
        'max_words': 2500,
        'mask': template,
        'height': 800,
        'width': 800,
        'background_color': 'white',
        'repeat': False,
        'mode': 'RGBA',
    }
    cloud = wordcloud.WordCloud(**options)
    cloud.generate(text)  # lay the words out inside the mask
    cloud.to_file('词云2.png')
a=f()# build the filtered word list
b=g(a)# word-frequency statistics; returns two lists (words, counts)
#h(b)# frequency bar chart (call left disabled)
k(a)

# posted @ 2021-04-27 23:22  池云  阅读(54)  评论(0)    收藏  举报