from PIL import Image
import wordcloud
import numpy as np
import matplotlib.pyplot as plt
import jieba
# Text processing
def f(stopwords_path='停用词.txt', text_path='李尔王.txt'):
    """Segment a text file into words and filter out stop words.

    Args:
        stopwords_path: UTF-8 file holding one stop word per line.
        text_path: UTF-8 text file that is segmented with ``jieba.lcut``.

    Returns:
        list of str: the segmented tokens, in reading order, whose length
        is not 1 and which do not appear in the stop-word file.
    """
    with open(stopwords_path, 'r', encoding='utf-8') as fx:
        # The second strip removes spaces left around a stop word.
        # A set makes the per-token membership test below O(1).
        stopwords = {line.strip('\n').strip(' ') for line in fx}

    words = []
    with open(text_path, 'r', encoding='utf-8') as fo:
        # Iterate over the file directly.  The earlier version additionally
        # called fo.readline() inside this loop, which consumed a second
        # line on every iteration and so dropped every other line of text.
        for line in fo:
            for token in jieba.lcut(line.strip('\n')):
                if len(token) != 1 and token not in stopwords:
                    words.append(token)
    return words
def g(n, top=15):
    """Count word frequencies and report the most frequent words.

    Each reported word is printed on its own line together with its count.

    Args:
        n: list of words.
        top: maximum number of words to report (defaults to 15).

    Returns:
        list: ``[words, counts]`` -- two parallel lists ordered by
        descending count.  Words with equal counts keep the order in which
        they first occur in ``n``.  Fewer than ``top`` entries are returned
        when ``n`` has fewer distinct words (the earlier version raised
        IndexError in that case).
    """
    # Local import so the block does not depend on the file's import header.
    from collections import Counter

    words, counts = [], []
    for word, count in Counter(n).most_common(top):
        words.append(word)    # the word itself
        counts.append(count)  # its frequency
        print('{: <10}{:>10}'.format(word, count))
    return [words, counts]
def h(n):  # n is the word-frequency pair: [words, counts]
    """Show a red bar chart with one bar per word.

    Args:
        n: indexable pair whose item 0 holds the words (used as tick
            labels) and whose item 1 holds the matching counts (used as
            bar heights).
    """
    # Font setup; presumably chosen so Chinese tick labels render -- confirm
    # that the 'simHei' font is installed on the target machine.
    plt.rcParams['font.sans-serif'] = ['simHei']
    plt.rcParams['axes.unicode_minus'] = False

    labels, heights = n[0], n[1]
    positions = range(len(heights))
    plt.bar(positions, heights, tick_label=labels, fc='r')
    plt.show()
def k(n):  # n is the list of words
    """Build a word cloud from the words and save it as '词云2.png'.

    The words are joined with single spaces and passed to
    ``WordCloud.generate``; the image '皮卡丘.jpg' is loaded as the mask.

    Args:
        n: list of words.
    """
    text = ' '.join(n)
    template = np.array(Image.open('皮卡丘.jpg'))  # mask image
    # NOTE(review): the wordcloud docs say width/height are ignored when a
    # mask is supplied -- confirm before relying on the 800x800 values.
    settings = dict(
        font_path='SIMYOU.TTF',
        scale=13,
        max_words=2500,
        mask=template,
        height=800,
        width=800,
        background_color='white',
        repeat=False,
        mode='RGBA',
    )
    cloud = wordcloud.WordCloud(**settings)
    cloud = cloud.generate(text)  # lay the words out inside the mask
    cloud.to_file('词云2.png')
# Script driver: runs as soon as the module is executed or imported.
a=f()  # build the filtered word list
b=g(a)  # count frequencies (prints the top words); returns [words, counts]
#h(b)  # bar chart of the frequencies (call currently disabled)
k(a)  # render the word cloud image from the word list
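# NOTE(review): a stray empty Markdown image tag ("![]()") stood here; presumably left over from the page this script was copied from.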