import requests
from bs4 import BeautifulSoup
import json
import jieba.analyse
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
from wordcloud import WordCloud,ImageColorGenerator
# Download the article page and extract its title, body text and editor name.
url = "https://item.btime.com/36i90hfhkt3838be1gof3cla1ka?from=haozcxw"
# Without a timeout, requests waits indefinitely on an unresponsive server.
res = requests.get(url, timeout=10)
# Stop on an HTTP error status rather than trying to parse an error page.
res.raise_for_status()
res.encoding = 'utf-8'
soup = BeautifulSoup(res.text, 'html.parser')
# Each lookup takes the first element matching the CSS class; an IndexError
# here means the page contains no element with that class.
title = soup.select('.title')[0].text
content = soup.select('.content-text')[0].text
info = soup.select('.edit-info')[0].text
# The editor name is the first whitespace-delimited token after the label.
# partition() removes the label as an exact prefix; lstrip() would instead
# strip any leading run of the label's characters, which can also remove the
# beginning of the name itself.
editor_label = '责任编辑:'
_, label_found, after_label = info.partition(editor_label)
editor_tokens = after_label.split() if label_found else []
# Fall back to an empty string when the label or the name is missing.
au = editor_tokens[0] if editor_tokens else ''
print(title, content, au)
# Append the article body to content.txt. The with-block closes the file even
# if the write raises.
# NOTE(review): append mode means repeated runs accumulate text in
# content.txt, and that file is read back later for keyword extraction.
# Confirm this accumulation is intended.
with open('content.txt', 'a', encoding='utf-8') as f:
    f.write(content)
# Punctuation characters (plus the plain space) to blank out of the article.
strl = ''',。、‘’ '''
# A single translate() pass maps every listed character to a space in the same
# string, so all replacements are kept. Calling content.replace() once per
# character would restart from the untouched text each time and keep only the
# last replacement.
ls = content.translate(str.maketrans(dict.fromkeys(strl, " ")))
print(ls)
# Load the whole of content.txt as the text to analyse. Reading it in one call
# keeps the first line, which is lost when read() is called from inside a loop
# that is already iterating over the file. The with-block also closes the
# handle.
with open('content.txt', 'r', encoding='utf-8') as f:
    lyric = f.read()
# Ask jieba's TextRank for up to 50 terms together with their weights, then
# index the weights by term for the word-cloud step.
result = jieba.analyse.textrank(lyric, topK=50, withWeight=True)
keywords = {term: weight for term, weight in result}
print(keywords)
# Load the mask picture into an array. The array holds its own copy of the
# pixel data, so the image file can be closed as soon as it is built.
with Image.open('t01c9f26bac34842d0d.jpg') as image:
    graph = np.array(image)

# Build the word cloud from the keyword weights, shaped by the mask. The font
# path is passed explicitly; the keywords are Chinese text, so the chosen font
# needs to contain those glyphs.
wc = WordCloud(
    font_path='./fonts/simhei.ttf',
    background_color='White',
    max_words=50,
    mask=graph,
)
wc.generate_from_frequencies(keywords)

# Colour the words from the mask picture and draw the result once; drawing the
# un-recoloured cloud first would only be painted over.
image_color = ImageColorGenerator(graph)
plt.imshow(wc.recolor(color_func=image_color))
plt.axis("off")
plt.show()
# Save the rendered cloud to disk.
wc.to_file('d.jpg')