一个完整的大作业
import re
import requests
from bs4 import BeautifulSoup
import jieba
import matplotlib.pyplot as plt
from wordcloud import WordCloud,STOPWORDS,ImageColorGenerator
# Download the news index page. The timeout keeps the script from hanging
# forever on an unresponsive server, and raise_for_status() stops the run on
# an HTTP error instead of silently parsing an error page.
html = requests.get('http://www.kejixun.com/news/index.html', timeout=10)
html.raise_for_status()
# Decode the response body as gb2312 rather than relying on the encoding
# that requests guesses from the headers.
html.encoding = 'gb2312'
soup = BeautifulSoup(html.text, 'html.parser')
# Collect the text of every <figcaption class="title"> element into one
# string. Headlines are separated by a newline so that the word segmenter
# cannot fuse the end of one headline with the start of the next; the
# one-character newline tokens are discarded by the length filter that
# follows in this script.
news = "\n".join(
    p.get_text() for p in soup.find_all("figcaption", class_='title')
)
# Segment the collected headline text into words with jieba, then build
#   ls     - every kept word, in order of appearance (duplicates included)
#   counts - mapping of word -> number of occurrences
#   items  - (word, count) pairs sorted by count, highest first
ls = []
words = jieba.lcut(news)
counts = {}
for word in words:
    # Single-character tokens are skipped.
    if len(word) == 1:
        continue
    counts[word] = counts.get(word, 0) + 1
    ls.append(word)
items = list(counts.items())
items.sort(key=lambda x: x[1], reverse=True)
# Print the (up to) ten most frequent words. Slicing instead of indexing
# 0..9 avoids an IndexError when fewer than ten distinct words were found.
for word, count in items[:10]:
    print("{:<10}{:>5}".format(word, count))
# Persist the kept words to ms.txt and read them back as the text that is
# fed to the word cloud.
# - The words are written as plain space-separated text rather than str(ls):
#   the list repr adds brackets, quotes and commas that are not part of the
#   words and may be picked up by the word-cloud tokenizer.
# - An explicit UTF-8 encoding makes the Chinese text round-trip the same way
#   on every platform instead of depending on the locale default.
# - `with` guarantees both file handles are closed, even on error.
with open('ms.txt', 'w', encoding='utf-8') as word_file:
    word_file.write(" ".join(ls))
with open('ms.txt', 'r', encoding='utf-8') as word_file:
    wz = word_file.read()
# Load the picture that is used both as the mask passed to WordCloud and as
# the input of the ImageColorGenerator below.
backgroud_Image = plt.imread('cloud.jpg')

# Rendering options for the word cloud, gathered in one place.
cloud_options = {
    'background_color': 'white',
    'mask': backgroud_Image,
    'max_words': 2000,
    'stopwords': STOPWORDS,
    'font_path': 'C:/Users/Windows/fonts/msyh.ttf',
    'max_font_size': 200,
    'random_state': 30,
}
wc = WordCloud(**cloud_options)

# Build the cloud from the text in `wz`, then recolour it with a colour
# function derived from the mask image.
wc.generate(wz)
image_colors = ImageColorGenerator(backgroud_Image)
wc.recolor(color_func=image_colors)

# Show the result without axes.
plt.imshow(wc)
plt.axis('off')
plt.show()

捕获关键词

生成词云

浙公网安备 33010602011771号