Python学习笔记第26天

谏言：穷则独善其身，达则兼济天下

爬取B站弹幕数据进行词云显示

import requests
import re
import csv

# Request headers: send a desktop Chrome User-Agent string with the request
# (presumably so the API treats it like a browser request -- not verified here).
headers={
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36'
    }

# Danmaku (bullet-comment) list endpoint. The two commented-out URLs are
# alternative `oid` values kept for reference.
# url='https://api.bilibili.com/x/v1/dm/list.so?oid=57236482'
# url='https://api.bilibili.com/x/v1/dm/list.so?oid=171484836'
url='https://api.bilibili.com/x/v1/dm/list.so?oid=186803402'
# A timeout keeps the script from hanging forever on a stalled connection.
data=requests.get(url,headers=headers,timeout=10)
# Fail loudly on an HTTP error instead of parsing an error page as danmaku.
data.raise_for_status()
# Decode the raw bytes explicitly as UTF-8 rather than relying on the
# encoding that requests guesses for `.text`.
html_doc=data.content.decode('utf-8')
# Capture the text between each <d ...> opening tag and its </d> closing tag.
res=re.compile(r'<d.*?>(.*?)</d>')
# List of captured comment strings, one per <d> element found.
bullet_screen=res.findall(html_doc)
# print(bullet_screen)
# Append every danmaku to the CSV file as its own single-column row.
# The file is opened once for the whole batch instead of once per row;
# the bytes written are the same. Mode 'a' means repeated runs accumulate
# rows, and newline='' lets the csv module control line endings itself.
with open('B站1.csv','a',newline='',encoding='utf-8-sig') as f:
    writer=csv.writer(f)
    # Each row is a one-element list so the comment lands in a single cell.
    writer.writerows([i] for i in bullet_screen)


import jieba
import wordcloud
import imageio
# Read the saved danmaku back in. 'utf-8-sig' matches the encoding the CSV is
# written with, so the leading BOM is stripped instead of leaking into the
# text as a stray '\ufeff' character. The `with` block closes the file handle.
with open('B站1.csv',encoding='utf-8-sig') as f:
    txt=f.read()
# Segment the text into words with jieba, then join the tokens with spaces,
# the form handed to WordCloud.generate below.
txt_list=jieba.lcut(txt)
string=" ".join(txt_list)

# Image passed to WordCloud as `mask` below.
mk=imageio.imread(r'图片2.png')

# Alternative configuration that additionally passes contour_width/contour_color:
# w=wordcloud.WordCloud(width=1000,height=700,background_color='white',font_path='msyh.ttc',mask=mk,scale=15,stopwords={' '},contour_width=5,contour_color='red')
# font_path names the font file used for rendering ('msyh.ttc'); it must be
# resolvable on the machine running the script.
w=wordcloud.WordCloud(
    width=1000,
    height=700,
    background_color='white',
    font_path='msyh.ttc',
    scale=15,
    stopwords={' '},
    mask=mk,
)

# Build the cloud from the space-separated tokens and save it as a PNG.
w.generate(string)
w.to_file('r5.png')

 

 

 

posted @ 2020-05-05 23:15  过气诗人  阅读(130)  评论(0)    收藏  举报