Python分析bilibili视频弹幕并获取发表最多的前十条弹幕
代码如下
"""Analyse the danmaku (bullet comments) of a bilibili video.

Prompts for a video URL in a small GUI, downloads the video's danmaku XML
and shows the most frequently posted comments together with their counts.
"""

import re

import easygui
import pandas as pd
import requests

# Endpoint that returns the danmaku XML for a given ``oid``.
DANMU_API = 'https://api.bilibili.com/x/v1/dm/list.so?oid='

# Browser-like user agent sent with every request.
HEADERS = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36"}

# Seconds to wait for a response; without a timeout a request can block forever.
REQUEST_TIMEOUT = 10

# Number of top comments reported by default.
TOP_N = 10


def __getXMlUrl(reponse_text):
    """Build the danmaku XML URL from the HTML source of a video page.

    The value found between ``cid=`` and ``&aid`` in the page source is used
    as the ``oid`` query parameter of the danmaku endpoint.

    Args:
        reponse_text: HTML text of the video page.

    Returns:
        The URL of the danmaku XML document.

    Raises:
        ValueError: If the page source contains no ``cid=...&aid`` fragment.
    """
    match = re.search(r'cid=(.*?)&aid', reponse_text)
    if match is None:
        raise ValueError("no 'cid=...&aid' fragment found in the page source")
    return DANMU_API + match.group(1)


def get_danmu(bv_url):
    """Download every danmaku comment of the video at ``bv_url``.

    Args:
        bv_url: URL of the bilibili video page.

    Returns:
        A list with the text of each ``<d>`` element of the danmaku XML, in
        document order (duplicates are kept).

    Raises:
        requests.RequestException: On network errors, timeouts, invalid URLs
            or HTTP error statuses.
        ValueError: If the danmaku id cannot be located in the page source.
    """
    resp = requests.get(bv_url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    resp.raise_for_status()
    danmu_url = __getXMlUrl(resp.text)

    danmu_resp = requests.get(url=danmu_url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    danmu_resp.raise_for_status()
    danmu = danmu_resp.content.decode()
    return re.findall('<d p=".*?">(.*?)</d>', danmu)


def compute_danmu(danmu_items=None, top_n=TOP_N):
    """Count the comments and return the ``top_n`` most frequent ones.

    Args:
        danmu_items: Iterable of comment strings. When omitted, the
            module-level ``danmu_list`` is used if it exists, which keeps
            the original zero-argument call style working.
        top_n: Maximum number of distinct comments to return.

    Returns:
        A dict mapping comment text to its number of occurrences, ordered
        from most to least frequent. It holds fewer than ``top_n`` entries
        when there are fewer distinct comments.
    """
    if danmu_items is None:
        danmu_items = globals().get("danmu_list", [])

    # An explicit dtype keeps an empty input from triggering dtype warnings.
    counts = pd.Series(list(danmu_items), dtype="object").value_counts()

    # head() never over-runs, unlike indexing a fixed ten positions.
    danmu_dict = {text: int(num) for text, num in counts.head(top_n).items()}
    print(danmu_dict)
    return danmu_dict


def main():
    """Run the GUI loop until the user cancels the input dialog."""
    padding = " " * 40
    while True:
        bv_url = easygui.enterbox(msg="请输入b站视频链接", title="b站弹幕分析工具")
        if bv_url is None:
            # enterbox returns None when the dialog is cancelled or closed.
            break

        try:
            danmu_list = get_danmu(bv_url=bv_url)
        except (requests.RequestException, ValueError) as err:
            easygui.msgbox(msg="获取弹幕失败: " + str(err), title="b站弹幕分析工具")
            continue

        danmu_dict = compute_danmu(danmu_list)

        # List the ranking from first place downwards.
        danmu_text = "".join(
            "弹幕排名第" + str(rank) + ": " + '"' + str(text) + '"' + padding
            + "数量为" + str(count) + "\n"
            for rank, (text, count) in enumerate(danmu_dict.items(), start=1)
        )
        easygui.textbox(
            msg="出现次数最多的十条弹幕以及次数",
            title="b站弹幕分析结果",
            text=str(danmu_dict) + '\n' + danmu_text,
        )


if __name__ == "__main__":
    main()
pandas常用方法 统计key出现的次数
使用value_counts()方法
例:
value = danmu_var.value_counts()
print(value)
输出值如下:

取第一个索引(index)的值,即出现次数最多的那条弹幕
value.index[0]
例:
danmu_data = value.index[0]
print(danmu_data)
输出值如下

输出第一个索引对应的value值
danmu_num = value[danmu_data]
print(danmu_num)
输出如下

将多个数据存入到字典中
danmu_dict = {**danmu_dict,**danmu_dict_}
danmu_dict_ 为每次循环新生成的键值对,通过字典解包(**)合并到 danmu_dict 的末尾
******************************************
******************************************
******************************************
最终运行效果



浙公网安备 33010602011771号