使用Python分析bilibili视频弹幕,并统计出现次数最多的前十条弹幕

代码如下

import requests
import re
# from bs4 import BeautifulSoup
import pandas as pd
import easygui
def __getXMlUrl(reponse_text):
    """Build the danmaku XML URL from the text of a video page.

    The page text is searched for the first ``cid=<value>&aid`` fragment;
    the captured value is used as the ``oid`` query parameter of the
    danmaku list endpoint.

    Args:
        reponse_text: Text of the video page response.

    Returns:
        The danmaku list URL as a string.

    Raises:
        ValueError: If no ``cid=...&aid`` fragment is present in the text.
    """
    match = re.search(r'cid=(.*?)&aid', reponse_text)
    if match is None:
        # Fail with an explicit message instead of an AttributeError on None.
        raise ValueError("no 'cid=...&aid' fragment found in the page text")

    # The capture group is exactly the text between 'cid=' and '&aid'.
    oid = match.group(1)

    # The `oid` parameter selects which video's danmaku XML is returned.
    xml_url = 'https://api.bilibili.com/x/v1/dm/list.so?oid=' + oid

    return xml_url

def get_danmu(bv_url, timeout=10):
    """Download the danmaku texts for the video at ``bv_url``.

    Fetches the video page, derives the danmaku XML URL from it via
    ``__getXMlUrl``, downloads that XML and extracts the text of every
    ``<d p="...">...</d>`` element.

    Args:
        bv_url: URL of the video page.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        A list of danmaku strings in document order (duplicates kept).

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status for either request.
    """
    headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36"}

    # Without a timeout a stalled connection would block the program forever.
    page_resp = requests.get(bv_url, headers=headers, timeout=timeout)
    page_resp.raise_for_status()
    danmu_url = __getXMlUrl(page_resp.text)

    xml_resp = requests.get(url=danmu_url, headers=headers, timeout=timeout)
    xml_resp.raise_for_status()
    # Decode the raw bytes explicitly (bytes.decode defaults to UTF-8).
    danmu = xml_resp.content.decode()

    danmu_list = re.findall(r'<d p=".*?">(.*?)</d>', danmu)
    return danmu_list
def compute_danmu(top_n=10):
    """Count the danmaku in the module-level ``danmu_list`` and keep the top ones.

    Args:
        top_n: Maximum number of distinct danmaku to return (expected to be
            a positive integer). Defaults to 10.

    Returns:
        A dict mapping danmaku text to its occurrence count, ordered from
        most to least frequent. It holds fewer than ``top_n`` entries when
        there are fewer distinct danmaku, and is empty for an empty list.

    Side effects:
        Prints the resulting dict to standard output.
    """
    # dtype=object keeps an empty list from triggering dtype-inference warnings.
    counts = pd.Series(danmu_list, dtype=object).value_counts()

    # head() simply returns everything when fewer than top_n rows exist, so
    # short inputs no longer raise IndexError. int() turns numpy integers
    # into plain Python ints so the dict prints cleanly.
    danmu_dict = {
        text: int(count) for text, count in counts.head(top_n).items()
    }
    print(danmu_dict)
    return danmu_dict
while True:
    # Ask for the video URL in a dialog; repeat until the user cancels.
    bv_url = easygui.enterbox(msg="请输入b站视频链接",title="b站弹幕分析工具")
    if bv_url is None:
        # enterbox returns None when the dialog is cancelled: exit cleanly
        # instead of passing None on to the HTTP request.
        break
    if not bv_url.strip():
        # Nothing was typed; prompt again.
        continue

    # compute_danmu() reads this module-level name, so it must stay global.
    danmu_list = get_danmu(bv_url=bv_url)
    danmu_dict = compute_danmu()

    padding = " " * 40
    danmu_text = ""
    # Iterate over the entries actually present rather than a fixed range(10),
    # so a shorter result cannot raise IndexError.
    for rank, (text, count) in enumerate(danmu_dict.items(), start=1):
        danmu_ = "弹幕排名第" + str(rank) + ": " + '"' + str(text) + '"' + padding + "数量为" + str(count) + "\n"
        # Each line is prepended, so the text block ends with rank 1.
        danmu_text = danmu_ + danmu_text
    easygui.textbox(msg="出现次数最多的十条弹幕以及次数",title="b站弹幕分析结果",text=str(danmu_dict)+'\n'+danmu_text)

 

pandas常用方法 统计key出现的次数

使用value_counts()方法

例:
value = danmu_var.value_counts()
print(value)

输出值如下:

 

取第一个index索引值

value.index[0]
例:
danmu_data = value.index[0]
print(danmu_data)

输出值如下

 

 

输出第一个索引对应的value值

danmu_num = value[danmu_data]
print(danmu_num)

 输出如下

 

 

 

将多个数据存入到字典中

danmu_dict = {**danmu_dict,**danmu_dict_}
danmu_dict_ 为每次循环新生成的单条数据(弹幕: 次数),通过字典解包合并后追加到 danmu_dict 的末尾

******************************************
******************************************
******************************************



最终运行效果

 

 

 

 



 

posted @ 2021-12-18 04:59  Taylor十万伏特  阅读(237)  评论(0)    收藏  举报