爬取B站视频

极简方法:
安装 you-get 库(`pip install you-get`)后,只要知道视频地址就可以直接下载;如果还安装并配置了 ffmpeg,you-get 会自动把下载好的音频和视频合并成一个文件。用法如下:
```bash
# 在 cmd 中输入
you-get -o 保存地址 url地址
```

完整代码(2025.8.12):B站视频的音频流和视频流是分开提供的,所以代码中用到了 moviepy(底层调用 ffmpeg)的合并功能,把二者合成为一个文件。

点击查看代码
from moviepy.editor import AudioFileClip, VideoFileClip, CompositeVideoClip
import tempfile
import os
import requests
import re
import json
# Configuration: target page, login cookie, and the headers sent with every request.
url = 'https://www.bilibili.com/video/BV18gGezoELy?t=2.5'
cookie = "你自己的cookie"
headers = {
    "Referer": url,
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36...",
    "Cookie": cookie
}


def sanitize_filename(name: str, fallback: str = "video") -> str:
    """Return *name* made safe for use as a single file-name component.

    Path separators, characters that Windows forbids in file names, and ASCII
    control characters are replaced with ``_``; leading/trailing spaces and
    dots are stripped. If nothing is left, *fallback* is returned.
    """
    cleaned = re.sub(r'[\\/:*?"<>|\x00-\x1f]', "_", name).strip(" .")
    return cleaned or fallback


def parse_video_page(html: str):
    """Extract ``(title, video_url, audio_url)`` from the video page HTML.

    The title is the first ``title="..."`` attribute in the page; the stream
    URLs are the first DASH video and audio entries of the JSON assigned to
    ``window.__playinfo__``.

    Raises:
        ValueError: if the title, the play-info script, or the expected DASH
            entries are missing (page layout changed, cookie rejected, ...).
    """
    title_match = re.search('title="(.*?)"', html)
    if title_match is None:
        raise ValueError("video title not found in page HTML")

    info_match = re.search(r'window\.__playinfo__=(.*?)</script>', html)
    if info_match is None:
        raise ValueError("window.__playinfo__ not found in page HTML")

    try:
        dash = json.loads(info_match.group(1))['data']['dash']
        video_url = dash['video'][0]['baseUrl']
        audio_url = dash['audio'][0]['baseUrl']
    except (KeyError, IndexError, TypeError, ValueError) as err:
        # ValueError also covers json.JSONDecodeError.
        raise ValueError("unexpected window.__playinfo__ structure") from err

    return title_match.group(1), video_url, audio_url


def download_to_temp(media_url: str, suffix: str) -> str:
    """Stream *media_url* into a new temporary file and return its path.

    The response is written chunk by chunk, so the whole media file is never
    held in memory. The caller owns the returned file and must delete it.
    If the download fails, the partially written file is removed before the
    exception propagates.
    """
    tmp = tempfile.NamedTemporaryFile(mode="wb", delete=False, suffix=suffix)
    try:
        with tmp, requests.get(media_url, headers=headers, timeout=30, stream=True) as resp:
            # Fail on HTTP errors instead of saving an error page as media.
            resp.raise_for_status()
            for chunk in resp.iter_content(chunk_size=1024 * 1024):
                tmp.write(chunk)
    except BaseException:
        # `tmp` is already closed here (the `with` block exited), so it can be removed.
        os.remove(tmp.name)
        raise
    return tmp.name


def main():
    """Download the configured video, merge its audio and video, and save it.

    The merged file is written to ``../shipin/<title>.mp4``. Temporary files
    and clip handles are released even when a step fails, and the original
    exception is left to propagate.
    """
    temp_paths = []
    video = None
    audio = None
    try:
        # Fetch and parse the page.
        response = requests.get(url=url, headers=headers, timeout=10)
        response.raise_for_status()
        title, video_url, audio_url = parse_video_page(response.text)
        print(f"视频标题: {title}")
        print(f"视频流: {video_url[:50]}...")
        print(f"音频流: {audio_url[:50]}...")

        # Download both streams; each path is registered for cleanup as soon as it exists.
        video_temp_path = download_to_temp(video_url, ".mp4")
        temp_paths.append(video_temp_path)
        # NOTE(review): the audio stream is probably not MP3 — confirm; the
        # original ".mp3" suffix is kept unchanged.
        audio_temp_path = download_to_temp(audio_url, ".mp3")
        temp_paths.append(audio_temp_path)

        # Merge audio into the video.
        # NOTE(review): `set_audio` matches the `moviepy.editor` import used by
        # this file (moviepy 1.x style) — confirm the installed version.
        video = VideoFileClip(video_temp_path)
        audio = AudioFileClip(audio_temp_path)
        final_video = video.set_audio(audio)

        # Write the result; the title is sanitized so it cannot break the path.
        output_dir = "../shipin"
        os.makedirs(output_dir, exist_ok=True)
        final_video.write_videofile(
            os.path.join(output_dir, f"{sanitize_filename(title)}.mp4"),
            codec="libx264",
            audio_codec="aac",
            logger='bar'
        )
    finally:
        # Close the clips first so the temp files are no longer held open,
        # then delete the temp files even if closing a clip fails.
        try:
            for clip in (video, audio):
                if clip is not None:
                    clip.close()
        finally:
            for path in temp_paths:
                try:
                    os.remove(path)
                except FileNotFoundError:
                    pass
            print("临时文件已清理")


if __name__ == "__main__":
    main()
运行结果如下: ![屏幕截图 2025-08-12 173524](https://img2024.cnblogs.com/blog/3622594/202508/3622594-20250812173544720-659196737.png) 我个人认为,只要正则能正确匹配到视频标题和 `window.__playinfo__` 数据,其余部分应该问题不大。
posted @ 2025-08-12 17:41  灵汉  阅读(44)  评论(0)    收藏  举报