B站手机缓存视频导出与整理

B站手机缓存视频存储目录:

内部存储设备/Android/data/tv.danmaku.bili/download/

扫描已导出的视频并将其重命名

import os
import json
import shutil
import subprocess
from pathlib import Path

def sanitize_filename(filename):
    """
    Replace characters that are not allowed in file names with underscores.

    Args:
        filename (str): The raw file name.

    Returns:
        str: The name with each of < > : " / \\ | ? * replaced by "_".
    """
    forbidden = '<>:"/\\|?*'
    # A single translation pass maps every forbidden character to "_".
    return filename.translate(str.maketrans(forbidden, '_' * len(forbidden)))

def find_entry_json_folders(directory):
    """
    Walk a directory tree and collect every folder that holds an entry.json.

    Args:
        directory (str): Root directory to search.

    Returns:
        list: Paths of the folders that directly contain an entry.json file,
            in the order os.walk visits them.
    """
    return [
        current
        for current, _subdirs, names in os.walk(directory)
        if 'entry.json' in names
    ]

def recursive_search(data, key):
    """
    Look up a key anywhere inside a nested JSON-like structure.

    The search is depth-first. A dict is checked for the key itself before
    its values are descended into; lists are scanned element by element.

    Args:
        data: JSON data (dict, list, or a scalar).
        key: The key name to look for.

    Returns:
        The value stored under the first matching key, or None when no
        match is found.
    """
    if isinstance(data, dict):
        if key in data:
            # A direct hit wins, even when the stored value is None.
            return data[key]
        children = data.values()
    elif isinstance(data, list):
        children = data
    else:
        # Scalars cannot contain keys.
        return None

    for child in children:
        found = recursive_search(child, key)
        if found is not None:
            return found
    return None

def read_entry_json(entry_json_path):
    """
    Read an entry.json file and extract the fields used to build file names.

    Each output field is looked up under one or more candidate keys via
    recursive_search; the first truthy value wins. Fields with no truthy
    value are left out of the result. Values are converted to str, and the
    text fields (title, part, owner_name) are passed through
    sanitize_filename.

    Args:
        entry_json_path (str): Path to the entry.json file.

    Returns:
        dict: Any subset of the keys 'title', 'xvid', 'part', 'cid', 'page',
            'owner_id', 'owner_name', each mapped to a string.

    Raises:
        Exception: If the file cannot be read or parsed. The original error
            is attached as the exception's cause.
    """
    # (output field, candidate JSON keys in priority order, sanitize?)
    field_specs = (
        ('title', ('title',), True),
        ('xvid', ('avid', 'bvid', 'av_id'), False),
        ('part', ('part', 'index_title'), True),
        ('cid', ('cid',), False),
        ('page', ('page', 'index'), False),
        ('owner_id', ('owner_id', 'mid'), False),
        ('owner_name', ('owner_name', 'name'), True),
    )

    try:
        with open(entry_json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        entry_info = {}
        for field, candidate_keys, needs_sanitizing in field_specs:
            value = None
            for candidate in candidate_keys:
                value = recursive_search(data, candidate)
                if value:
                    break
            if not value:
                # Missing or empty values are omitted entirely.
                continue
            text = str(value)
            entry_info[field] = sanitize_filename(text) if needs_sanitizing else text

        return entry_info
    except Exception as e:
        # Chain explicitly so the underlying error stays visible to callers.
        raise Exception(f"读取entry.json失败 {entry_json_path}: {e}") from e

def generate_new_filename(entry_info, is_mp4=True):
    """
    Build an output file name from the fields extracted from entry.json.

    Args:
        entry_info (dict): Fields extracted from entry.json.
        is_mp4 (bool): When True, ".mp4" is appended; when False the bare
            base name is returned (used for BLV files).

    Returns:
        str: The present fields joined with "_" in a fixed order, optionally
            followed by ".mp4".
    """
    # Fixed ordering of the name components; absent fields are skipped.
    ordered_fields = ('title', 'xvid', 'part', 'page', 'cid', 'owner_id', 'owner_name')
    stem = '_'.join(
        entry_info[name] for name in ordered_fields if name in entry_info
    )
    return f"{stem}.mp4" if is_mp4 else stem

def find_media_files(folder):
    """
    Collect the BLV segments and the audio/video M4S streams under a folder.

    Args:
        folder (str): Folder to search (subfolders included).

    Returns:
        tuple: (list of BLV file paths,
                dict with keys 'audio' and 'video' holding the path of
                audio.m4s / video.m4s, or None when not found).
    """
    # Exact file names of the M4S streams and the result slot each one fills.
    m4s_slots = {'audio.m4s': 'audio', 'video.m4s': 'video'}

    blv_paths = []
    streams = {'audio': None, 'video': None}

    for current, _subdirs, names in os.walk(folder):
        for name in names:
            if name == 'entry.json':
                # The metadata file itself is never a media file.
                continue

            full_path = os.path.join(current, name)
            if name.lower().endswith('.blv'):
                blv_paths.append(full_path)
            elif name in m4s_slots:
                streams[m4s_slots[name]] = full_path

    return blv_paths, streams

def merge_m4s_files(audio_path, video_path, output_path):
    """
    Mux audio.m4s and video.m4s into a single MP4 file with ffmpeg.

    The streams are copied without re-encoding, and an existing output file
    is overwritten.

    Args:
        audio_path (str): Path to the audio stream.
        video_path (str): Path to the video stream.
        output_path (str): Path of the MP4 file to write.

    Returns:
        bool: True when ffmpeg exits with status 0.

    Raises:
        Exception: If ffmpeg cannot be started or exits with a non-zero
            status. The underlying error is attached as the cause.
    """
    try:
        cmd = [
            'ffmpeg',
            '-i', video_path,    # input video stream
            '-i', audio_path,    # input audio stream
            '-c', 'copy',        # copy the codecs, no re-encoding
            '-y',                # overwrite the output file
            output_path
        ]

        # errors='replace' keeps undecodable bytes in ffmpeg's output from
        # raising UnicodeDecodeError when the locale codec does not match.
        result = subprocess.run(
            cmd, capture_output=True, text=True, errors='replace'
        )

        if result.returncode != 0:
            raise Exception(f"FFmpeg执行失败: {result.stderr}")

        return True
    except Exception as e:
        # Chain explicitly so the root cause (e.g. ffmpeg not installed)
        # stays attached to the wrapper exception.
        raise Exception(f"合并M4S文件失败: {e}") from e

def simulate_rename_operations(entry_json_folders, output_directory):
    """
    Plan the rename/merge operations without touching any file.

    For every folder, entry.json is read and the media files are located.
    Each BLV file is planned as a rename to "<base>_<original stem>.flv",
    and a complete audio/video M4S pair is planned as a merge into
    "<base>.mp4".

    Args:
        entry_json_folders (list): Folders that contain an entry.json.
        output_directory (str): Directory the planned targets live in.

    Returns:
        dict: Maps each new file name to an operation dict. 'blv_rename'
            operations carry 'source' and 'target'; 'm4s_merge' operations
            carry 'audio', 'video' and 'target'.

    Raises:
        Exception: If two operations would produce the same file name, or
            if an entry.json cannot be read.
    """
    operations = {}

    def add_operation(new_filename, operation):
        # Refuse to plan two operations that write the same output name.
        if new_filename in operations:
            raise Exception(f"文件名冲突: {new_filename}")
        operation['target'] = os.path.join(output_directory, new_filename)
        operations[new_filename] = operation

    for folder in entry_json_folders:
        entry_info = read_entry_json(os.path.join(folder, 'entry.json'))
        blv_files, m4s_files = find_media_files(folder)

        base_name = generate_new_filename(entry_info, is_mp4=False)
        for blv_file in blv_files:
            blv_name = os.path.basename(blv_file)
            # Swap only the trailing extension, case-insensitively. A plain
            # str.replace would also rewrite ".blv" inside the title and
            # would miss upper-case ".BLV" files.
            if blv_name.lower().endswith('.blv'):
                stem = blv_name[:-len('.blv')]
            else:
                stem = blv_name
            add_operation(
                f"{base_name}_{stem}.flv",
                {'type': 'blv_rename', 'source': blv_file},
            )

        # A merge is only possible when both streams are present.
        if m4s_files['audio'] and m4s_files['video']:
            add_operation(
                generate_new_filename(entry_info, is_mp4=True),
                {
                    'type': 'm4s_merge',
                    'audio': m4s_files['audio'],
                    'video': m4s_files['video'],
                },
            )

    return operations

def execute_operations(operations):
    """
    Carry out the planned operations (move BLV files, merge M4S pairs).

    The parent directory of every target is created on demand, so the
    function does not depend on any module-level variable. A failing
    operation is reported and the remaining operations still run.

    Args:
        operations (dict): Maps each new file name to an operation dict
            with a 'type' of 'blv_rename' (keys 'source', 'target') or
            'm4s_merge' (keys 'audio', 'video', 'target').
    """
    for new_filename, op_info in operations.items():
        try:
            # Make sure the directory that will receive the file exists.
            target_dir = os.path.dirname(op_info['target']) or '.'
            os.makedirs(target_dir, exist_ok=True)

            if op_info['type'] == 'blv_rename':
                # Move the BLV file to its new .flv name.
                shutil.move(op_info['source'], op_info['target'])
                print(f"重命名成功: {op_info['source']} -> {op_info['target']}")

            elif op_info['type'] == 'm4s_merge':
                # Merge the M4S streams into one MP4 file.
                merge_m4s_files(op_info['audio'], op_info['video'], op_info['target'])
                print(f"合并成功: {op_info['target']}")

        except Exception as e:
            # Best effort: report the failure and continue with the rest.
            print(f"操作失败 {new_filename}: {e}")

def process_bilibili_cache(input_directory, output_directory):
    """
    Plan, preview, and (after confirmation) execute the cache export.

    Args:
        input_directory (str): Root directory holding the cached downloads.
        output_directory (str): Directory the planned targets point into.
    """
    folders = find_entry_json_folders(input_directory)
    if not folders:
        print("未找到任何包含entry.json的文件夹")
        return

    print(f"找到 {len(folders)} 个包含entry.json的文件夹")

    try:
        # Dry run first: build the full operation plan before touching files.
        operations = simulate_rename_operations(folders, output_directory)

        print(f"模拟操作完成,共 {len(operations)} 个操作需要执行")
        print("操作列表:")
        for new_name, op_info in operations.items():
            kind = op_info['type']
            if kind == 'blv_rename':
                print(f"  重命名BLV: {new_name} <- {op_info['source']}")
            elif kind == 'm4s_merge':
                print(f"  合并M4S: {new_name}")

        # Nothing is executed unless the user explicitly answers "y".
        answer = input("\n是否继续执行实际操作?(y/N): ")
        if answer.lower() != 'y':
            print("操作已取消")
            return

        execute_operations(operations)
        print("所有操作已完成")

    except Exception as e:
        print(f"操作失败: {e}")

# Usage example
if __name__ == "__main__":
    # Input and output directories
    input_directory = "./"      # change to your input directory
    output_directory = "../renamed_videos2"     # change to your output directory
    
    process_bilibili_cache(input_directory, output_directory)

 

posted @ 2025-04-21 16:08  Isakovsky  阅读(114)  评论(0)    收藏  举报