Batch-deleting OSS files with Python 3

The script below uses the Aliyun oss2 SDK to list objects under a prefix, parse the date embedded in rotated log file names, and delete every object dated before a configurable cutoff, in batches.

import oss2
import re
import logging
from datetime import datetime
import os

# Configurable parameters
cutoff_year = 2024   # delete files dated before this year (the date, e.g. 2024-01-01, is parsed from the file name)
cutoff_month = 1     # delete files dated before this month
batch_size = 1000    # keys per delete request (OSS batch delete accepts at most 1000 keys per call)

def setup_logger():
    log_dir = "logs"
    os.makedirs(log_dir, exist_ok=True)
    log_filename = f"{log_dir}/oss_delete_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"
    
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(levelname)s] [%(funcName)s] %(message)s",
        handlers=[
            logging.FileHandler(log_filename),
            logging.StreamHandler()
        ]
    )
    return logging.getLogger()

# OSS access configuration
access_key_id = ''
access_key_secret = ''
bucket_name = ''
# Public endpoint
endpoint = ''  # e.g. 'http://oss-cn-beijing.aliyuncs.com'
# Internal endpoint (reachable from ECS in the same region, typically the public host with an '-internal' suffix)
# endpoint = ''

auth = oss2.Auth(access_key_id, access_key_secret)
bucket = oss2.Bucket(auth, endpoint, bucket_name)
logger = setup_logger()

def delete_old_files(prefix='logs/'):
    marker = ''
    cutoff_date = datetime(cutoff_year, cutoff_month, 1)  # files dated before this are deleted
    
    pattern = re.compile(r'(?:\.log\.|log-)(\d{4}-\d{2}-\d{2})[_\.]\d+\.log\.gz$')
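    # Matches rotated log archives such as "app.log.2024-01-01_0.log.gz" or
    # "access_log-2024-01-01.3.log.gz" (illustrative names) and captures the date part.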
    loop_count = 0  
    total_deleted = 0  

    while True:
        loop_count += 1
        keys_to_delete = []

        try:
            result = bucket.list_objects(prefix=prefix, marker=marker, max_keys=1000)
            logger.info(f"成功获取 {len(result.object_list)} 个对象,当前 marker: {marker}")
        except Exception as e:
            logger.error(f"获取文件列表失败: {e}")
            break
        
        objects = result.object_list
        if not objects:
            logger.info("没有符合条件的文件可删除")
            break

        for obj in objects:
            key = obj.key
            match = pattern.search(key)
            if not match:
                logger.debug(f"文件未匹配: {key}")
                continue  
            
            file_date_str = match.group(1)  
            try:
                file_date = datetime.strptime(file_date_str, "%Y-%m-%d")
            except ValueError:
                logger.warning(f"无法解析日期: {file_date_str} (文件: {key})")
                continue  
            
            if file_date < cutoff_date:
                logger.info(f"标记删除文件: {key}, 日期: {file_date_str}")
                keys_to_delete.append(key)

            # Flush as soon as a full batch has been collected
            if len(keys_to_delete) >= batch_size:
                total_deleted += batch_delete(keys_to_delete)
                keys_to_delete = []

        # Delete whatever is left over from this listing page
        if keys_to_delete:
            total_deleted += batch_delete(keys_to_delete)

        marker = result.next_marker
        if not result.is_truncated:
            break  

    logger.info(f"总循环次数: {loop_count},删除的总文件数: {total_deleted}")

def batch_delete(keys):
    """执行批量删除,并返回删除的文件数"""
    try:
        bucket.batch_delete_objects(keys)
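        # batch_delete_objects returns a BatchDeleteObjectsResult; its deleted_keys
        # attribute lists the keys the service actually removed, should a stricter
        # count than len(keys) ever be needed.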
        logger.info(f"成功删除 {len(keys)} 个文件: {', '.join(keys[:5])}... 等")
        return len(keys)
    except oss2.exceptions.OssError as e:
        logger.error(f"删除失败: {e}, 失败的文件列表: {keys}")
        return 0

if __name__ == "__main__":
    logger.info(f"开始执行OSS文件清理任务,删除 {cutoff_year} 年 {cutoff_month} 月之前的文件")
    try:
        delete_old_files()
        logger.info("所有符合条件的文件已处理完毕!")
    except Exception as e:
        logger.error(f"任务执行失败: {e}")
