import oss2
import re
import logging
from datetime import datetime
import os
# Tunable settings.
# Objects whose file name embeds a date (e.g. 2024-01-01) earlier than the
# first day of cutoff_year/cutoff_month are selected for deletion.
cutoff_year = 2024 # Year part of the cutoff date.
cutoff_month = 1 # Month part of the cutoff date.
batch_size = 1000 # Pending keys are flushed to a batch delete once this many have accumulated.
def setup_logger():
    """Configure root logging to a timestamped file plus the console.

    Creates the ``logs`` directory if it does not exist, then calls
    ``logging.basicConfig`` with a file handler writing to
    ``logs/oss_delete_<YYYYmmdd_HHMMSS>.log`` and a stream handler, both at
    INFO level. ``basicConfig`` has no effect if the root logger already has
    handlers installed.

    Returns:
        The root logger.
    """
    log_dir = "logs"
    os.makedirs(log_dir, exist_ok=True)
    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    log_filename = os.path.join(log_dir, f"oss_delete_{stamp}.log")
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(levelname)s] [%(funcName)s] %(message)s",
        handlers=[
            # The script logs non-ASCII text, so pin the file encoding
            # instead of depending on the platform's locale default.
            logging.FileHandler(log_filename, encoding="utf-8"),
            logging.StreamHandler(),
        ],
    )
    return logging.getLogger()
# OSS access configuration. All values are empty placeholders here.
access_key_id = ''
access_key_secret = ''
bucket_name = ''
# Public (internet) endpoint
endpoint = '' # e.g. 'http://oss-cn-beijing.aliyuncs.com'
# Internal (intranet) endpoint; uncomment to use it instead of the public one
# endpoint = ''
# Module-level OSS client and logger; both are created when the module is loaded.
auth = oss2.Auth(access_key_id, access_key_secret)
bucket = oss2.Bucket(auth, endpoint, bucket_name)
logger = setup_logger()
def delete_old_files(prefix='logs/'):
    """Delete dated log archives under *prefix* that predate the cutoff.

    Lists the bucket page by page (up to 1000 objects per page). For every
    key whose name ends in a dated ``.log.gz`` suffix (for example
    ``app.log.2023-12-31_1.log.gz``), the embedded date is compared with the
    first day of ``cutoff_year``/``cutoff_month``; older keys are handed to
    ``batch_delete`` in groups of ``batch_size``, and any remainder is
    flushed at the end of each page.

    A listing failure is logged and stops the scan. A summary with the
    number of pages visited and the total deleted count is logged at the end.

    Args:
        prefix: Key prefix to scan.
    """
    threshold = datetime(cutoff_year, cutoff_month, 1)  # keys dated before this are removed
    dated_name = re.compile(r'(?:\.log\.|log-)(\d{4}-\d{2}-\d{2})[_\.]\d+\.log\.gz$')
    marker = ''
    loop_count = 0
    total_deleted = 0
    has_more = True

    while has_more:
        loop_count += 1
        try:
            result = bucket.list_objects(prefix=prefix, marker=marker, max_keys=1000)
            logger.info(f"成功获取 {len(result.object_list)} 个对象,当前 marker: {marker}")
        except Exception as e:
            logger.error(f"获取文件列表失败: {e}")
            break

        if not result.object_list:
            logger.info("没有符合条件的文件可删除")
            break

        pending = []
        for entry in result.object_list:
            key = entry.key
            hit = dated_name.search(key)
            if hit is None:
                logger.debug(f"文件未匹配: {key}")
                continue

            file_date_str = hit.group(1)
            try:
                stamp = datetime.strptime(file_date_str, "%Y-%m-%d")
            except ValueError:
                logger.warning(f"无法解析日期: {file_date_str} (文件: {key})")
                continue

            if stamp >= threshold:
                # Not old enough; keep it.
                continue

            logger.info(f"标记删除文件: {key}, 日期: {file_date_str}")
            pending.append(key)

            # Flush as soon as a full batch has accumulated.
            if len(pending) >= batch_size:
                total_deleted += batch_delete(pending)
                pending = []

        # Flush whatever is left over from this page.
        if pending:
            total_deleted += batch_delete(pending)

        marker = result.next_marker
        has_more = result.is_truncated

    logger.info(f"总循环次数: {loop_count},删除的总文件数: {total_deleted}")
def batch_delete(keys):
    """Delete the given object keys with a single batch request.

    Args:
        keys: List of object keys to delete.

    Returns:
        The number of keys the service reported as deleted. Returns 0 when
        *keys* is empty or when the request fails with an ``OssError``
        (the failure and the affected keys are logged).
    """
    if not keys:
        # Nothing to delete; oss2 rejects an empty key list, so skip the call
        # instead of producing a spurious error log.
        return 0
    try:
        result = bucket.batch_delete_objects(keys)
    except oss2.exceptions.OssError as e:
        logger.error(f"删除失败: {e}, 失败的文件列表: {keys}")
        return 0

    # Count what the service confirmed rather than assuming every requested
    # key was removed.
    deleted_count = len(result.deleted_keys)
    if deleted_count < len(keys):
        logger.warning(f"请求删除 {len(keys)} 个文件, 实际删除 {deleted_count} 个")
    logger.info(f"成功删除 {deleted_count} 个文件: {', '.join(keys[:5])}... 等")
    return deleted_count
if __name__ == "__main__":
    # Script entry point: run the cleanup with the default prefix and log the
    # outcome. Failures are logged, not re-raised.
    logger.info(f"开始执行OSS文件清理任务,删除 {cutoff_year} 年 {cutoff_month} 月之前的文件")
    try:
        delete_old_files()
        logger.info("所有符合条件的文件已处理完毕!")
    except Exception as e:
        # logger.exception logs at ERROR level and appends the traceback, so
        # the log file shows where the failure happened, not just its message.
        logger.exception(f"任务执行失败: {e}")