import os
import oss2
import json
import logging
import threading
import traceback
from concurrent.futures import ThreadPoolExecutor
# ====================== Configurable parameters ======================
THREAD_COUNT = 10 # thread pool size; adjust as needed
# Aliyun (Alibaba Cloud) OSS access credentials
access_key_id = ''
access_key_secret = ''
endpoint = '' # internal (intranet) endpoint
bucket_name = ''
# Local directory whose contents are uploaded
local_directory = '/path/logsfile/logs'
# Files recording upload state; the .tmp file enables an atomic replace on save
state_file = 'upload_state.txt'
temp_state_file = 'upload_state.tmp'
# Log file destinations
log_file = 'upload.log'
error_log_file = 'error.log'
# ====================== Logging setup (includes thread id) ======================
# Root logger: INFO and above go to upload.log and the console, with the
# worker thread's name/id embedded in every record.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [%(levelname)s] [%(threadName)s | Thread-%(thread)d] - %(message)s',
    handlers=[
        logging.FileHandler(log_file, encoding='utf-8'),
        logging.StreamHandler()
    ]
)
# Dedicated "error" logger that additionally writes ERROR records to error.log.
# Propagation is intentionally left enabled so errors still appear in the main
# log/console via the root handlers.
error_logger = logging.getLogger("error")
error_logger.setLevel(logging.ERROR)  # explicit level instead of inheriting root's
error_handler = logging.FileHandler(error_log_file, encoding='utf-8')
error_handler.setLevel(logging.ERROR)
# Fix: the handler previously had no Formatter, so error.log lines carried only
# the bare message (no timestamp/level). Reuse the root handlers' format.
error_handler.setFormatter(logging.Formatter(
    '%(asctime)s [%(levelname)s] [%(threadName)s | Thread-%(thread)d] - %(message)s'
))
error_logger.addHandler(error_handler)
# ====================== OSS connection ======================
# Module-level client shared by all worker threads.
auth = oss2.Auth(access_key_id, access_key_secret)
bucket = oss2.Bucket(auth, endpoint, bucket_name)
# ====================== Load the previous upload state ======================
# Maps OSS object key -> mtime of the file at upload time. Starts empty and is
# replaced by the persisted state when a readable, non-empty state file exists.
upload_state = {}
if os.path.exists(state_file) and os.path.getsize(state_file) > 0:
    try:
        with open(state_file, 'r') as state_in:
            upload_state = json.load(state_in)
    except (json.JSONDecodeError, IOError) as e:
        error_logger.error(f"Error reading state file: {str(e)}")
        upload_state = {}

# Pre-load the already-uploaded keys into a set for O(1) membership checks.
uploaded_files = set(upload_state)

# Lock guarding multi-threaded updates to the shared state structures above.
lock = threading.Lock()
# ====================== File upload worker ======================
def upload_file(local_file_path, oss_object_key):
    """Upload one file to OSS, record it in the shared state, then delete it.

    Runs inside the thread pool. On OSS failure or any unexpected error the
    problem is logged to the error logger and the local file is kept, so the
    next run retries it.
    """
    thread_id = threading.get_ident()  # numeric id for log correlation
    try:
        # Fix: capture mtime BEFORE uploading. The old code read it after the
        # upload finished, so a file modified mid-upload was stamped with the
        # newer mtime and its changed content would never be re-uploaded.
        mtime = os.path.getmtime(local_file_path)
        bucket.put_object_from_file(oss_object_key, local_file_path)
        logging.info(f'[Thread-{thread_id}] Uploaded {local_file_path} -> {oss_object_key}')
        # Record the successful upload; lock protects the shared dict/set.
        with lock:
            upload_state[oss_object_key] = mtime
            uploaded_files.add(oss_object_key)
        # The upload succeeded, so the local copy can go. Deletion failure is
        # logged but deliberately non-fatal (the file is simply retried later).
        try:
            os.remove(local_file_path)
            logging.info(f'[Thread-{thread_id}] Deleted {local_file_path}')
        except OSError as e:
            error_logger.error(f"[Thread-{thread_id}] Failed to delete {local_file_path}: {str(e)}")
    except oss2.exceptions.OssError as e:
        error_logger.error(f"[Thread-{thread_id}] OSS upload failed for {local_file_path}: {str(e)}")
    except Exception:
        # Unused exception variable removed; the full traceback is logged.
        error_logger.error(f"[Thread-{thread_id}] Unexpected error while uploading {local_file_path}: {traceback.format_exc()}")
# ====================== Recursively upload a directory (multi-threaded) ======================
def upload_directory_to_oss(local_directory, bucket, oss_directory='logs'):
    """Walk ``local_directory`` and upload new or modified files in parallel.

    A file is submitted when its object key was never uploaded, or when its
    current mtime is newer than the recorded one. Blocks until every submitted
    upload has finished; a worker exception propagates via ``future.result()``.

    NOTE(review): the ``bucket`` parameter is accepted but the workers use the
    module-level ``bucket`` client — confirm before passing a different bucket.
    """
    with ThreadPoolExecutor(max_workers=THREAD_COUNT) as executor:
        futures = []
        for root, _, files in os.walk(local_directory):
            for file in files:
                local_file_path = os.path.join(root, file)
                # Fix: build the object key with forward slashes. OSS keys are
                # not filesystem paths; os.path.join/relpath would produce
                # backslash-separated keys on Windows. On POSIX (os.sep == '/')
                # the result is byte-identical to the old behavior.
                rel_path = os.path.relpath(local_file_path, local_directory)
                oss_object_key = oss_directory + '/' + rel_path.replace(os.sep, '/')
                # Skip files already uploaded and unchanged since then.
                if (oss_object_key not in uploaded_files
                        or os.path.getmtime(local_file_path) > upload_state.get(oss_object_key, 0)):
                    futures.append(executor.submit(upload_file, local_file_path, oss_object_key))
        # Wait for completion and surface any worker exceptions.
        for future in futures:
            future.result()
# ====================== Run the upload ======================
upload_directory_to_oss(local_directory, bucket)

# ====================== Persist the upload state (write-then-rename) ======================
try:
    # Serialize into a temp file first, then atomically swap it into place so
    # a crash mid-write can never leave a corrupt state file behind.
    with open(temp_state_file, 'w') as state_out:
        state_out.write(json.dumps(upload_state))
    os.replace(temp_state_file, state_file)
    logging.info("Upload state file updated successfully.")
except IOError as e:
    error_logger.error(f"Failed to write state file: {str(e)}")