线程池ThreadPoolExecutor
常见的情形:
- 优雅退出示例
import json
import time
from loguru import logger
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor, as_completed, wait, ALL_COMPLETED
import threading
from utils.retry import retry_on_failure

local_storage = threading.local()


class StopEventException(Exception):
    """自定义异常:用于 stop_event 触发,不重试、不日志"""
    pass


class TestThreadPoolExecutor:
    '''线程池测试'''

    def __init__(self):
        # 1、源目录:用于存储txt文件
        self.source_files_dir = Path(__file__).resolve().parent / 'source_files'
        self.source_files_dir.mkdir(parents=True, exist_ok=True)
        # 2、输出目录:用于存放生成的json文件
        self.output_files_dir = Path(__file__).resolve().parent / 'output_files'
        self.output_files_dir.mkdir(parents=True, exist_ok=True)
        # 3、默认线程数
        self.executor = ThreadPoolExecutor(max_workers=4)
        # 4、全局停止事件(每个批次重置)
        self.stop_event = threading.Event()

    def run(self):
        try:
            while True:
                # 使用 glob 动态获取所有 .txt 文件,限制仅处理 .txt 结尾的文件
                txt_paths = list(self.source_files_dir.glob('*.txt'))
                if not txt_paths:
                    break  # 如果没有文件,则退出循环(原版 while os.listdir 类似逻辑,但仅检查一次)
                # 处理当前批次的 .txt 文件(每个批次同步处理,上一批次处理完才处理下一批)
                for txt_path in txt_paths:
                    # 每个批次开始前,重置停止事件(避免上一批次 set 状态影响)
                    self.stop_event.clear()
                    start_time = time.time()
                    logger.info(f'开始处理文件:{txt_path.name}')
                    # 读取 txt 文件内容
                    with open(txt_path, 'r', encoding='utf-8') as f:
                        tasks = [line.strip() for line in f.read().splitlines() if line.strip()]
                    futures = []
                    for task in tasks:
                        future = self.executor.submit(self.find, task)
                        futures.append(future)
                    # 使用 as_completed 检查完成的任务(按完成顺序迭代)
                    results = []  # 始终返回 list: [success_acc] 或 []
                    for future in as_completed(futures):
                        try:
                            acc_info = future.result()  # 无超时,阻塞等待
                            if acc_info and acc_info.get('success'):
                                results = [acc_info]  # 找到第一个成功,停止进一步检查
                                self.stop_event.set()  # 通知其他线程立即退出
                                # 手动取消剩余未完成任务(pending 有效,已运行靠 Event 自停)
                                for f in futures:
                                    if not f.done():
                                        f.cancel()
                                logger.success(
                                    f'文件 {txt_path.name}:找到{acc_info},耗时:{time.time() - start_time:.2f}秒')
                                break
                        except Exception as e:
                            logger.error(f'任务异常:{e},取消剩余任务并抛出')
                            self.stop_event.set()  # 通知其他线程立即退出
                            # 手动取消剩余未完成任务(pending 有效,已运行靠 Event 自停)
                            for f in futures:
                                if not f.done():
                                    f.cancel()
                            # 立即抛出异常,中断当前批次处理(可传播到外层 try 或崩溃程序)
                            raise  # 重新抛出 e,停止循环
                    # 等待本批次所有 futures 完成(包括残留任务自停),确保无残留带到下一批
                    wait(futures, return_when=ALL_COMPLETED)
                    if not results:
                        logger.warning(
                            f'文件 {txt_path.name}:返回空数组,耗时:{time.time() - start_time:.2f}秒')
                        results = []
                    # 检查是否有异常结果(如果有未处理的异常,会在上面的 except 中处理)
                    # 原逻辑:如果 results 有 None 或异常,raise;但现在简化,假设 find 已处理
                    # 生成对应的 json 文件路径
                    json_filename = txt_path.with_suffix('.json').name
                    json_path = self.output_files_dir / json_filename
                    # 将结果写入 json 文件(始终 list)
                    with open(json_path, 'w', encoding='utf-8') as f:
                        json.dump(results, f, ensure_ascii=False)
                    # 删除已处理的 txt 文件
                    txt_path.unlink()
                    # logger.info(
                    #     f'已顺利处理文件---{txt_path.name}---耗时:{time.time() - start_time}秒')
            logger.info('---------------------执行结束---------------------')
        except KeyboardInterrupt:
            logger.error('程序被手动终止!!!')
        finally:
            self.stop_event.set()  # 通知其他线程立即退出
            self.executor.shutdown(wait=True, cancel_futures=True)  # 全局清理
            logger.info('程序执行结束!')

    def find(self, account_info_str):
        '''找回密码 - 修改支持 stop_event 检查,立即退出返回 None'''
        if self.stop_event.is_set():
            return None  # 已停止,直接返回
        # 2、核验账号信息
        try:
            success = self.check()
        except StopEventException:
            return None  # self.stop_event 触发,无日志,直接返回(避免多余打印)
        # 其它异常会直接调用os._exit(1)
        # except Exception as e:
        #     logger.exception(f'处理账号 {account_info} 出现错误: {e}')
        #     # 如果是重试失败,os._exit(1) 已由装饰器处理
        #     return None
        else:
            if success:
                return success
            else:
                return None

    @retry_on_failure(retries=20, retry_interval=1, log_from_retry=5,
                      ignore_exceptions=(StopEventException,))
    def check(self):
        '''验证 - 添加 self.stop_event 检查,支持立即退出;重试 20 次失败 raise 全局异常(装饰器处理)'''
        if self.stop_event.is_set():
            raise StopEventException('Stop event set, abort check')  # 自定义异常,不重试、无日志
        if not hasattr(local_storage, 'cookies'):
            local_storage.cookies = {}
        # 在关键 IO 前后检查 event(网络 IO 点)
        if self.stop_event.is_set():
            raise StopEventException('Stop event set during init')  # 自定义异常,不重试
        return True
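上面的示例引用了自定义的 utils.retry.retry_on_failure 装饰器,正文中没有给出它的实现。下面是按其调用参数(retries、retry_interval、log_from_retry、ignore_exceptions)推测的一个最小草图,仅作参考,并非真实实现(比如真实版本在重试耗尽后可能按注释所说调用 os._exit(1),这里只是简单地把最后一次异常抛出):

import time
from functools import wraps

from loguru import logger


def retry_on_failure(retries=20, retry_interval=1, log_from_retry=5, ignore_exceptions=()):
    """推测版重试装饰器:ignore_exceptions 中的异常不重试、直接上抛;
    其它异常最多重试 retries 次,从第 log_from_retry 次起打印日志,
    重试耗尽后抛出最后一次异常。"""
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(1, retries + 1):
                try:
                    return func(*args, **kwargs)
                except ignore_exceptions:
                    raise  # 不重试、不记录日志,交给调用方处理
                except Exception as e:
                    if attempt >= log_from_retry:
                        logger.warning(f'{func.__name__} 第 {attempt} 次失败: {e}')
                    if attempt == retries:
                        raise  # 重试耗尽,抛出最后一次异常
                    time.sleep(retry_interval)
        return wrapper
    return decorator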
- 主任务和子任务都是异步添加
from concurrent.futures import ThreadPoolExecutor, as_completed
import threading
import requests


class TaskFailure(Exception):
    """自定义任务失败异常"""
    pass


class FastFailRunner:
    def __init__(self):
        self.stop_event = threading.Event()

    def fetch_url(self, url):
        """任何错误直接抛出异常"""
        if self.stop_event.is_set():
            raise TaskFailure("执行已终止")
        try:
            response = requests.get(url, timeout=3)
            return response.status_code
        except Exception as e:
            self.stop_event.set()
            raise TaskFailure(f"URL请求失败: {url} - {str(e)}")

    def process_task(self, task_id):
        """处理任务(异常直接向上抛出)"""
        if self.stop_event.is_set():
            raise TaskFailure("任务终止")
        urls = [f"https://example.com/task{task_id}/{i}" for i in range(3)]
        with ThreadPoolExecutor(max_workers=3) as executor:
            futures = [executor.submit(self.fetch_url, url) for url in urls]
            # 收集所有结果(任一失败会抛出异常)
            results = []
            for future in as_completed(futures):
                if self.stop_event.is_set():
                    raise TaskFailure("中途终止")
                try:
                    result = future.result()
                    results.append(result)
                except TaskFailure as e:
                    # 第一个失败的任务会触发此处
                    self.stop_event.set()
                    raise
        return task_id, results

    def run(self, total_tasks):
        """
        执行入口
        抛出: TaskFailure - 任一任务失败时
        抛出: KeyboardInterrupt - 用户中断时
        """
        try:
            with ThreadPoolExecutor(max_workers=3) as executor:
                futures = [executor.submit(self.process_task, i) for i in range(total_tasks)]
                for future in as_completed(futures):
                    try:
                        task_id, results = future.result()
                    except TaskFailure as e:
                        # 第一个失败的任务会触发此处
                        self.stop_event.set()
                        raise  # 重新抛出给上层
        finally:
            self.stop_event.set()  # 确保释放资源


# 使用示例
if __name__ == "__main__":
    runner = FastFailRunner()
    try:
        runner.run(10)
    except TaskFailure as e:
        print(f"! 任务异常终止: {str(e)}")
    except KeyboardInterrupt:
        print("! 用户主动中断")
    finally:
        print("执行器已关闭")
- 主任务同步+子任务异步
from concurrent.futures import ThreadPoolExecutor, as_completed
import threading
import requests
import time


class TaskFailure(Exception):
    """自定义任务失败异常"""
    pass


class RetryableTaskRunner:
    def __init__(self, max_retries=3, max_workers=3):
        self.stop_event = threading.Event()
        self.max_retries = max_retries
        self.executor = ThreadPoolExecutor(max_workers=max_workers)

    def fetch_url_with_retry(self, url):
        """带重试机制的URL请求"""
        attempt = 0
        last_error = None
        while attempt < self.max_retries and not self.stop_event.is_set():
            try:
                response = requests.get(url, timeout=3)
                return response.status_code
            except Exception as e:
                last_error = e
                attempt += 1
                if attempt < self.max_retries:
                    time.sleep(1)  # 重试间隔
        self.stop_event.set()
        raise TaskFailure(f"URL请求失败(尝试{attempt}次): {url} - {str(last_error)}")

    def process_single_task(self, task_id):
        """处理单个主任务(包含子任务线程池)"""
        if self.stop_event.is_set():
            raise TaskFailure("执行已终止")
        urls = [f"https://example.com/task{task_id}/{i}" for i in range(3)]
        results = []
        # 子任务线程池
        futures = {self.executor.submit(self.fetch_url_with_retry, url): url for url in urls}
        try:
            for future in as_completed(futures):
                if self.stop_event.is_set():
                    raise TaskFailure("收到终止信号")
                try:
                    result = future.result()
                    results.append(result)
                except TaskFailure as e:
                    raise  # 子任务失败后直接抛出
        except Exception as e:
            self.stop_event.set()
            raise TaskFailure(f"任务{task_id}执行失败: {str(e)}")
        return task_id, results

    def run(self, total_tasks):
        """
        主任务执行入口
        返回: 成功完成的任务数
        抛出: TaskFailure - 任一任务最终失败时
        """
        completed = 0
        try:
            # 同步遍历主任务
            for i in range(total_tasks):
                if self.stop_event.is_set():
                    break
                try:
                    task_id, results = self.process_single_task(i)
                    completed += 1
                    print(f"任务{task_id} 成功: {len(results)}条结果")
                except TaskFailure as e:
                    print(f"! {str(e)}")
                    raise  # 终止所有任务
            return completed
        except KeyboardInterrupt:
            print("用户中断")
            raise TaskFailure("执行被用户中断")
        finally:
            self.stop_event.set()
            self.executor.shutdown(wait=True)
            print(f"资源已清理 (共完成{completed}/{total_tasks}个任务)")


# 使用示例
if __name__ == "__main__":
    runner = RetryableTaskRunner(max_retries=2)
    try:
        success_count = runner.run(10)
        print(f"执行完成 (成功: {success_count})")
    except TaskFailure as e:
        print(f"! 程序终止: {str(e)}")
    except Exception as e:
        print(f"! 未知错误: {str(e)}")
- 线程池优雅退出示例
import random
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from threading import Event
from loguru import logger


class TestThreadPoolExecutor:
    def __init__(self, max_workers=3):
        self.stop_event = Event()  # 用于通知所有任务停止的线程安全事件
        self.executor = ThreadPoolExecutor(max_workers=max_workers)  # 创建线程池,限制最大工作线程数

    def deal_task(self, task):
        # 处理单个任务的逻辑
        logger.info(f'开始执行任务: {task}')
        if task in ['task_30', 'task_31']:
            # 模拟特定任务抛出异常
            raise ValueError(f'{task}--不允许执行')
        # 模拟耗时任务,循环休眠以分段执行
        for _ in range(int(random.uniform(1, 3) * 10)):
            if self.stop_event.is_set():
                # 检测到停止信号,任务提前退出
                logger.info(f'任务 {task} 检测到停止信号,提前退出')
                return None  # 返回 None 表示任务被取消,也可以抛出自定义异常
            time.sleep(0.1)  # 每次休眠 0.1 秒,模拟耗时操作
        logger.info(f'任务 {task} 执行完成')
        return task  # 正常完成返回任务标识

    def submit_tasks(self, tasks):
        # 提交任务到线程池,返回字典,键为 Future 对象,值为任务标识
        return {self.executor.submit(self.deal_task, task): task for task in tasks}

    def process_results(self, futures):
        # 初始化结果列表,收集各任务的返回值
        results = []
        # 迭代已完成的 Future 对象
        for future in as_completed(futures):
            task = futures[future]  # 获取对应的任务标识
            try:
                res = future.result()  # 获取任务执行结果
                results.append(res)
            except Exception as e:
                logger.warning(f'{task} 执行失败: {str(e)}')
                self.stop_event.set()  # 设置停止事件,通知其他任务退出
                # 立即抛出异常,导致循环终止,后续任务的退出行为(返回 None)不会被捕获
                raise type(e)(f'{task} 执行失败: {str(e)}') from e
        return results

    def run(self):
        # 主运行方法,协调任务提交和结果处理
        tasks = [f'task_{i}' for i in range(20, 40)]  # 创建任务列表
        try:
            futures = self.submit_tasks(tasks)  # 提交所有任务
            results = self.process_results(futures)  # 处理任务结果
            logger.info(f'执行结果:{results}')  # 记录最终结果
            return results
        except KeyboardInterrupt:
            # 捕获用户中断(如 Ctrl+C)
            logger.error("用户中断")
            raise KeyboardInterrupt("执行被用户中断")  # 抛出中断异常
        finally:
            # 无论正常结束、异常还是中断,都执行清理
            self.stop_event.set()  # 确保所有任务收到停止信号
            self.executor.shutdown(wait=True, cancel_futures=True)  # 关闭线程池,等待运行中的任务完成,并取消未运行的任务


if __name__ == '__main__':
    TestThreadPoolExecutor().run()
- 综合示例:主任务同步遍历,子任务带重试与优雅退出
from concurrent.futures import ThreadPoolExecutor, as_completed
import threading
import time


class TaskFailure(Exception):
    """自定义任务失败异常"""
    pass


class RetryableTaskRunner:
    def __init__(self, max_retries=3, max_workers=3):
        self.stop_event = threading.Event()
        self.max_retries = max_retries
        self.executor = ThreadPoolExecutor(max_workers=max_workers)

    def do_task(self, task_id, subtask_id):
        """带重试机制的子任务执行,支持优雅退出"""
        attempt = 0
        last_error = None
        while attempt < self.max_retries and not self.stop_event.is_set():
            try:
                print(f"尝试执行 主任务{task_id}的子任务{subtask_id} (尝试{attempt + 1})")
                # 检查终止信号(避免长时间阻塞的任务无法响应停止)
                if self.stop_event.is_set():
                    print(f"检测到终止信号,子任务{subtask_id}提前退出")
                    return None  # 或者 raise TaskFailure("任务被终止")
                # 模拟特定子任务失败(仅用于测试)
                if task_id == 1 and subtask_id == 1:
                    raise ValueError('模拟子任务失败')
                # 模拟长时间运行的任务(每次循环检查终止信号)
                for _ in range(5):  # 假设任务分多个步骤执行
                    if self.stop_event.is_set():
                        print(f"子任务{subtask_id}中途终止")
                        return None
                    time.sleep(0.5)  # 模拟任务分步执行
                return f'主任务{task_id}的子任务{subtask_id}结果'
            except Exception as e:
                last_error = e
                attempt += 1
                if attempt < self.max_retries:
                    time.sleep(0.5)  # 重试间隔
        # 如果走到这里,说明任务失败或收到终止信号
        if self.stop_event.is_set():
            raise TaskFailure(f"子任务{subtask_id}被终止")
        else:
            raise TaskFailure(f"子任务{subtask_id}失败(尝试{attempt}次): {str(last_error)}")

    def process_single_task(self, task_id):
        """处理单个主任务(包含子任务线程池)"""
        print(f"\n开始处理主任务 {task_id}")
        subtasks = [1, 2, 3]  # 每个主任务有3个子任务
        futures = [self.executor.submit(self.do_task, task_id, subtask) for subtask in subtasks]
        try:
            results = []
            for future in as_completed(futures):
                try:
                    result = future.result()
                    results.append(result)
                    print(f"主任务{task_id}的子任务完成: {result}")
                except Exception as e:
                    print(f"主任务{task_id}的子任务出错: {str(e)}")
                    self.stop_event.set()  # 通知所有线程停止
                    # 取消所有未完成的任务
                    for f in futures:
                        if not f.done():
                            f.cancel()
                    raise TaskFailure(f"主任务{task_id}因子任务失败而终止") from e
            print(f'主任务{task_id}顺利完成')
            return task_id, results
        except Exception as e:
            self.stop_event.set()
            raise

    def run(self, total_tasks):
        """主任务执行入口"""
        completed = 0
        try:
            for i in range(total_tasks):
                try:
                    task_id, results = self.process_single_task(i)
                    completed += 1
                    print(f"主任务{task_id} 成功完成")
                except TaskFailure as e:
                    print(f"! 主任务{i}失败: {str(e)}")
                    raise  # 直接抛出异常终止执行
            return completed
        except KeyboardInterrupt:
            print("用户中断")
            self.stop_event.set()
            raise TaskFailure("执行被用户中断")
        finally:
            self.executor.shutdown(wait=True)
            print(f"资源已清理 (共完成{completed}/{total_tasks}个主任务)")


if __name__ == "__main__":
    runner = RetryableTaskRunner(max_retries=2)
    success_count = runner.run(3)  # 测试3个主任务
    print(f"执行完成 (成功: {success_count})")
- 简单示例
from concurrent.futures import ThreadPoolExecutor, as_completed
import random
import time


def task(id, duration):
    time.sleep(duration)
    return f"任务{id} 耗时{duration:.1f}s"


# 准备任务(ID顺序为1-5,但随机持续时间)
tasks = [(i, random.uniform(0.5, 2)) for i in range(1, 6)]

with ThreadPoolExecutor(max_workers=10) as executor:
    futures = [executor.submit(task, id, duration) for id, duration in tasks]

    # 1、as_completed:按照实际完成顺序
    print("as_completed完成顺序:")
    for future in as_completed(futures):  # 按实际完成顺序迭代
        try:
            print(future.result())
        except Exception as e:
            print(f'执行出错:{str(e)}')  # 可以添加异常处理

    # 2、map方式:保持顺序处理
    results = executor.map(lambda t: task(t[0], t[1]), tasks)  # 注意传参的形式,对于多参数可以使用匿名函数
    # results的顺序严格对应tasks的顺序
    print("map完成顺序:")
    for result in results:  # 直接迭代results
        print(result)
    # print([result for result in results])  # 打印整体执行结果
两种方式执行完全部任务的总耗时通常相差不大,但 as_completed() 谁先完成就先处理谁,不会被提交顺序靠前却尚未完成的任务阻塞,处理结果更及时,一般推荐使用。
添加任务时两种方式都是异步提交的;如果需要结果顺序与提交顺序保持一致,就使用 map()(多参数的另一种传法见下方补充示例)。注意两者返回值的区别:
map:迭代的直接是结果
as_completed:需要调用.result()方法取值
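补充一点:executor.map() 本身支持同时传入多个可迭代对象,按位置对应到函数的各个参数,因此多参数时不一定要用匿名函数打包元组。下面沿用上面简单示例里的 task 函数,给出一个等价写法的小示例:

from concurrent.futures import ThreadPoolExecutor
import random
import time


def task(id, duration):
    time.sleep(duration)
    return f"任务{id} 耗时{duration:.1f}s"


tasks = [(i, random.uniform(0.5, 2)) for i in range(1, 6)]

with ThreadPoolExecutor(max_workers=10) as executor:
    # map(fn, iter1, iter2, ...):多个可迭代对象按位置组合成函数的参数
    ids = [t[0] for t in tasks]
    durations = [t[1] for t in tasks]
    for result in executor.map(task, ids, durations):  # 结果顺序与 tasks 一致
        print(result)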
如果不使用 with ... as ...,就需要调用 executor.shutdown(wait=True) 来显式关闭线程池。
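下面是一个不使用 with 时手动管理线程池生命周期的最小示意(其中的 task 函数只是演示用的假设,用 try/finally 保证线程池一定被关闭):

from concurrent.futures import ThreadPoolExecutor, as_completed
import time


def task(n):
    # 演示用的假设任务:休眠后返回平方
    time.sleep(0.1)
    return n * n


executor = ThreadPoolExecutor(max_workers=4)
try:
    futures = [executor.submit(task, i) for i in range(5)]
    for future in as_completed(futures):
        print(future.result())
finally:
    # 等价于 with 语句退出时的默认行为:等待已提交的任务执行完再关闭
    executor.shutdown(wait=True)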

