线程池ThreadPoolExecutor

常见的情形:

  • 主任务和子任务都是异步添加

    from concurrent.futures import ThreadPoolExecutor, as_completed
    import threading
    import requests
    
    
    class TaskFailure(Exception):
        """Custom exception signalling that a task failed or was aborted."""
    
    
    class FastFailRunner:
        """Fail-fast runner whose main tasks and URL sub-tasks both run in thread pools.

        ``stop_event`` is the shared abort flag.  It is set as soon as any request
        fails, and every worker checks it so that the remaining work ends early
        with ``TaskFailure``.  ``run`` also sets the flag on exit, so an instance
        is intended for a single ``run`` call.
        """

        def __init__(self):
            # Shared abort flag checked by every worker.
            self.stop_event = threading.Event()

        def fetch_url(self, url):
            """Issue a GET request for ``url`` and return the HTTP status code.

            Raises:
                TaskFailure: if the stop flag is already set, or if the request
                    raises any exception (the stop flag is set in that case).
            """
            if self.stop_event.is_set():
                raise TaskFailure("执行已终止")

            try:
                response = requests.get(url, timeout=3)
                return response.status_code
            except Exception as e:
                # Raise the flag before propagating so sibling workers stop early.
                self.stop_event.set()
                raise TaskFailure(f"URL请求失败: {url} - {str(e)}") from e

        def process_task(self, task_id):
            """Fetch the three URLs of one main task concurrently.

            Returns:
                A ``(task_id, results)`` tuple; ``results`` holds the status codes
                in completion order.

            Raises:
                TaskFailure: if the stop flag is set or any sub-task fails.
            """
            if self.stop_event.is_set():
                raise TaskFailure("任务终止")

            urls = [f"https://example.com/task{task_id}/{i}" for i in range(3)]
            with ThreadPoolExecutor(max_workers=3) as executor:
                futures = [executor.submit(self.fetch_url, url) for url in urls]

                results = []
                try:
                    for future in as_completed(futures):
                        # Read the result BEFORE looking at the stop flag: a failed
                        # sub-task sets the flag itself, so checking the flag first
                        # would always hide the real error behind "中途终止".
                        results.append(future.result())
                        if self.stop_event.is_set():
                            raise TaskFailure("中途终止")
                except Exception:
                    self.stop_event.set()
                    raise

            return task_id, results

        def run(self, total_tasks):
            """Run ``total_tasks`` main tasks in a pool of three workers.

            Raises:
                TaskFailure: as soon as any main task fails.
                KeyboardInterrupt: when the user interrupts the run.
            """
            try:
                with ThreadPoolExecutor(max_workers=3) as executor:
                    futures = [executor.submit(self.process_task, i) for i in range(total_tasks)]

                    try:
                        for future in as_completed(futures):
                            future.result()  # re-raises the worker's exception, if any
                    except BaseException:
                        # Also covers KeyboardInterrupt.  The flag must be raised
                        # before the ``with`` block exits, because that exit waits
                        # for every worker; queued tasks are cancelled outright.
                        self.stop_event.set()
                        for pending in futures:
                            pending.cancel()
                        raise

            finally:
                self.stop_event.set()  # leave the runner in the stopped state
    
    
    # 使用示例
    if __name__ == "__main__":
        # Demo: run ten main tasks and report how the run ended.
        fast_fail = FastFailRunner()
        try:
            fast_fail.run(10)
        except KeyboardInterrupt:
            print("! 用户主动中断")
        except TaskFailure as e:
            print(f"! 任务异常终止: {str(e)}")
        finally:
            print("执行器已关闭")
    View Code
  • 主任务同步+子任务异步

    from concurrent.futures import ThreadPoolExecutor, as_completed
    import threading
    import requests
    import time
    
    
    class TaskFailure(Exception):
        """Custom exception raised when a task cannot be completed."""
    
    
    class RetryableTaskRunner:
        """Run main tasks one after another; each fans its URL requests out to a shared pool.

        Requests are retried up to ``max_retries`` times.  ``stop_event`` is the
        shared abort flag.  ``run`` shuts the executor down when it finishes, so
        an instance is intended for a single ``run`` call.
        """

        def __init__(self, max_retries=3, max_workers=3):
            self.stop_event = threading.Event()
            self.max_retries = max_retries
            self.executor = ThreadPoolExecutor(max_workers=max_workers)

        def fetch_url_with_retry(self, url):
            """GET ``url`` with retries and return the HTTP status code.

            Raises:
                TaskFailure: if the stop flag was set before any attempt was made,
                    or if no attempt succeeded (the stop flag is then set).
            """
            attempt = 0
            last_error = None

            while attempt < self.max_retries and not self.stop_event.is_set():
                try:
                    response = requests.get(url, timeout=3)
                    return response.status_code
                except Exception as e:
                    last_error = e
                    attempt += 1
                    if attempt < self.max_retries:
                        # Retry delay.  Event.wait returns early once the stop
                        # flag is set, so an abort is not held up by the delay.
                        self.stop_event.wait(1)

            if last_error is None and self.stop_event.is_set():
                # Stopped before a single request was made: report termination
                # rather than a misleading "0 attempts ... None" failure.
                raise TaskFailure("执行已终止")

            self.stop_event.set()
            raise TaskFailure(
                f"URL请求失败(尝试{attempt}次): {url} - {str(last_error)}"
            ) from last_error

        def process_single_task(self, task_id):
            """Run the three URL sub-tasks of one main task on the shared pool.

            Returns:
                A ``(task_id, results)`` tuple; ``results`` holds the status codes
                in completion order.

            Raises:
                TaskFailure: if the stop flag is set or any sub-task fails.
            """
            if self.stop_event.is_set():
                raise TaskFailure("执行已终止")

            urls = [f"https://example.com/task{task_id}/{i}" for i in range(3)]
            results = []

            # Fan the sub-tasks out to the shared pool.
            futures = [self.executor.submit(self.fetch_url_with_retry, url) for url in urls]

            try:
                for future in as_completed(futures):
                    # Read the result BEFORE checking the stop flag: a failed
                    # sub-task sets the flag itself, so checking first would always
                    # replace the real error with "收到终止信号".
                    results.append(future.result())
                    if self.stop_event.is_set():
                        raise TaskFailure("收到终止信号")
            except Exception as e:
                self.stop_event.set()
                for pending in futures:
                    pending.cancel()  # no-op for sub-tasks already running or done
                raise TaskFailure(f"任务{task_id}执行失败: {str(e)}") from e

            return task_id, results

        def run(self, total_tasks):
            """Process ``total_tasks`` main tasks sequentially.

            Returns:
                The number of main tasks that completed successfully.

            Raises:
                TaskFailure: when any main task ultimately fails, or when the run
                    is interrupted by the user.
            """
            completed = 0

            try:
                # Main tasks are processed synchronously, one at a time.
                for i in range(total_tasks):
                    if self.stop_event.is_set():
                        break

                    try:
                        task_id, results = self.process_single_task(i)
                    except TaskFailure as e:
                        print(f"! {str(e)}")
                        raise  # abort the remaining main tasks

                    completed += 1
                    print(f"任务{task_id} 成功: {len(results)}条结果")

                return completed

            except KeyboardInterrupt as e:
                print("用户中断")
                raise TaskFailure("执行被用户中断") from e
            finally:
                # Raise the flag first so running workers exit, then wait for them.
                self.stop_event.set()
                self.executor.shutdown(wait=True)
                print(f"资源已清理 (共完成{completed}/{total_tasks}个任务)")
    
    
    # 使用示例
    if __name__ == "__main__":
        # Demo: ten main tasks, at most two attempts per request.
        retry_runner = RetryableTaskRunner(max_retries=2)

        try:
            finished = retry_runner.run(10)
            print(f"执行完成 (成功: {finished})")
        except TaskFailure as e:
            print(f"! 程序终止: {str(e)}")
        except Exception as e:
            print(f"! 未知错误: {str(e)}")
    View Code
  • 线程池优雅退出示例

    import random
    import time
    from concurrent.futures import ThreadPoolExecutor, as_completed
    from threading import Event
    from loguru import logger
    
    
    class TestThreadPoolExecutor:
        """Demo of shutting a thread pool down gracefully after the first task failure."""

        def __init__(self, max_workers=3):
            self.stop_event = Event()  # thread-safe flag telling every task to stop
            self.executor = ThreadPoolExecutor(max_workers=max_workers)  # pool capped at max_workers threads

        def deal_task(self, task):
            """Run one simulated task.

            Returns:
                ``task`` on normal completion, or ``None`` when the stop flag was
                seen and the task exited early.

            Raises:
                ValueError: for 'task_30' and 'task_31' (simulated failures).
            """
            logger.info(f'开始执行任务: {task}')
            if task in ['task_30', 'task_31']:
                # Simulate a failure for specific tasks.
                raise ValueError(f'{task}--不允许执行')
            # Simulate a long job as many short sleeps so the stop flag can be
            # checked between steps.
            for _ in range(int(random.uniform(1, 3) * 10)):
                if self.stop_event.is_set():
                    logger.info(f'任务 {task} 检测到停止信号,提前退出')
                    return None  # None marks an early exit
                time.sleep(0.1)
            logger.info(f'任务 {task} 执行完成')
            return task

        def submit_tasks(self, tasks):
            """Submit every task and return a ``{Future: task}`` mapping."""
            return {self.executor.submit(self.deal_task, task): task for task in tasks}

        def process_results(self, futures):
            """Collect results in completion order, stopping at the first failure.

            Args:
                futures: mapping of Future -> task label, as built by ``submit_tasks``.

            Returns:
                A list of the task results gathered so far.

            Raises:
                An exception of the failing task's own type with the task label
                prefixed to the message, chained to the original.  Falls back to
                ``RuntimeError`` when that type cannot be built from a single
                message string.
            """
            results = []
            for future in as_completed(futures):
                task = futures[future]
                try:
                    results.append(future.result())
                except Exception as e:
                    message = f'{task} 执行失败: {str(e)}'
                    logger.warning(message)
                    self.stop_event.set()  # tell the remaining tasks to exit
                    # Raising here ends the loop, so the ``None`` results of tasks
                    # that exit early afterwards are never collected.
                    try:
                        wrapped = type(e)(message)
                    except Exception:
                        # Not every exception class accepts one message argument.
                        wrapped = RuntimeError(message)
                    raise wrapped from e
            return results

        def run(self, tasks=None):
            """Submit the tasks, gather their results and always shut the pool down.

            Args:
                tasks: iterable of task labels; defaults to 'task_20' .. 'task_39'.

            Returns:
                The list of task results.
            """
            if tasks is None:
                tasks = [f'task_{i}' for i in range(20, 40)]
            try:
                futures = self.submit_tasks(tasks)
                results = self.process_results(futures)
                logger.info(f'执行结果:{results}')
                return results
            except KeyboardInterrupt:
                # User interrupt (e.g. Ctrl+C).
                logger.error("用户中断")
                raise KeyboardInterrupt("执行被用户中断")
            finally:
                # Runs on success, failure and interrupt alike.
                self.stop_event.set()  # make sure every task sees the stop flag
                # Wait for running tasks and cancel those that have not started.
                self.executor.shutdown(wait=True, cancel_futures=True)
    
    
    if __name__ == '__main__':
        # Demo entry point: build the runner and execute its default task list.
        pool_demo = TestThreadPoolExecutor()
        pool_demo.run()
  • 示例

    from concurrent.futures import ThreadPoolExecutor, as_completed
    import threading
    import time
    
    
    class TaskFailure(Exception):
        """Custom exception: a main task or sub-task failed or was terminated."""
    
    
    class RetryableTaskRunner:
        """Run main tasks sequentially; each fans three simulated sub-tasks out to a shared pool.

        Sub-tasks are retried up to ``max_retries`` times and poll ``stop_event``
        so they can exit early.  ``run`` shuts the executor down when it finishes,
        so an instance is intended for a single ``run`` call.
        """

        def __init__(self, max_retries=3, max_workers=3):
            self.stop_event = threading.Event()
            self.max_retries = max_retries
            self.executor = ThreadPoolExecutor(max_workers=max_workers)

        def do_task(self, task_id, subtask_id):
            """Run one simulated sub-task with retries and early exit.

            Returns:
                The result string on success, or ``None`` when the stop flag was
                seen while the sub-task was running.

            Raises:
                TaskFailure: if every attempt failed, or if the stop flag ended
                    the retry loop.
            """
            attempt = 0
            last_error = None

            while attempt < self.max_retries and not self.stop_event.is_set():
                try:
                    print(f"尝试执行 主任务{task_id}的子任务{subtask_id} (尝试{attempt + 1})")

                    # Re-check the flag so a stop request is honoured before
                    # starting the work.
                    if self.stop_event.is_set():
                        print(f"检测到终止信号,子任务{subtask_id}提前退出")
                        return None

                    # Simulated failure of one specific sub-task (test only).
                    if task_id == 1 and subtask_id == 1:
                        raise ValueError('模拟子任务失败')

                    # Simulate a multi-step job.  Event.wait is an interruptible
                    # sleep: it returns True as soon as the stop flag is set.
                    for _ in range(5):
                        if self.stop_event.wait(0.5):
                            print(f"子任务{subtask_id}中途终止")
                            return None

                    return f'主任务{task_id}的子任务{subtask_id}结果'

                except Exception as e:
                    last_error = e
                    attempt += 1
                    if attempt < self.max_retries:
                        self.stop_event.wait(0.5)  # retry delay, cut short by a stop request

            # Reaching this point means the retries ran out or a stop was requested.
            if self.stop_event.is_set():
                raise TaskFailure(f"子任务{subtask_id}被终止")
            else:
                raise TaskFailure(
                    f"子任务{subtask_id}失败(尝试{attempt}次): {str(last_error)}"
                ) from last_error

        def process_single_task(self, task_id):
            """Run the three sub-tasks of one main task on the shared pool.

            Returns:
                A ``(task_id, results)`` tuple with results in completion order.

            Raises:
                TaskFailure: if any sub-task raises; the stop flag is set and the
                    sub-tasks that have not started are cancelled.
            """
            print(f"\n开始处理主任务 {task_id}")
            subtasks = [1, 2, 3]  # every main task has three sub-tasks
            futures = [self.executor.submit(self.do_task, task_id, subtask) for subtask in subtasks]

            results = []
            try:
                for future in as_completed(futures):
                    result = future.result()
                    results.append(result)
                    print(f"主任务{task_id}的子任务完成: {result}")
            except Exception as e:
                print(f"主任务{task_id}的子任务出错: {str(e)}")
                self.stop_event.set()  # tell every worker to stop
                for pending in futures:
                    pending.cancel()  # no-op for sub-tasks already running or done
                raise TaskFailure(f"主任务{task_id}因子任务失败而终止") from e

            print(f'主任务{task_id}顺利完成')
            return task_id, results

        def run(self, total_tasks):
            """Process ``total_tasks`` main tasks in order.

            Returns:
                The number of main tasks that completed.

            Raises:
                TaskFailure: when a main task fails or the user interrupts the run.
            """
            completed = 0

            try:
                for i in range(total_tasks):
                    try:
                        task_id, results = self.process_single_task(i)
                    except TaskFailure as e:
                        print(f"! 主任务{i}失败: {str(e)}")
                        raise  # abort the remaining main tasks

                    completed += 1
                    print(f"主任务{task_id} 成功完成")

                return completed

            except KeyboardInterrupt as e:
                print("用户中断")
                raise TaskFailure("执行被用户中断") from e
            finally:
                # Raise the flag on every exit path so running workers stop before
                # the blocking shutdown below waits for them.
                self.stop_event.set()
                self.executor.shutdown(wait=True)
                print(f"资源已清理 (共完成{completed}/{total_tasks}个主任务)")
    
    
    if __name__ == "__main__":
        runner = RetryableTaskRunner(max_retries=2)
        try:
            success_count = runner.run(3)  # exercise three main tasks
        except TaskFailure as e:
            # run() raises once a main task fails; report it instead of letting
            # the script end with an uncaught traceback.
            print(f"! 程序终止: {str(e)}")
        else:
            print(f"执行完成 (成功: {success_count})")
    View Code
  • 简单示例

    from concurrent.futures import ThreadPoolExecutor, as_completed
    import random
    import time
    
    
    def task(id, duration):
        """Sleep for ``duration`` seconds, then return a summary string for task ``id``."""
        time.sleep(duration)
        summary = f"任务{id} 耗时{duration:.1f}s"
        return summary
    
    
    # Build the work list: IDs run 1-5 in order, each paired with a random duration.
    tasks = [(i, random.uniform(0.5, 2)) for i in range(1, 6)]

    with ThreadPoolExecutor(max_workers=10) as executor:
        futures = [executor.submit(task, *spec) for spec in tasks]

        # 1. as_completed: futures are yielded in the order they actually finish.
        print("as_completed完成顺序:")
        for future in as_completed(futures):
            try:
                print(future.result())
            except Exception as e:
                # A failing task re-raises here, so it can be handled per future.
                print(f'执行出错:{str(e)}')

        # 2. map: results come back in the same order as the inputs.
        #    For a multi-argument function, a lambda unpacks each tuple.
        results = executor.map(lambda t: task(*t), tasks)
        print("map完成顺序:")
        for result in results:  # iterating yields the results directly
            print(result)
        # Alternatively, list(results) collects every result at once.
    View Code

    它们执行任务的总时间通常差不多,但as_completed()可以在任一任务完成后立即处理其结果(不必等待排在前面的任务),响应更及时,推荐使用
    两种方式提交任务时都是异步的;如果需要按提交顺序获取结果,就使用map()

    注意返回值:

      map:迭代的直接是结果
      as_completed:需要调用.result()方法取值
    如果不使用with...as...,就需要使用executor.shutdown(wait=True)来显式关闭线程池

 

posted @ 2025-04-17 01:46  eliwang  阅读(30)  评论(0)    收藏  举报