多线程的实现方式

对于微秒级任务，优先使用多线程。

1微秒=0.000001秒

对于毫秒级及以上任务，逐步增加进程数，找到性能拐点。

1毫秒=0.001秒

标准库concurrent.futures

from concurrent.futures import ThreadPoolExecutor, as_completed
import requests

def fetch_url(url, timeout=10):
    """Download ``url`` with HTTP GET and return the response body as text.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely and keep a pool
            worker busy forever.

    Returns:
        The decoded response body. Only a returned value can be retrieved
        by the caller through the executor (``Future.result()`` / ``map``).
    """
    response = requests.get(url, timeout=timeout)
    # Decode using the encoding detected from the body content.
    response.encoding = response.apparent_encoding
    return response.text

urls = ["https://www.baidu.com", "https://chat.deepseek.com/"]

# Approach 1: Executor.map
with ThreadPoolExecutor(max_workers=10) as executor:
    # map() schedules one fetch_url call per URL and yields each call's
    # return value (not a Future object), in the same order as ``urls``.
    for page_text in executor.map(fetch_url, urls):
        print(page_text)
# Approach 2: submit() + as_completed()
with ThreadPoolExecutor(max_workers=2) as executor:
    # Queue every download and remember which URL each future belongs to.
    futures = {}
    for url in urls:
        futures[executor.submit(fetch_url, url)] = url

    # Handle each future as soon as it completes.
    for future in as_completed(futures):
        url = futures[future]
        try:
            data = future.result()
            print(f"{url} 抓取成功，长度: {len(data)}")
        except Exception as e:
            # result() re-raises whatever the task raised; report and go on.
            print(f"{url} 抓取失败: {e}")

普通线程池

import concurrent.futures
# Argument list: one entry per task
hrefs = []


# A single time-consuming task
def process_href(href):
    """Process one href (placeholder body, to be filled in)."""
    ...


# Run the tasks on a thread pool
with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor:
    # NOTE: the iterator returned by map() is discarded here, so the tasks'
    # return values (and any exception they raise) are never retrieved.
    executor.map(process_href, hrefs)

threading.Thread

# 1.导入线程模块
import threading
import time

def sing(num, songName):
    """Print one "singing" line per round for ``num`` rounds.

    Sleeps one second after each line; ``songName`` is embedded in the text.
    """
    for i in range(num):
        line = f"演唱{songName}第{i + 1}次"
        print(line)
        time.sleep(1)


def dance(num):
    """Print one "dancing" line per round for ``num`` rounds.

    Sleeps one second after each line.
    """
    for i in range(num):
        line = f"跳舞...第{i + 1}次"
        print(line)
        time.sleep(1)


# 2. Create the thread objects
if __name__ == '__main__':
    singThread = threading.Thread(target=sing, args=(3, "难忘今宵"))
    danceThread = threading.Thread(target=dance, args=(3,))

    # 3. Start the threads (sing first, then dance)
    for worker in (singThread, danceThread):
        worker.start()
'''
线程参数说明：
    初始化源码：
        def __init__(self, group=None, target=None, name=None,args=(), kwargs=None, *, daemon=None)
    
    参数说明：
        group：目前只能为None
        target：要执行的函数名（方法名）
        name：线程名，一般不用设置，默认是 Thread-N（N 为递增编号）
        args：函数的参数，注意为元组类型("a","b")
        kwargs:字典类型的参数
        daemon：是否守护线程
'''

继承Thread类，重写它的run方法

#!/usr/bin/python3

import threading
import time

exitFlag = 0

class myThread(threading.Thread):
    """Thread subclass whose ``run`` prints timestamps via ``print_time``.

    The constructor stores ``threadID``, ``name`` and ``delay`` on the
    instance; ``run`` passes the name and delay on with a counter of 5.
    """

    def __init__(self, threadID, name, delay):
        super().__init__()
        self.threadID = threadID
        self.name = name
        self.delay = delay

    def run(self):
        # Executed in the new thread once start() has been called.
        print("开始线程：" + self.name)
        print_time(self.name, self.delay, 5)
        print("退出线程：" + self.name)

def print_time(threadName, delay, counter):
    """Print ``threadName`` with the current time, ``counter`` times.

    Args:
        threadName: Label printed in front of each timestamp.
        delay: Seconds to sleep before each print.
        counter: Number of timestamps to print; values <= 0 print nothing.

    Stops early, without printing, once the module-level ``exitFlag`` is
    truthy.
    """
    while counter > 0:
        if exitFlag:
            # threadName is the thread's name (a str), which has no exit()
            # method, so calling it raised AttributeError. Returning ends
            # the loop and lets the calling run() finish normally.
            return
        time.sleep(delay)
        print("%s: %s" % (threadName, time.ctime(time.time())))
        counter -= 1

# Create the new threads
thread1 = myThread(1, "Thread-1", 1)
thread2 = myThread(2, "Thread-2", 2)

# Start both threads, then block until both have finished
for worker in (thread1, thread2):
    worker.start()
for worker in (thread1, thread2):
    worker.join()
print("退出主线程")

锁

lock = threading.Lock()


# Locking, approach 1: hold the lock with a ``with`` statement
def changeMoney(n):
    """Add ``n`` to the global ``money`` and subtract it again under ``lock``."""
    global money
    # The with-block acquires the lock on entry and releases it on exit,
    # including when the body raises.
    with lock:
        money += n
        money -= n

# Locking, approach 2: threading.Lock and threading.RLock both provide
# acquire() and release(). Code that only one thread may run at a time goes
# between the two calls.
def changeMoney(n):
    """Add ``n`` to the global ``money`` and subtract it again under ``lock``."""
    global money
    # Acquire the lock so only one thread updates money at a time.
    lock.acquire()
    try:
        money += n
        money -= n
    finally:
        # Always release: if the update raised while the lock was held,
        # every other thread waiting on it would block forever.
        lock.release()

等待终止

如在一个线程B中调用thread1.join()，则thread1结束后，线程B才会接着thread1.join()往后运行。

# Get the list of all entries in the source folder
# NOTE(review): this is an excerpt of a function body; the enclosing def and
# the definitions of sourceDir, destDir and copyFile are not shown here.
    listdir = os.listdir(sourceDir)
    threads = []
    for file in listdir:
        # Start one thread per entry and keep it so it can be joined below.
        thread = threading.Thread(target=copyFile, args=(file, sourceDir, destDir))
        thread.start()
        threads.append(thread)
    # Wait for every started thread to finish before printing the message.
    for thread in threads:
        thread.join()
    print("全部拷贝完毕！")

根据计算机CPU核数动态设置线程数

1. 获取 CPU 核数

Python 提供了两种方式获取 CPU 核数：

import os
# (1) Using the os module
cpu_count = os.cpu_count()
print(f"CPU 核数（逻辑核心）：{cpu_count}")  # example output: CPU 核数（逻辑核心）：16
# (2) Using the multiprocessing module
import multiprocessing
cpu_count = multiprocessing.cpu_count()
print(f"CPU 核数（逻辑核心）：{cpu_count}")  # example output: CPU 核数（逻辑核心）：16

# Note:
# os.cpu_count() and multiprocessing.cpu_count() return the number of LOGICAL
# cores (a CPU with hyper-threading reports twice its physical core count).

# To get the number of physical cores, use the third-party psutil package:
import psutil

physical_cores = psutil.cpu_count(logical=False)
print(f"物理核心数：{physical_cores}")  # example output: 物理核心数：8

2. 动态设置线程池大小

根据 CPU 核数创建线程池，优化资源利用：

(1) 基本示例

from concurrent.futures import ThreadPoolExecutor
import os

def task(n):
    """Return ``n`` multiplied by itself."""
    squared = n * n
    return squared

if __name__ == "__main__":
    # Fall back to 4 workers when os.cpu_count() returns None.
    cpu_cores = os.cpu_count() or 4
    with ThreadPoolExecutor(max_workers=cpu_cores) as executor:
        results = list(executor.map(task, range(10)))
        print(results)  # prints: [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

(2) 结合爬虫优化

针对网络请求（I/O 密集型任务），可适当增大线程数（如 2 * cpu_cores）：

import requests
from concurrent.futures import ThreadPoolExecutor

def fetch_url(url, timeout=10):
    """Request ``url`` with HTTP GET and return the HTTP status code.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely and keep a pool
            worker busy forever.

    Returns:
        The integer status code of the response.
    """
    response = requests.get(url, timeout=timeout)
    return response.status_code

if __name__ == "__main__":
    # This example calls os.cpu_count() but does not import os alongside its
    # other imports; import it here so the snippet runs on its own.
    import os

    urls = ["https://www.example.com"] * 100
    cpu_cores = os.cpu_count() or 4
    # I/O-bound work: allow more workers than cores, but cap the pool size
    # so a large machine does not exhaust resources.
    max_workers = min(2 * cpu_cores, 32)

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        results = executor.map(fetch_url, urls)
        # map() yields results in input order, so zip pairs each URL with
        # its own status code.
        for url, status in zip(urls, results):
            print(f"{url} → 状态码：{status}")

3. 优化建议

(1) 任务类型决定线程数

任务类型	线程数建议
I/O 密集型	可设置较高（如 `2 * cpu_cores` 或更高）
CPU 密集型	建议等于物理核心数（避免过多线程竞争）；注意：CPython 受 GIL 限制，多线程无法并行执行纯 Python 计算，此类任务应优先使用多进程（如 ProcessPoolExecutor）

(2) 动态调整策略

import psutil

def get_optimal_workers():
    """Return a thread-pool size for I/O-bound work.

    Returns:
        Twice the logical core count, capped at 32.
    """
    # psutil.cpu_count() returns None when the count cannot be determined;
    # fall back to 4 so the multiplication below cannot fail.
    logical_cores = psutil.cpu_count(logical=True) or 4
    return min(2 * logical_cores, 32)


max_workers = get_optimal_workers()

(3) 异常处理

# os.cpu_count() signals an undetermined core count by returning None; it
# does not raise NotImplementedError (multiprocessing.cpu_count() does).
# Fall back to 4 on a falsy result instead of catching an exception.
cpu_count = os.cpu_count() or 4

4. 注意事项

线程安全：确保共享资源（如文件写入）使用锁（threading.Lock）。
超时控制：为任务添加超时（timeout 参数）避免线程阻塞。
资源限制：避免线程数过高导致内存或网络带宽耗尽。

通过动态设置线程池大小，既能充分利用 CPU 资源，又能避免过多线程切换带来的性能损失。

posted @ 2025-03-26 09:20 指尖下的世界阅读(15) 评论(0) 收藏举报

刷新页面返回顶部

指尖下的世界

今日事今日毕,今日无事早休息.