Day 17 17.5 各种线程方法对比下载斗图吧图片

各种线程方法对比

1 threading.Thread 方法

1.1 步骤如下:

  • 调用 threading.Thread 类的构造器创建一个线程对象(不是线程池)。

  • 定义一个普通函数作为线程任务。

  • 调用 Thread 对象的 start() 方法来启动线程。

  • 调用 Thread 对象的 join() 方法等待线程结束,期间主线程阻塞。

1.2 代码:

import threading


def func(i):
    """Demo task body: simply echo the argument to stdout."""
    print(i)

if __name__ == '__main__':
    # Start all 1000 threads first, then join them all, so the tasks
    # overlap instead of running one after another.
    threads = []
    for n in range(1000):
        worker = threading.Thread(target=func, args=(n,))
        worker.start()
        threads.append(worker)

    for worker in threads:
        worker.join()

1.3 案例:

# 网址 :# https://www.pkdoutu.com/photo/list/

import requests
from fake_useragent import UserAgent
import random
from lxml import etree
import os
import multiprocessing
import time
from concurrent.futures import ThreadPoolExecutor
import threading

# Module-level request defaults: one random User-Agent string is chosen
# at import time and reused for every request in this script.
fake_ua = UserAgent()
headers = {
    "User-Agent": fake_ua.random
}


def get_urls():
    """Scrape the doutu list page and return the image URLs found on it."""
    first_url = "https://www.pkdoutu.com/photo/list/"
    response = requests.get(first_url, headers=headers)
    response.encoding = "utf8"
    tree = etree.HTML(response.text)
    anchors = tree.xpath('//*[@id="pic-detail"]/div/div[2]/div[2]/ul/li/div/div/a')
    # Each <a> wraps an <img> whose data-backup attribute holds the real image URL.
    return [a.xpath('./img[@data-backup]/@data-backup')[0] for a in anchors]


def download_photo(url):
    """Download a single image from *url* into the local 'doutuba' folder.

    The file name is taken from the last path component of the URL.
    Write errors are printed instead of raised so one bad image does not
    stop the whole batch.
    """
    dir_name = 'doutuba'
    # makedirs(exist_ok=True) is safe when several threads/processes race
    # to create the directory (a bare os.mkdir could raise FileExistsError).
    os.makedirs(dir_name, exist_ok=True)
    img_title = os.path.basename(url)
    # Binary payload: no point forcing a text encoding on the response.
    response = requests.get(url, headers=headers)
    file_path = os.path.join(dir_name, img_title)
    try:
        with open(file_path, 'wb') as f:
            # A real chunk size matters: iter_content() with no argument
            # yields the body one byte at a time, which is extremely slow.
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
        print(f'{img_title}图片已下载完成')
    except Exception as e:
        print(e)

def main4():
    """Time the threading.Thread variant: one thread per image URL."""
    # threading approach with download_photo
    start_time = time.time()
    workers = []
    for link in get_urls():
        worker = threading.Thread(target=download_photo, args=(link,))
        worker.start()
        workers.append(worker)

    for worker in workers:
        worker.join()
    print(f'总耗时为:{time.time() - start_time}s')



if __name__ == '__main__':
    # Run the threading-based benchmark variant.
    main4()
#  # 总耗时为:18.68275022506714s

2 multiprocessing.Process 方法

2.1 步骤如下:

  • 调用 multiprocessing.Process 类的构造器创建一个进程对象(不是线程池)。

  • 定义一个普通函数作为进程任务。

  • 调用 multiprocessing.Process 对象的 start() 方法来启动进程。

  • 调用 multiprocessing.Process 对象的 join() 方法等待进程结束。

2.2 代码:

import multiprocessing


def func(i):
    """Demo task body: simply echo the argument to stdout."""
    print(i)

if __name__ == '__main__':
    # Start all 100 processes first, then join them all, so the tasks
    # run concurrently instead of one after another.
    procs = []
    for n in range(100):
        proc = multiprocessing.Process(target=func, args=(n,))
        proc.start()
        procs.append(proc)

    for proc in procs:
        proc.join()

2.3 案例

# 网址 :# https://www.pkdoutu.com/photo/list/

import requests
from fake_useragent import UserAgent
import random
from lxml import etree
import os
import multiprocessing
import time
from concurrent.futures import ThreadPoolExecutor
import threading

# Module-level request defaults: one random User-Agent string is chosen
# at import time and reused for every request in this script.
fake_ua = UserAgent()
headers = {
    "User-Agent": fake_ua.random
}


def get_urls():
    """Scrape the doutu list page and return the image URLs found on it."""
    first_url = "https://www.pkdoutu.com/photo/list/"
    response = requests.get(first_url, headers=headers)
    response.encoding = "utf8"
    tree = etree.HTML(response.text)
    anchors = tree.xpath('//*[@id="pic-detail"]/div/div[2]/div[2]/ul/li/div/div/a')
    # Each <a> wraps an <img> whose data-backup attribute holds the real image URL.
    return [a.xpath('./img[@data-backup]/@data-backup')[0] for a in anchors]


def download_photo(url):
    """Download a single image from *url* into the local 'doutuba' folder.

    The file name is taken from the last path component of the URL.
    Write errors are printed instead of raised so one bad image does not
    stop the whole batch.
    """
    dir_name = 'doutuba'
    # makedirs(exist_ok=True) is safe when several threads/processes race
    # to create the directory (a bare os.mkdir could raise FileExistsError).
    os.makedirs(dir_name, exist_ok=True)
    img_title = os.path.basename(url)
    # Binary payload: no point forcing a text encoding on the response.
    response = requests.get(url, headers=headers)
    file_path = os.path.join(dir_name, img_title)
    try:
        with open(file_path, 'wb') as f:
            # A real chunk size matters: iter_content() with no argument
            # yields the body one byte at a time, which is extremely slow.
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
        print(f'{img_title}图片已下载完成')
    except Exception as e:
        print(e)


def main2():
    """Time the multiprocessing variant: one process per image URL."""
    # multiprocessing approach: start everything, join afterwards,
    # so the downloads overlap.
    start_time = time.time()
    workers = []
    for link in get_urls():
        proc = multiprocessing.Process(target=download_photo, args=(link,))
        proc.start()
        workers.append(proc)
    for proc in workers:
        proc.join()

    print(f'总耗时为:{time.time() - start_time}s')


if __name__ == '__main__':
    # Run the multiprocessing-based benchmark variant.
    main2()

# # 总耗时为:42.19643306732178s

3 ThreadPoolExecutor 类方法(线程池)

3.1 步骤如下:

  • 调用 ThreadPoolExecutor 类的构造器创建一个线程池。

  • 定义一个普通函数作为线程任务。

  • 调用 ThreadPoolExecutor 对象的 submit() 方法来提交线程任务。

  • 调用 ThreadPoolExecutor 对象的 shutdown(wait = True) 方法来关闭线程池。

3.2 代码

from concurrent.futures import ThreadPoolExecutor

def func(i):
    """Demo task body: simply echo the argument to stdout."""
    print(i)

if __name__ == '__main__':
    # BUG FIX: the original passed an undefined name `x` as max_workers,
    # which raises NameError at runtime.  Use a concrete pool size.
    max_threads = 4  # preset number of worker threads
    pool = ThreadPoolExecutor(max_workers=max_threads)
    for i in range(1000):
        pool.submit(func, i)
    # shutdown(wait=True) blocks until every queued task has finished.
    pool.shutdown(wait=True)

3.3 案例

# 网址 :# https://www.pkdoutu.com/photo/list/

import requests
from fake_useragent import UserAgent
import random
from lxml import etree
import os
import multiprocessing
import time
from concurrent.futures import ThreadPoolExecutor
import threading

# Module-level request defaults: one random User-Agent string is chosen
# at import time and reused for every request in this script.
fake_ua = UserAgent()
headers = {
    "User-Agent": fake_ua.random
}


def get_urls():
    """Scrape the doutu list page and return the image URLs found on it."""
    first_url = "https://www.pkdoutu.com/photo/list/"
    response = requests.get(first_url, headers=headers)
    response.encoding = "utf8"
    tree = etree.HTML(response.text)
    anchors = tree.xpath('//*[@id="pic-detail"]/div/div[2]/div[2]/ul/li/div/div/a')
    # Each <a> wraps an <img> whose data-backup attribute holds the real image URL.
    return [a.xpath('./img[@data-backup]/@data-backup')[0] for a in anchors]


def download_photo(url):
    """Download a single image from *url* into the local 'doutuba' folder.

    The file name is taken from the last path component of the URL.
    Write errors are printed instead of raised so one bad image does not
    stop the whole batch.
    """
    dir_name = 'doutuba'
    # makedirs(exist_ok=True) is safe when several threads/processes race
    # to create the directory (a bare os.mkdir could raise FileExistsError).
    os.makedirs(dir_name, exist_ok=True)
    img_title = os.path.basename(url)
    # Binary payload: no point forcing a text encoding on the response.
    response = requests.get(url, headers=headers)
    file_path = os.path.join(dir_name, img_title)
    try:
        with open(file_path, 'wb') as f:
            # A real chunk size matters: iter_content() with no argument
            # yields the body one byte at a time, which is extremely slow.
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
        print(f'{img_title}图片已下载完成')
    except Exception as e:
        print(e)


def main3():
    """Time the ThreadPoolExecutor variant: a fixed pool of 4 workers."""
    # ThreadPoolExecutor approach
    start_time = time.time()
    urls_list = get_urls()
    # `with` calls shutdown(wait=True) on exit, so every submitted download
    # finishes before the timer is read.  The original's unused `t_list`
    # local has been removed.
    with ThreadPoolExecutor(max_workers=4) as pool:
        for url in urls_list:
            pool.submit(download_photo, url)
    print(f'总耗时为:{time.time() - start_time}s')
    # 总耗时为:22.81324315071106s



if __name__ == '__main__':
    # Run the thread-pool-based benchmark variant.
    main3()

4 总案例

# 网址 :# https://www.pkdoutu.com/photo/list/

import requests
from fake_useragent import UserAgent
import random
from lxml import etree
import os
import multiprocessing
import time
from concurrent.futures import ThreadPoolExecutor
import threading

# Module-level request defaults: one random User-Agent string is chosen
# at import time and reused for every request in this script.
fake_ua = UserAgent()
headers = {
    "User-Agent": fake_ua.random
}


def get_urls():
    """Scrape the doutu list page and return the image URLs found on it."""
    first_url = "https://www.pkdoutu.com/photo/list/"
    response = requests.get(first_url, headers=headers)
    response.encoding = "utf8"
    tree = etree.HTML(response.text)
    anchors = tree.xpath('//*[@id="pic-detail"]/div/div[2]/div[2]/ul/li/div/div/a')
    # Each <a> wraps an <img> whose data-backup attribute holds the real image URL.
    return [a.xpath('./img[@data-backup]/@data-backup')[0] for a in anchors]


def download_photo(url):
    """Download a single image from *url* into the local 'doutuba' folder.

    The file name is taken from the last path component of the URL.
    Write errors are printed instead of raised so one bad image does not
    stop the whole batch.
    """
    dir_name = 'doutuba'
    # makedirs(exist_ok=True) is safe when several threads/processes race
    # to create the directory (a bare os.mkdir could raise FileExistsError).
    os.makedirs(dir_name, exist_ok=True)
    img_title = os.path.basename(url)
    # Binary payload: no point forcing a text encoding on the response.
    response = requests.get(url, headers=headers)
    file_path = os.path.join(dir_name, img_title)
    try:
        with open(file_path, 'wb') as f:
            # A real chunk size matters: iter_content() with no argument
            # yields the body one byte at a time, which is extremely slow.
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
        print(f'{img_title}图片已下载完成')
    except Exception as e:
        print(e)


def main1():
    """One process per URL, joined immediately — effectively serial.

    NOTE: t.join() sits inside the creation loop, so each download must
    finish before the next process starts.  This is deliberate: it is the
    slow baseline of the benchmark (~127s vs ~42s for the concurrent
    main2 variant).
    """
    # multiprocessing approach (sequential by construction)
    start_time = time.time()
    urls_list = get_urls()
    for url in urls_list:
        t = multiprocessing.Process(target=download_photo, args=(url,))
        t.start()
        t.join()  # blocks here each iteration: serializes the whole loop

    print(f'总耗时为:{time.time() - start_time}s')
    # measured once: 127.70335650444031s


def main2():
    """Time the multiprocessing variant: one process per image URL."""
    # multiprocessing approach: start everything, join afterwards,
    # so the downloads overlap.
    start_time = time.time()
    workers = []
    for link in get_urls():
        proc = multiprocessing.Process(target=download_photo, args=(link,))
        proc.start()
        workers.append(proc)
    for proc in workers:
        proc.join()

    print(f'总耗时为:{time.time() - start_time}s')
    # measured once: 42.19643306732178s


def main3():
    """Time the ThreadPoolExecutor variant: a fixed pool of 4 workers."""
    # ThreadPoolExecutor approach
    start_time = time.time()
    urls_list = get_urls()
    # `with` calls shutdown(wait=True) on exit, so every submitted download
    # finishes before the timer is read.  The original's unused `t_list`
    # local has been removed.
    with ThreadPoolExecutor(max_workers=4) as pool:
        for url in urls_list:
            pool.submit(download_photo, url)
    print(f'总耗时为:{time.time() - start_time}s')
    # 总耗时为:22.81324315071106s


def main4():
    """Time the threading.Thread variant: one thread per image URL."""
    # threading approach with download_photo
    start_time = time.time()
    workers = []
    for link in get_urls():
        worker = threading.Thread(target=download_photo, args=(link,))
        worker.start()
        workers.append(worker)

    for worker in workers:
        worker.join()
    print(f'总耗时为:{time.time() - start_time}s')
    # measured once: 18.68275022506714s


if __name__ == '__main__':
    # Run the fastest measured variant (threading, ~18.7s).
    main4()
posted @ 2023-03-19 11:57  Chimengmeng  阅读(49)  评论(0)    收藏  举报