Day 17 17.5 各种线程方法对比下载斗图吧图片
各种线程方法对比
1 threading.Thread 方法
1.1 步骤如下:
-
调用 threading.Thread 类的构造器创建一个线程对象(每个任务对应一个线程,并不是线程池)。
-
定义一个普通函数作为线程任务。
-
调用 Thread 对象的 start() 方法来启动线程任务。
-
调用 Thread 对象的 join() 方法,阻塞主线程直到该线程执行完毕。
1.2 代码:
import threading
def func(i):
    """Print the given task value; serves as the demo thread task."""
    print(i)
if __name__ == '__main__':
    # Start one thread per task value, keeping each handle for the join pass.
    workers = []
    for n in range(1000):
        worker = threading.Thread(target=func, args=(n,))
        worker.start()
        workers.append(worker)
    # Wait for every started thread to finish.
    for worker in workers:
        worker.join()
1.3 案例:
# 网址 :# https://www.pkdoutu.com/photo/list/
import requests
from fake_useragent import UserAgent
import random
from lxml import etree
import os
import multiprocessing
import time
from concurrent.futures import ThreadPoolExecutor
import threading
# Request headers shared by get_urls() and download_photo().
# fake_ua.random is evaluated once, when this dict is built, so every request
# that passes `headers` sends the same User-Agent value.
fake_ua = UserAgent()
headers = {
"User-Agent": fake_ua.random
}
def get_urls():
    """Collect the image URLs shown on the photo list page.

    Fetches the listing page, selects the anchor elements of the picture
    grid, and reads the ``data-backup`` attribute of the ``<img>`` inside
    each anchor.

    Returns:
        list: One URL string for each anchor that has an
        ``img[@data-backup]`` child, in the order the XPath query returned
        the anchors.
    """
    first_url = "https://www.pkdoutu.com/photo/list/"
    # Bound the wait so an unresponsive server cannot hang the script.
    response = requests.get(first_url, headers=headers, timeout=10)
    response.encoding = "utf8"
    tree = etree.HTML(response.text)
    a_lists = tree.xpath('//*[@id="pic-detail"]/div/div[2]/div[2]/ul/li/div/div/a')
    urls_list = []
    for a in a_lists:
        img_src = a.xpath('./img[@data-backup]/@data-backup')
        # Skip anchors without a matching image instead of raising IndexError.
        if img_src:
            urls_list.append(img_src[0])
    return urls_list
def download_photo(url):
    """Download one image into the local ``doutuba`` directory.

    The file is named after the last path component of ``url``. Request and
    filesystem errors are printed rather than raised, so one failed image
    does not abort the worker that called this function.

    Args:
        url: URL of the image to fetch.
    """
    file_name = 'doutuba'
    # exist_ok avoids the check-then-create race when several threads or
    # processes reach this point at the same moment.
    os.makedirs(file_name, exist_ok=True)
    img_title = os.path.basename(url)
    file_path = os.path.join(file_name, img_title)
    try:
        # A timeout keeps a stalled connection from blocking join() forever.
        response = requests.get(url, headers=headers, timeout=10)
        with open(file_path, 'wb') as f:
            # Write the body in one call: iter_content() defaults to 1-byte
            # chunks, which meant one write() per byte.
            f.write(response.content)
        print(f'{img_title}图片已下载完成')
    except (requests.RequestException, OSError) as e:
        print(e)
def main4():
    """Download every listed image with one ``threading.Thread`` per URL.

    Prints the total elapsed wall-clock time once all threads have finished.
    """
    began = time.time()
    workers = []
    for link in get_urls():
        worker = threading.Thread(target=download_photo, args=(link,))
        worker.start()
        workers.append(worker)
    # Block until every download thread is done before the timer is read.
    for worker in workers:
        worker.join()
    print(f'总耗时为:{time.time() - began}s')
if __name__ == '__main__':
    # Run the one-thread-per-image variant when executed as a script.
    main4()
    # Recorded total elapsed time: 18.68275022506714s
2 multiprocessing.Process 方法
2.1 步骤如下:
-
调用 multiprocessing.Process 类的构造器创建一个进程对象(每个任务对应一个进程,并不是线程池)。
-
定义一个普通函数作为进程任务。
-
调用 multiprocessing.Process 对象的 start() 方法来启动进程任务。
-
调用 multiprocessing.Process 对象的 join() 方法,阻塞主进程直到该子进程执行完毕。
2.2 代码:
import multiprocessing
def func(i):
    """Print the given task value; serves as the demo process task."""
    print(i)
if __name__ == '__main__':
    # Start one process per task value, keeping each handle for the join pass.
    procs = []
    for n in range(100):
        proc = multiprocessing.Process(target=func, args=(n,))
        proc.start()
        procs.append(proc)
    # Wait for every started process to finish.
    for proc in procs:
        proc.join()
2.3 案例
# 网址 :# https://www.pkdoutu.com/photo/list/
import requests
from fake_useragent import UserAgent
import random
from lxml import etree
import os
import multiprocessing
import time
from concurrent.futures import ThreadPoolExecutor
import threading
# Request headers shared by get_urls() and download_photo().
# fake_ua.random is evaluated once, when this dict is built, so every request
# that passes `headers` sends the same User-Agent value.
fake_ua = UserAgent()
headers = {
"User-Agent": fake_ua.random
}
def get_urls():
    """Collect the image URLs shown on the photo list page.

    Fetches the listing page, selects the anchor elements of the picture
    grid, and reads the ``data-backup`` attribute of the ``<img>`` inside
    each anchor.

    Returns:
        list: One URL string for each anchor that has an
        ``img[@data-backup]`` child, in the order the XPath query returned
        the anchors.
    """
    first_url = "https://www.pkdoutu.com/photo/list/"
    # Bound the wait so an unresponsive server cannot hang the script.
    response = requests.get(first_url, headers=headers, timeout=10)
    response.encoding = "utf8"
    tree = etree.HTML(response.text)
    a_lists = tree.xpath('//*[@id="pic-detail"]/div/div[2]/div[2]/ul/li/div/div/a')
    urls_list = []
    for a in a_lists:
        img_src = a.xpath('./img[@data-backup]/@data-backup')
        # Skip anchors without a matching image instead of raising IndexError.
        if img_src:
            urls_list.append(img_src[0])
    return urls_list
def download_photo(url):
    """Download one image into the local ``doutuba`` directory.

    The file is named after the last path component of ``url``. Request and
    filesystem errors are printed rather than raised, so one failed image
    does not abort the worker that called this function.

    Args:
        url: URL of the image to fetch.
    """
    file_name = 'doutuba'
    # exist_ok avoids the check-then-create race when several threads or
    # processes reach this point at the same moment.
    os.makedirs(file_name, exist_ok=True)
    img_title = os.path.basename(url)
    file_path = os.path.join(file_name, img_title)
    try:
        # A timeout keeps a stalled connection from blocking join() forever.
        response = requests.get(url, headers=headers, timeout=10)
        with open(file_path, 'wb') as f:
            # Write the body in one call: iter_content() defaults to 1-byte
            # chunks, which meant one write() per byte.
            f.write(response.content)
        print(f'{img_title}图片已下载完成')
    except (requests.RequestException, OSError) as e:
        print(e)
def main2():
    """Download every listed image with one ``multiprocessing.Process`` per URL.

    All processes are started first and joined afterwards; the total elapsed
    wall-clock time is printed once every process has finished.
    """
    began = time.time()
    procs = []
    for link in get_urls():
        proc = multiprocessing.Process(target=download_photo, args=(link,))
        proc.start()
        procs.append(proc)
    # Block until every download process is done before the timer is read.
    for proc in procs:
        proc.join()
    print(f'总耗时为:{time.time() - began}s')
if __name__ == '__main__':
    # Run the one-process-per-image variant when executed as a script.
    main2()
    # Recorded total elapsed time: 42.19643306732178s
3 ThreadPoolExecutor 类方法(线程池)
3.1 步骤如下:
-
调用 ThreadPoolExecutor 类的构造器创建一个线程池。
-
定义一个普通函数作为线程任务。
-
调用 ThreadPoolExecutor 对象的 submit() 方法来提交线程任务。
-
调用 ThreadPoolExecutor 对象的 shutdown(wait = True) 方法来关闭线程池。
3.2 代码
from concurrent.futures import ThreadPoolExecutor
def func(i):
    """Print the given task value; serves as the demo pool task."""
    print(i)
if __name__ == '__main__':
    # Preset number of worker threads. The original passed an undefined
    # name here, which raised NameError; adjust the value as needed.
    max_workers = 4
    # Leaving the with-block waits for all submitted tasks and shuts the pool
    # down, including when submit() raises part-way through the loop.
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        for i in range(1000):
            pool.submit(func, i)
3.3 案例
# 网址 :# https://www.pkdoutu.com/photo/list/
import requests
from fake_useragent import UserAgent
import random
from lxml import etree
import os
import multiprocessing
import time
from concurrent.futures import ThreadPoolExecutor
import threading
# Request headers shared by get_urls() and download_photo().
# fake_ua.random is evaluated once, when this dict is built, so every request
# that passes `headers` sends the same User-Agent value.
fake_ua = UserAgent()
headers = {
"User-Agent": fake_ua.random
}
def get_urls():
    """Collect the image URLs shown on the photo list page.

    Fetches the listing page, selects the anchor elements of the picture
    grid, and reads the ``data-backup`` attribute of the ``<img>`` inside
    each anchor.

    Returns:
        list: One URL string for each anchor that has an
        ``img[@data-backup]`` child, in the order the XPath query returned
        the anchors.
    """
    first_url = "https://www.pkdoutu.com/photo/list/"
    # Bound the wait so an unresponsive server cannot hang the script.
    response = requests.get(first_url, headers=headers, timeout=10)
    response.encoding = "utf8"
    tree = etree.HTML(response.text)
    a_lists = tree.xpath('//*[@id="pic-detail"]/div/div[2]/div[2]/ul/li/div/div/a')
    urls_list = []
    for a in a_lists:
        img_src = a.xpath('./img[@data-backup]/@data-backup')
        # Skip anchors without a matching image instead of raising IndexError.
        if img_src:
            urls_list.append(img_src[0])
    return urls_list
def download_photo(url):
    """Download one image into the local ``doutuba`` directory.

    The file is named after the last path component of ``url``. Request and
    filesystem errors are printed rather than raised, so one failed image
    does not abort the worker that called this function.

    Args:
        url: URL of the image to fetch.
    """
    file_name = 'doutuba'
    # exist_ok avoids the check-then-create race when several threads or
    # processes reach this point at the same moment.
    os.makedirs(file_name, exist_ok=True)
    img_title = os.path.basename(url)
    file_path = os.path.join(file_name, img_title)
    try:
        # A timeout keeps a stalled connection from blocking the pool forever.
        response = requests.get(url, headers=headers, timeout=10)
        with open(file_path, 'wb') as f:
            # Write the body in one call: iter_content() defaults to 1-byte
            # chunks, which meant one write() per byte.
            f.write(response.content)
        print(f'{img_title}图片已下载完成')
    except (requests.RequestException, OSError) as e:
        print(e)
def main3():
    """Download every listed image through a four-worker ``ThreadPoolExecutor``.

    Prints the total elapsed wall-clock time after all submitted downloads
    have completed.
    """
    start_time = time.time()
    urls_list = get_urls()
    # Leaving the with-block waits for all submitted tasks and shuts the pool
    # down, including when submit() raises part-way through the loop.
    with ThreadPoolExecutor(max_workers=4) as pool:
        for url in urls_list:
            pool.submit(download_photo, url)
    print(f'总耗时为:{time.time() - start_time}s')
    # Recorded total elapsed time for this variant: 22.81324315071106s
if __name__ == '__main__':
    # Run the thread-pool variant when executed as a script.
    main3()
4 总案例
# 网址 :# https://www.pkdoutu.com/photo/list/
import requests
from fake_useragent import UserAgent
import random
from lxml import etree
import os
import multiprocessing
import time
from concurrent.futures import ThreadPoolExecutor
import threading
# Request headers shared by get_urls() and download_photo().
# fake_ua.random is evaluated once, when this dict is built, so every request
# that passes `headers` sends the same User-Agent value.
fake_ua = UserAgent()
headers = {
"User-Agent": fake_ua.random
}
def get_urls():
    """Collect the image URLs shown on the photo list page.

    Fetches the listing page, selects the anchor elements of the picture
    grid, and reads the ``data-backup`` attribute of the ``<img>`` inside
    each anchor.

    Returns:
        list: One URL string for each anchor that has an
        ``img[@data-backup]`` child, in the order the XPath query returned
        the anchors.
    """
    first_url = "https://www.pkdoutu.com/photo/list/"
    # Bound the wait so an unresponsive server cannot hang the script.
    response = requests.get(first_url, headers=headers, timeout=10)
    response.encoding = "utf8"
    tree = etree.HTML(response.text)
    a_lists = tree.xpath('//*[@id="pic-detail"]/div/div[2]/div[2]/ul/li/div/div/a')
    urls_list = []
    for a in a_lists:
        img_src = a.xpath('./img[@data-backup]/@data-backup')
        # Skip anchors without a matching image instead of raising IndexError.
        if img_src:
            urls_list.append(img_src[0])
    return urls_list
def download_photo(url):
    """Download one image into the local ``doutuba`` directory.

    The file is named after the last path component of ``url``. Request and
    filesystem errors are printed rather than raised, so one failed image
    does not abort the worker that called this function.

    Args:
        url: URL of the image to fetch.
    """
    file_name = 'doutuba'
    # exist_ok avoids the check-then-create race when several threads or
    # processes reach this point at the same moment.
    os.makedirs(file_name, exist_ok=True)
    img_title = os.path.basename(url)
    file_path = os.path.join(file_name, img_title)
    try:
        # A timeout keeps a stalled connection from blocking its worker forever.
        response = requests.get(url, headers=headers, timeout=10)
        with open(file_path, 'wb') as f:
            # Write the body in one call: iter_content() defaults to 1-byte
            # chunks, which meant one write() per byte.
            f.write(response.content)
        print(f'{img_title}图片已下载完成')
    except (requests.RequestException, OSError) as e:
        print(e)
def main1():
    """Download the images with ``multiprocessing.Process``, one at a time.

    Each process is joined right after it is started, so the next download
    does not begin until the previous one has finished. Prints the total
    elapsed wall-clock time at the end.
    """
    began = time.time()
    for link in get_urls():
        proc = multiprocessing.Process(target=download_photo, args=(link,))
        proc.start()
        proc.join()
    print(f'总耗时为:{time.time() - began}s')
    # Recorded total elapsed time for this variant: 127.70335650444031s
def main2():
    """Download every listed image with one ``multiprocessing.Process`` per URL.

    All processes are started first and joined afterwards; the total elapsed
    wall-clock time is printed once every process has finished.
    """
    began = time.time()
    procs = []
    for link in get_urls():
        proc = multiprocessing.Process(target=download_photo, args=(link,))
        proc.start()
        procs.append(proc)
    # Block until every download process is done before the timer is read.
    for proc in procs:
        proc.join()
    print(f'总耗时为:{time.time() - began}s')
    # Recorded total elapsed time for this variant: 42.19643306732178s
def main3():
    """Download every listed image through a four-worker ``ThreadPoolExecutor``.

    Prints the total elapsed wall-clock time after all submitted downloads
    have completed.
    """
    start_time = time.time()
    urls_list = get_urls()
    # Leaving the with-block waits for all submitted tasks and shuts the pool
    # down, including when submit() raises part-way through the loop.
    with ThreadPoolExecutor(max_workers=4) as pool:
        for url in urls_list:
            pool.submit(download_photo, url)
    print(f'总耗时为:{time.time() - start_time}s')
    # Recorded total elapsed time for this variant: 22.81324315071106s
def main4():
    """Download every listed image with one ``threading.Thread`` per URL.

    Prints the total elapsed wall-clock time once all threads have finished.
    """
    began = time.time()
    workers = []
    for link in get_urls():
        worker = threading.Thread(target=download_photo, args=(link,))
        worker.start()
        workers.append(worker)
    # Block until every download thread is done before the timer is read.
    for worker in workers:
        worker.join()
    print(f'总耗时为:{time.time() - began}s')
    # Recorded total elapsed time for this variant: 18.68275022506714s
if __name__ == '__main__':
    # Run the one-thread-per-image variant when executed as a script.
    main4()
本文来自博客园,作者:Chimengmeng,转载请注明原文链接:https://www.cnblogs.com/dream-ze/p/17232747.html

浙公网安备 33010602011771号