第十七章 - 使用期物处理并发
使用期物并发
1、concurrent.futures模块: ThreadPoolExecutor, ProcessPoolExecutor
2、应该使用协程,而不是多线程
3、可以使用PyPy来处理CPU密集型的工作
17.1 示例:网络下载的三种风格
示例1: flags.py顺序下载
import os
import sys
import time

import requests

# Country codes of the 20 most populous countries (one flag per code).
POP20_CC = ("CN IN US ID BR PK NG BD RU JP MX PH VN ET EG DE IR TR CD FR").split()
BASE_URL = "http://flupy.org/data/flags"
DEST_DIR = "downloads/"
# Seconds to wait on the server; without a timeout a stalled connection
# would block the script forever.
REQUEST_TIMEOUT = 10


def save_flag(img, filename):
    """Write the bytes `img` to `filename` inside DEST_DIR.

    DEST_DIR is created on demand so the script also works on a fresh
    checkout where the directory does not exist yet.
    """
    os.makedirs(DEST_DIR, exist_ok=True)
    path = os.path.join(DEST_DIR, filename)
    with open(path, "wb") as fp:
        fp.write(img)


def get_flag(cc):
    """Download the flag image for country code `cc` and return its bytes.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status,
            so an error page is never saved to disk as a .gif file.
        requests.Timeout: if the server does not answer within
            REQUEST_TIMEOUT seconds.
    """
    url = "{}/{cc}/{cc}.gif".format(BASE_URL, cc=cc.lower())
    res = requests.get(url, timeout=REQUEST_TIMEOUT)
    res.raise_for_status()
    return res.content


def show(text):
    """Print `text` followed by a space, on the current line."""
    print(text, end=" ")
    sys.stdout.flush()  # force-flush stdout so progress appears immediately


def download_many(cc_list):
    """Download and save the flags one after another, in sorted order.

    Returns the number of country codes processed.
    """
    for cc in sorted(cc_list):
        image = get_flag(cc)
        show(cc)
        save_flag(image, cc.lower() + ".gif")
    return len(cc_list)


def main(download_many):
    """Run the `download_many` callable on POP20_CC and report the timing."""
    # perf_counter is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments the way time.time() can.
    t0 = time.perf_counter()
    count = download_many(POP20_CC)
    elapsed = time.perf_counter() - t0
    msg = "\n{} flags downloaded in {:.2f}s"
    print(msg.format(count, elapsed))


if __name__ == '__main__':
    main(download_many)
示例2: flags_threadpool.py 使用线程并发下载
import os
import sys
import time
from concurrent import futures

import requests

# Country codes of the 20 most populous countries (one flag per code).
POP20_CC = ("CN IN US ID BR PK NG BD RU JP MX PH VN ET EG DE IR TR CD FR").split()
BASE_URL = "http://flupy.org/data/flags"
DEST_DIR = "downloads/"
MAX_WORKERS = 20
# Seconds to wait on the server; without a timeout a stalled connection
# would block its worker thread forever.
REQUEST_TIMEOUT = 10


def save_flag(img, filename):
    """Write the bytes `img` to `filename` inside DEST_DIR.

    DEST_DIR is created on demand; `exist_ok=True` makes the call harmless
    when several worker threads reach it at the same time.
    """
    os.makedirs(DEST_DIR, exist_ok=True)
    path = os.path.join(DEST_DIR, filename)
    with open(path, "wb") as fp:
        fp.write(img)


def get_flag(cc):
    """Download the flag image for country code `cc` and return its bytes.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status,
            so an error page is never saved to disk as a .gif file.
        requests.Timeout: if the server does not answer within
            REQUEST_TIMEOUT seconds.
    """
    url = "{}/{cc}/{cc}.gif".format(BASE_URL, cc=cc.lower())
    res = requests.get(url, timeout=REQUEST_TIMEOUT)
    res.raise_for_status()
    return res.content


def show(text):
    """Print `text` followed by a space, on the current line."""
    print(text, end=" ")
    sys.stdout.flush()  # force-flush stdout so progress appears immediately


def download_one(cc):
    """Download, announce and save a single flag; return its country code."""
    image = get_flag(cc)
    show(cc)
    save_flag(image, cc.lower() + ".gif")
    return cc


def download_many(cc_list):
    """Download the flags concurrently with a thread pool.

    Returns the number of flags downloaded. Any exception raised inside a
    worker is re-raised here when the results are consumed.
    """
    # ThreadPoolExecutor rejects max_workers <= 0 with ValueError, so an
    # empty input has to be handled before the pool is created.
    if not cc_list:
        return 0
    workers = min(MAX_WORKERS, len(cc_list))
    with futures.ThreadPoolExecutor(workers) as executor:
        res = executor.map(download_one, sorted(cc_list))
    # Consuming the iterator collects the results and surfaces worker errors.
    return len(list(res))


def main(download):
    """Run the `download` callable on POP20_CC and report the timing."""
    # perf_counter is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments the way time.time() can.
    t0 = time.perf_counter()
    count = download(POP20_CC)
    elapsed = time.perf_counter() - t0
    msg = "\n{} flags downloaded in {:.2f}s"
    print(msg.format(count, elapsed))


if __name__ == '__main__':
    main(download_many)