简单爬虫：开协程与不开协程的耗时对比
不开协程
# Sequential baseline: download each page one after another and print the
# total elapsed wall-clock time in seconds.
import time
from urllib import request


def func(name, url):
    """Fetch ``url`` and save the raw response body to ``<name>.html``.

    Args:
        name: Base name of the output file; ``.html`` is appended and the
            file is written to the current working directory.
        url: Address passed to ``urllib.request.urlopen``.
    """
    # The response is used as a context manager so the underlying
    # connection is closed even if reading or writing fails. The request
    # is still issued before the output file is opened, so a failed
    # request does not leave an empty file behind.
    with request.urlopen(url) as ret, open(name + '.html', 'wb') as f:
        f.write(ret.read())


# (output file base name, URL) pairs to download.
url_lst = [
    ('python', 'https://www.python.org/'),
    ('blog', 'http://www.cnblogs.com/Eva-J/articles/8324673.html'),
    ('pypi', 'https://pypi.org/project/pip/'),
    ('blog2', 'https://www.cnblogs.com/z-x-y/p/9237706.html'),
    ('douban', 'https://www.douban.com/'),
]

start = time.time()
# Each download finishes before the next one starts.
for url_item in url_lst:
    func(*url_item)
end = time.time()
print(end - start)
开协程
# Coroutine version: download the same pages concurrently with gevent
# greenlets and print the total elapsed wall-clock time in seconds.
#
# monkey.patch_all() is called before the remaining imports, as gevent's
# documentation requires, so that the blocking socket calls made by urllib
# yield to other greenlets.
from gevent import monkey
monkey.patch_all()

import gevent
import time
from urllib import request


def func(name, url):
    """Fetch ``url`` and save the raw response body to ``<name>.html``.

    Args:
        name: Base name of the output file; ``.html`` is appended and the
            file is written to the current working directory.
        url: Address passed to ``urllib.request.urlopen``.
    """
    # The response is used as a context manager so the underlying
    # connection is closed even if reading or writing fails. The request
    # is still issued before the output file is opened, so a failed
    # request does not leave an empty file behind.
    with request.urlopen(url) as ret, open(name + '.html', 'wb') as f:
        f.write(ret.read())


# (output file base name, URL) pairs to download.
url_lst = [
    ('python1', 'https://www.python.org/'),
    ('blog1', 'http://www.cnblogs.com/Eva-J/articles/8324673.html'),
    ('pypi1', 'https://pypi.org/project/pip/'),
    ('blog21', 'https://www.cnblogs.com/z-x-y/p/9237706.html'),
    ('douban1', 'https://www.douban.com/'),
]

start = time.time()
# Spawn one greenlet per URL, in list order, then wait for all of them.
g_l = [gevent.spawn(func, *url_item) for url_item in url_lst]
gevent.joinall(g_l)
end = time.time()
print(end - start)

浙公网安备 33010602011771号