简单爬虫示例:使用协程与不使用协程的耗时对比

不开协程(按顺序逐个请求)

import time
from urllib import request


def func(name, url):
    """Download ``url`` and write the raw response body to ``<name>.html``.

    Args:
        name: Output file path without the ``.html`` suffix.
        url: Address handed to ``urllib.request.urlopen``.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` if the request fails.
        OSError: If the output file cannot be opened or written.
    """
    # Manage the response with ``with`` so the underlying connection is
    # released even when reading the body or writing the file raises.
    with request.urlopen(url) as ret, open(name + '.html', 'wb') as f:
        f.write(ret.read())


# (name, url) pairs: the output file stem and the page to download.
url_lst = [
    ('python', 'https://www.python.org/'),
    ('blog', 'http://www.cnblogs.com/Eva-J/articles/8324673.html'),
    ('pypi', 'https://pypi.org/project/pip/'),
    ('blog2', 'https://www.cnblogs.com/z-x-y/p/9237706.html'),
    ('douban', 'https://www.douban.com/'),
]

# Baseline without coroutines: each download finishes before the next starts.
# perf_counter() is a monotonic clock intended for measuring intervals, whereas
# time.time() can jump if the system clock is adjusted mid-run.
start = time.perf_counter()
for url_item in url_lst:
    func(*url_item)
end = time.perf_counter()
# Elapsed seconds for all sequential downloads.
print(end - start)

 

开协程(使用 gevent 并发请求)

from gevent import monkey
monkey.patch_all()
import gevent
import time
from urllib import request


def func(name, url):
    """Fetch ``url`` and save the response bytes as ``<name>.html``.

    Args:
        name: Output file path without the ``.html`` suffix.
        url: Address handed to ``urllib.request.urlopen``.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` if the request fails.
        OSError: If the output file cannot be opened or written.
    """
    # Close the response deterministically; otherwise every call leaves its
    # connection open until the garbage collector happens to reclaim it.
    with request.urlopen(url) as ret, open(name + '.html', 'wb') as f:
        f.write(ret.read())


# (name, url) pairs: the output file stem and the page to download.
url_lst = [
    ('python1', 'https://www.python.org/'),
    ('blog1', 'http://www.cnblogs.com/Eva-J/articles/8324673.html'),
    ('pypi1', 'https://pypi.org/project/pip/'),
    ('blog21', 'https://www.cnblogs.com/z-x-y/p/9237706.html'),
    ('douban1', 'https://www.douban.com/'),
]

# perf_counter() is a monotonic clock intended for measuring intervals, whereas
# time.time() can jump if the system clock is adjusted mid-run.
start = time.perf_counter()
# Spawn one greenlet per page so the downloads can overlap instead of running
# back to back.
g_l = [gevent.spawn(func, *url_item) for url_item in url_lst]
# Block until every greenlet has finished before stopping the clock.
gevent.joinall(g_l)
end = time.perf_counter()
# Elapsed seconds for all concurrent downloads.
print(end - start)

 

posted @ 2019-01-31 16:08  平平无奇小辣鸡  阅读(203)  评论(0)    收藏  举报