A multi-process crawler
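
The script below fetches a small list of pages in parallel with a multiprocessing.Pool. The pool is sized to the number of CPU cores, each URL is submitted with apply_async, and the page HTML is collected afterwards from the returned AsyncResult objects.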

from multiprocessing import Pool
import requests
import os


def get_page(url):
    # Fetch a page in a worker process; return its HTML only on a 200 response.
    response = requests.get(url)
    if response.status_code == 200:
        return response.text


if __name__ == '__main__':
    urls = [
        'https://www.baidu.com',
        'https://www.jd.com',
        'https://www.taobao.com',
    ]
    # Size the pool to the number of CPU cores.
    cpu_count = os.cpu_count()
    p = Pool(cpu_count)
    res_l = []
    for url in urls:
        # Submit each URL without blocking; apply_async returns an AsyncResult.
        res = p.apply_async(get_page, args=(url,))
        res_l.append(res)
    p.close()  # no more tasks will be submitted
    p.join()   # wait for every worker to finish
    for res in res_l:
        # get() blocks until the task is done and returns get_page's result.
        print(res.get())
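
A minimal alternative sketch, offered only as an assumption about how the same crawl could be written more compactly: Pool.map submits the whole URL list at once and returns results in input order, so there is no need to track AsyncResult objects by hand. The timeout value below is an assumed choice, not part of the original code.

from multiprocessing import Pool
import os

import requests


def get_page(url):
    # Fetch one page in a worker process; the timeout keeps a slow site
    # from blocking the worker indefinitely (assumed value).
    response = requests.get(url, timeout=10)
    if response.status_code == 200:
        return response.text
    return None


if __name__ == '__main__':
    urls = [
        'https://www.baidu.com',
        'https://www.jd.com',
        'https://www.taobao.com',
    ]
    # map() blocks until every URL has been fetched and returns the results
    # in the same order as the input list.
    with Pool(os.cpu_count()) as pool:
        pages = pool.map(get_page, urls)
    for page in pages:
        print(page)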

posted @ 2017-10-10 17:15  hzxPeter