python拓展(进程池+requests+splash实现)

进程池+requests+splash实现

'''
@Description: Demo
@Author: hewm
@Date: 2019-07-29 09:58:31
@LastEditTime: 2019-07-29 14:43:30
@LastEditors:
'''
import requests
import os
import io
import sys
import multiprocessing
import json
from datetime import datetime
from multiprocessing import Pool
# Re-wrap stdout's underlying byte buffer so everything printed by this
# script is encoded as UTF-8.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')

# Splash render.html endpoint. [you_host] and [you_port] are placeholders
# that must be replaced with the address of a running Splash instance.
splash_url = "http://[you_host]:[you_port]/render.html"  # Splash address

# Header dict that run() places under the 'headers' key of the Splash
# request payload.
# NOTE(review): the values look like demo placeholders - confirm before use.
headers = {
    "User-Agent": "9999999",
    "ub": "111"
}

def parse_page(response):
    """Result callback: write the value returned by a worker to stdout.

    Args:
        response: Whatever the worker function returned; it is printed
            using its ``str()`` form.
    """
    rendered = str(response)
    print(rendered)


def run(url, timeout=30):
    """Render ``url`` through Splash and return the rendered page text.

    The request is sent as a JSON POST to the module-level ``splash_url``
    endpoint, forwarding the module-level ``headers`` dict to Splash.

    Args:
        url: Address of the page Splash should fetch and render.
        timeout: Render timeout in seconds passed to Splash. The client-side
            HTTP timeout is set slightly higher so that Splash gets the
            chance to report its own timeout first.

    Returns:
        A ``(url, text)`` tuple on success, or ``None`` when the request
        failed (the error is printed).
    """
    payload = {
        "url": url,
        "http_method": "GET",
        # Splash's option is named "images" (0 = do not download images);
        # the former "image" key is not a documented Splash argument.
        "images": 0,
        # The key must be exactly "timeout"; the former "timeout " (with a
        # trailing space) did not match Splash's argument name.
        "timeout": timeout,
        "headers": headers,
    }
    try:
        text = requests.post(
            splash_url,
            headers={"Content-Type": "application/json"},
            data=json.dumps(payload),
            # Without a client timeout a stalled connection would block this
            # worker forever.
            timeout=timeout + 5,
        ).text
    except Exception as e:
        # Deliberately broad: this is the worker's outermost boundary, and an
        # exception that escaped here would not be printed anywhere.
        print("Splash request for {} failed: {}".format(url, e))
        return None
    return (url, text)

if __name__ == "__main__":
    # Request addresses: the same httpbin URL, 20 times.
    targets = ["http://httpbin.org/get" for _ in range(20)]
    # Pool of 5 worker processes.
    workers = Pool(5)
    for target in targets:
        # Each result is handed to parse_page once the worker finishes.
        workers.apply_async(run, args=(target,), callback=parse_page)
    # No further tasks will be submitted; wait for the queued ones to finish.
    workers.close()
    workers.join()

 

posted @ 2019-07-29 14:52  争-渡  阅读(498)  评论(0)    收藏  举报