# Process pool + requests + Splash implementation
'''
@Description: Demo
@Author: hewm
@Date: 2019-07-29 09:58:31
@LastEditTime: 2019-07-29 14:43:30
@LastEditors:
'''
import requests
import os
import io
import sys
import multiprocessing
import json
from datetime import datetime
from multiprocessing import Pool
# Re-wrap stdout's underlying byte buffer so everything printed is encoded as UTF-8.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')
# NOTE(review): [you_host] / [you_port] look like placeholders to be filled in
# with the real Splash host and port before running.
splash_url = "http://[you_host]:[you_port]/render.html" # Splash address
# Header mapping that run() embeds under the 'headers' key of the JSON body
# it posts to Splash.
headers = {
"User-Agent": "9999999",
"ub": "111"
}
def parse_page(response):
    """Callback for a finished pool task: echo its result to stdout.

    Args:
        response: Whatever the task returned; it is printed as-is.
    """
    task_result = response
    print(task_result)
def run(url):
    """Render ``url`` through Splash and return the page text.

    A JSON body describing the render job is POSTed to ``splash_url``; the
    module-level ``headers`` mapping is forwarded inside that body under the
    ``headers`` key.

    Args:
        url: Address of the page Splash should load with a GET request.

    Returns:
        A ``(url, text)`` tuple, where ``text`` is the decoded response body
        returned by Splash, or ``None`` if the request raised (the exception
        is printed instead of propagated).
    """
    render_timeout = 30  # seconds Splash is allowed to spend on the render

    params = {
        "url": url,
        "http_method": "GET",
        # The documented Splash argument is "images" (plural); 0 disables
        # image downloads. The previous key "image" is not that argument.
        "images": 0,
        # The previous key was "timeout " with a trailing space, which is not
        # the documented "timeout" argument name.
        "timeout": render_timeout,
        "headers": headers,
    }

    try:
        body = requests.post(
            splash_url,
            headers={"Content-Type": "application/json"},
            data=json.dumps(params),
            # Client-side limit slightly above the render timeout so that an
            # unresponsive Splash instance cannot block this worker forever.
            timeout=render_timeout + 10,
        ).text
    except Exception as e:
        # Best-effort worker boundary: report the failure and hand ``None``
        # back to the caller rather than raising inside the pool process.
        print(e)
        return None
    return (url, body)
if __name__ == "__main__":
    # Twenty copies of the same address to request.
    targets = ["http://httpbin.org/get"] * 20
    # Pool of five worker processes.
    workers = Pool(5)
    # Queue one asynchronous run() call per address; parse_page receives
    # each call's return value.
    for target in targets:
        workers.apply_async(run, args=(target,), callback=parse_page)
    # Stop accepting new tasks, then wait for the queued ones to finish.
    workers.close()
    workers.join()