Python并发请求之requests_futures模块使用

# -*- coding: utf-8 -*-
# @Time : 2019-12-09 10:00
# @Author : cxa
# @File : demo.py
# @Software: PyCharm
from requests_futures.sessions import FuturesSession
from concurrent.futures import as_completed
from lxml import html
import time

# Addresses requested by the demo, one entry per line.
url = [
    "http://www.baidu.com",
    "http://www.163.com",
    "http://www.google.com",
    "http://www.cnblogs.com/c-x-a",
]


def get_node(source, x=".//head/title//text()"):
    """Parse ``source`` as HTML and evaluate the XPath expression ``x`` on it.

    Args:
        source: HTML markup handed to ``lxml.html.fromstring``.
        x: XPath expression to evaluate against the parsed root. The default
            selects the text nodes below ``<head><title>``.

    Returns:
        Whatever ``xpath`` yields for ``x``, unchanged.
    """
    return html.fromstring(source).xpath(x)


def response_hook(resp, *args, **kwargs):
    """Attach convenience attributes to a response (``requests`` response hook).

    After the hook has run, the response carries:

    * ``encoding`` - replaced by ``apparent_encoding`` so ``text`` is decoded
      with the detected charset,
    * ``data`` - the decoded body (``resp.text``),
    * ``code`` - the HTTP status code,
    * ``elapsed`` - the request duration as float seconds.

    The previous version started and stopped its own timer inside the hook,
    so ``elapsed`` only measured the hook's own processing time and threw
    away the duration already stored on the response.

    Args:
        resp: The response object passed to the hook.
        *args: Ignored; accepted for hook-signature compatibility.
        **kwargs: Ignored; accepted for hook-signature compatibility.

    Returns:
        None. The response is modified in place.
    """
    resp.encoding = resp.apparent_encoding
    resp.data = resp.text
    resp.code = resp.status_code

    # Per the requests documentation, ``elapsed`` is a ``timedelta`` recorded
    # by the library itself. Convert it to seconds rather than re-timing here.
    # The ``hasattr`` guard keeps the hook idempotent if it runs twice on the
    # same response (``elapsed`` is then already a float).
    elapsed = resp.elapsed
    if hasattr(elapsed, "total_seconds"):
        resp.elapsed = elapsed.total_seconds()


def get_req():
    """Request every address in ``url`` concurrently and print a short report.

    Each request is issued through a ``FuturesSession`` with four workers and
    ``response_hook`` registered as response hook. Results are reported in
    completion order: status code, page title and elapsed time, followed by a
    separator line.

    A request that failed is reported on its own and does not stop the
    remaining results from being printed. A page without a ``<title>`` is
    reported with an empty title instead of raising ``IndexError``.
    """
    with FuturesSession(max_workers=4) as session:
        futures = [session.get(i, hooks={"response": response_hook}) for i in url]
        for future in as_completed(futures):
            # Futures yielded by as_completed are already finished, so
            # exception() returns immediately. Checking it first keeps one
            # unreachable host from aborting the whole loop.
            error = future.exception()
            if error is not None:
                print("请求失败", error)
                print("=" * 30)
                continue

            resp = future.result()
            print("状态码", resp.code)
            titles = get_node(resp.data)
            # The XPath result is empty when the page has no <title>.
            print("标题", titles[0] if titles else "")
            print("耗时", resp.elapsed)
            print("=" * 30)

if __name__ == "__main__":
    # Run the demo only when the file is executed as a script.
    get_req()

修改之后

from requests_futures.sessions import FuturesSession
from concurrent.futures import as_completed
from lxml import html
import time

# Addresses requested by the demo, one entry per line.
url = [
    "http://www.baidu.com",
    "http://www.163.com",
    "http://www.google.com",
    "http://www.cnblogs.com/c-x-a",
]


class MySession(FuturesSession):
    """``FuturesSession`` that adds convenience attributes to every response.

    Each request gets a response hook that sets ``encoding`` (from
    ``apparent_encoding``), ``data`` (decoded body), ``code`` (status code)
    and ``elapsed`` (float seconds since ``request`` was called) on the
    response, ahead of any hooks supplied by the caller.
    """

    def request(self, method, url, hooks=None, *args, **kwargs):
        """Issue a request with the timing hook installed.

        Args:
            method: HTTP method name.
            url: Target address.
            hooks: Optional hooks mapping. Its ``'response'`` entry may be a
                single callable, a list or a tuple. The mapping and its hook
                collection are copied and never modified.
            *args: Forwarded positionally to the parent ``request``.
            **kwargs: Forwarded to the parent ``request``.

        Returns:
            Whatever the parent ``request`` returns.
        """
        # NOTE(review): ``hooks`` occupies the third positional slot, which
        # the parent signature presumably uses for another argument. Callers
        # should pass extra arguments by keyword; signature kept as is for
        # backward compatibility.
        start = time.time()

        def response_hook(resp, *args, **kwargs):
            resp.encoding = resp.apparent_encoding
            resp.data = resp.text
            resp.code = resp.status_code
            # Measured from the moment request() was called.
            resp.elapsed = time.time() - start

        # Copy the mapping so a dict reused across calls does not accumulate
        # one stale timing hook per request.
        hooks = dict(hooks) if hooks else {}
        existing = hooks.get('response')
        if existing is None:
            hooks['response'] = [response_hook]
        elif isinstance(existing, (list, tuple)):
            # Build a new list: tuples have no insert(), and the caller's
            # list must stay untouched.
            hooks['response'] = [response_hook] + list(existing)
        else:
            hooks['response'] = [response_hook, existing]

        return super(MySession, self).request(method, url, *args, hooks=hooks, **kwargs)


def get_node(source, x=".//head/title//text()"):
    """Evaluate the XPath expression ``x`` against the HTML in ``source``.

    Args:
        source: HTML markup handed to ``lxml.html.fromstring``.
        x: XPath expression to run on the parsed root. The default selects
            the text nodes below ``<head><title>``.

    Returns:
        The unmodified result of the ``xpath`` call.
    """
    parsed = html.fromstring(source)
    return parsed.xpath(x)


def get_req():
    """Request every address in ``url`` concurrently and print a short report.

    Requests go through ``MySession`` with four workers. Results are reported
    in completion order: status code, page title and elapsed time, followed
    by a separator line. The ``code``, ``data`` and ``elapsed`` attributes
    read here are expected to be set on the response by ``MySession``.

    A request that failed is reported on its own and does not stop the
    remaining results from being printed. A page without a ``<title>`` is
    reported with an empty title instead of raising ``IndexError``.
    """
    with MySession(max_workers=4) as session:
        futures = [session.get(i) for i in url]
        for future in as_completed(futures):
            # Futures yielded by as_completed are already finished, so
            # exception() returns immediately. Checking it first keeps one
            # unreachable host from aborting the whole loop.
            error = future.exception()
            if error is not None:
                print("请求失败", error)
                print("=" * 30)
                continue

            resp = future.result()
            print("状态码", resp.code)
            titles = get_node(resp.data)
            # The XPath result is empty when the page has no <title>.
            print("标题", titles[0] if titles else "")
            print("耗时", resp.elapsed)
            print("=" * 30)


if __name__ == "__main__":
    # Run the demo only when the file is executed as a script.
    get_req()
posted @ 2019-12-09 10:31  公众号python学习开发  阅读(1299)  评论(0)  编辑  收藏  举报