修改 requests_html.AsyncHTMLSession，使其 run() 方法支持 urls 参数

一、修改源代码

#重写AsyncHTMLSession中的run()方法

    def run(self, *coros, urls=None):
        """Run coroutine functions on ``self.loop`` and collect their results.

        Every callable in ``coros`` is invoked to create a coroutine, each
        coroutine is wrapped in a task bound to ``self.loop``, and the loop is
        run until all tasks have finished.

        Args:
            *coros: Coroutine functions (callables returning a coroutine),
                not already-created coroutine objects.
            urls: Optional argument(s) forwarded to every coroutine function.
                A falsy value (including the default ``None``) calls each
                function with no arguments. A ``list`` calls each function
                once per element. Any other truthy value is passed through
                as the single positional argument.

        Returns:
            A list of results in task-creation order: for each function in
            ``coros`` (in the order given), one result per URL (in the order
            given). An empty list is returned when there is nothing to run.

        Raises:
            Exception: whatever a task raised, re-raised when its result is
                collected after all tasks have finished.
        """
        # Normalise the three calling conventions into argument tuples so the
        # scheduling code below exists only once.
        if not urls:
            arg_sets = [()]
        elif isinstance(urls, list):
            arg_sets = [(url,) for url in urls]
        else:
            arg_sets = [(urls,)]

        tasks = [
            asyncio.ensure_future(coro(*args), loop=self.loop)
            for coro in coros
            for args in arg_sets
        ]
        # asyncio.wait() raises ValueError when given an empty collection.
        if not tasks:
            return []

        self.loop.run_until_complete(asyncio.wait(tasks))
        # Read results from the ordered task list, not from the unordered
        # ``done`` set returned by asyncio.wait(), so the output order is
        # deterministic and matches the order of the inputs.
        return [task.result() for task in tasks]

二、测试

from requests_html import AsyncHTMLSession

# One session shared by every request in this demo.
asession = AsyncHTMLSession()


async def get_link(link):
    """Fetch ``link`` with the shared session and return ``html.absolute_links``."""
    response = await asession.get(link)
    return response.html.absolute_links


# Case 1: a single URL string is forwarded to get_link as-is.
url = "https://www.cnblogs.com/"
results = asession.run(get_link, urls=url)
print(results)

# Case 2: a list of URLs results in one get_link call per element.
url = ["https://www.cnblogs.com/", "https://www.jd.com"]
results = asession.run(get_link, urls=url)
print(results)

三、或者新建一个继承自 AsyncHTMLSession 的 NewAsyncHTMLSession 类

from requests_html import AsyncHTMLSession
import asyncio

class NewAsyncHTMLSession(AsyncHTMLSession):
    """AsyncHTMLSession whose ``run()`` can forward URL arguments."""

    def run(self, *coros, urls=None):
        """Run coroutine functions on ``self.loop`` and collect their results.

        Every callable in ``coros`` is invoked to create a coroutine, each
        coroutine is wrapped in a task bound to ``self.loop``, and the loop is
        run until all tasks have finished.

        Args:
            *coros: Coroutine functions (callables returning a coroutine),
                not already-created coroutine objects.
            urls: Optional argument(s) forwarded to every coroutine function.
                A falsy value (including the default ``None``) calls each
                function with no arguments. A ``list`` calls each function
                once per element. Any other truthy value is passed through
                as the single positional argument.

        Returns:
            A list of results in task-creation order: for each function in
            ``coros`` (in the order given), one result per URL (in the order
            given). An empty list is returned when there is nothing to run.

        Raises:
            Exception: whatever a task raised, re-raised when its result is
                collected after all tasks have finished.
        """
        # Normalise the three calling conventions into argument tuples so the
        # scheduling code below exists only once.
        if not urls:
            arg_sets = [()]
        elif isinstance(urls, list):
            arg_sets = [(url,) for url in urls]
        else:
            arg_sets = [(urls,)]

        tasks = [
            asyncio.ensure_future(coro(*args), loop=self.loop)
            for coro in coros
            for args in arg_sets
        ]
        # asyncio.wait() raises ValueError when given an empty collection.
        if not tasks:
            return []

        self.loop.run_until_complete(asyncio.wait(tasks))
        # Read results from the ordered task list, not from the unordered
        # ``done`` set returned by asyncio.wait(), so the output order is
        # deterministic and matches the order of the inputs.
        return [task.result() for task in tasks]

# One session (of the subclass defined above) shared by every request.
asession = NewAsyncHTMLSession()


async def get_link(link):
    """Fetch ``link`` with the shared session and return ``html.absolute_links``."""
    response = await asession.get(link)
    return response.html.absolute_links


# Case 1: a single URL string is forwarded to get_link as-is.
url = "https://www.cnblogs.com/"
results = asession.run(get_link, urls=url)
print(results)

# Case 2: a list of URLs results in one get_link call per element.
url = ["https://www.cnblogs.com/", "https://www.jd.com"]
results = asession.run(get_link, urls=url)
print(results)

 

posted @ 2020-11-02 13:15  Maple_feng  阅读(679)  评论(0)  编辑  收藏  举报