六、aiohttp高性能异步爬虫实现

import asyncio #协程模块引入
import time
import aiohttp #实现异步操作的模块用于替代request模块(同步模块)

# Record the start time so total elapsed time can be measured at the end.
start = time.time()
urls =['http://XXX','http://xxx1']  # list of target URLs to request asynchronously (placeholders)

async def get_page(url):
    """Fetch a single page asynchronously and return its body as text.

    A UA-spoofing ``headers`` dict, query ``params``, or a ``proxy``
    (e.g. ``proxy='http://...'``) can be passed to ``session.get()``
    if needed.

    :param url: the URL to request
    :return: the response body decoded as a string
    """
    # One ClientSession per call keeps the example self-contained;
    # aiohttp replaces the synchronous requests module here.
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as response:
            # text() returns the body as a string, read() as bytes,
            # json() as a parsed object. All three are coroutines, so
            # they MUST be awaited — the original never awaited text(),
            # meaning no body was actually read. Also return the result
            # so callers can use it.
            page_text = await response.text()
            return page_text

# Schedule one fetch task per URL so the requests run concurrently.
tasks = []

for url in urls:
    coro = get_page(url)
    task = asyncio.ensure_future(coro)
    tasks.append(task)

# Drive the event loop until every scheduled request has completed.
loop = asyncio.get_event_loop()
loop.run_until_complete(asyncio.wait(tasks))
end = time.time()
# Bug fix: report the elapsed time (end - start), not the raw
# end timestamp, which is meaningless as a duration.
print('总耗时:', end - start)

 

posted @ 2022-08-14 11:05  机械猿  阅读(107)  评论(0)    收藏  举报