# 本脚本循环请求同一页面 10 次,总耗时大约 10 秒;如果只是普通地循环请求 10 次而不使用异步,耗时大约在 30 秒以上。当然,受网速影响,这些数据可能并不准确,但大体效果应该不变。
import aiohttp
from bs4 import BeautifulSoup
import asyncio
import time
async def html_list(url, headers, max_retries=3):
    """Fetch ``url`` and return its body decoded as GBK text.

    A response whose status is not 200 is reported and the request is
    retried on the same session, up to ``max_retries`` attempts in total.
    (The previous implementation recursed without any bound and opened a
    new nested session for every retry.)

    Args:
        url: Address of the page to download.
        headers: Mapping of HTTP request headers sent with every attempt.
        max_retries: Maximum number of attempts; with a value of 0 or less
            no request is made.

    Returns:
        The decoded page text, or ``None`` when every attempt returned a
        non-200 status or the request failed with a client error, a
        timeout, or a decoding error.
    """
    # 10 second overall budget per attempt, same limit as before.
    timeout = aiohttp.ClientTimeout(total=10)
    try:
        async with aiohttp.ClientSession() as session:
            for _ in range(max_retries):
                async with session.get(url, headers=headers, timeout=timeout) as response:
                    if response.status == 200:
                        # aiohttp takes the charset as an argument of text();
                        # assigning response.encoding does not select it.
                        # Awaiting here suspends this coroutine so that other
                        # coroutines can run while the body is downloaded.
                        return await response.text(encoding='gbk')
                    print('请求失败:{}'.format(response.status))
    except (aiohttp.ClientError, asyncio.TimeoutError, UnicodeDecodeError) as e:
        # Best effort: report the failure and let the caller see None.
        print(e)
    return None
async def html_detail(html):
    """Print the text of every element matching the ``.tit`` CSS selector.

    Args:
        html: Page markup as a string. ``None`` or an empty string is
            accepted and produces no output.

    Returns:
        None. The matched texts are written to standard output, one per
        line.
    """
    if not html:
        # html_list returns None when the download fails; handing that to
        # the parser would raise instead of simply printing nothing.
        return
    soup = BeautifulSoup(html, 'lxml')
    for node in soup.select('.tit'):
        print(node.get_text())
def html_index(url, headers):
    """Download ``url`` ten times concurrently and print the titles found.

    Each of the ten jobs fetches the page with ``html_list`` and hands the
    markup to ``html_detail``. The jobs are scheduled together so their
    network waits overlap; previously each fetch was driven to completion
    before the next one started, so nothing ran concurrently.

    Args:
        url: Address of the page to download.
        headers: Mapping of HTTP request headers sent with every request.
    """
    async def fetch_and_parse():
        html = await html_list(url, headers)
        if html is not None:
            # A failed download yields None; there is nothing to parse.
            await html_detail(html)

    async def run_all():
        await asyncio.gather(*(fetch_and_parse() for _ in range(10)))

    # Use a loop owned by this function instead of asking for the implicit
    # current loop on every iteration, and always release it afterwards.
    loop = asyncio.new_event_loop()
    try:
        loop.run_until_complete(run_all())
    finally:
        loop.close()
def main():
    """Run the scraper against the hard-coded listing page.

    Builds the request headers, installs a fresh event loop as the current
    loop, runs ``html_index`` and finally closes the loop.
    """
    url = 'http://www.meizitu.com/a/pure.html'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3393.4 Safari/537.36',
    }
    loop = asyncio.new_event_loop()
    # Install the loop *before* doing any work: it used to be set only
    # after html_index had returned, so it was never used.
    asyncio.set_event_loop(loop)
    try:
        html_index(url, headers)
    finally:
        # Do not leave a closed loop registered as the current one.
        asyncio.set_event_loop(None)
        loop.close()
if __name__ == '__main__':
    # Time the whole run. perf_counter() is a monotonic, high-resolution
    # clock, so the printed duration (in seconds) cannot be skewed by
    # system clock adjustments the way a time.time() difference can.
    start = time.perf_counter()
    main()
    print(time.perf_counter() - start)