A demo of asynchronous API requests with coroutines

At work I got a task: after cleaning a batch of material, call a particular API once per cleaned record. Once the cleaning was done it turned out this meant roughly 70,000 requests. To ship quickly I simply fired the requests single-threaded, one after another, and the job took more than 40 minutes, which was far too slow. It occurred to me that Python coroutines can issue the I/O requests asynchronously and finish much faster.
On top of that, asyncio.Semaphore() gives a clean way to cap the concurrency.
With the semaphore limiting concurrency to 100, 1,000 requests completed in about 4 seconds, a huge improvement in I/O throughput.
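
The whole trick is to wrap each request in async with semaphore: so that only a fixed number of coroutines can be inside the request at once. Here is a minimal, self-contained sketch of just that pattern; the example.com URL and the task count of 10 are placeholders, not part of the full demo below:

import asyncio
import aiohttp

async def fetch(session, semaphore, url):
    # only 100 coroutines get past this line at any moment;
    # the rest wait here until a slot frees up
    async with semaphore:
        async with session.get(url) as response:
            return await response.text()

async def main():
    semaphore = asyncio.Semaphore(100)  # cap in-flight requests at 100
    async with aiohttp.ClientSession() as session:
        tasks = [fetch(session, semaphore, "http://example.com") for _ in range(10)]
        results = await asyncio.gather(*tasks)
        print(len(results))

asyncio.run(main())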

import re
from random import sample
import traceback
import json

import aiohttp
import asyncio

CONCURRENCY = 100
semaphore = asyncio.Semaphore(CONCURRENCY)  # at most 100 requests in flight at once
session = None  # shared aiohttp.ClientSession, created in get_coroutines()
i = 0  # simple progress counter


async def request_url(URL):
    global i
    async with semaphore:  # throttles concurrency to CONCURRENCY
        i = i + 1
        print(i)  # crude progress indicator
        async with session.get(URL) as response:
            res = await response.text()
            res_data = json.loads(res)
            return res_data

async def req_url(ele):
    try:
        ele = ele.strip()
        url = "http://gexxxxxxxxxquerydata2.php?condition=%s&format=json" % (ele)
        res = await request_url(url)
        # pull the repost / comment / like counts out of the response
        zhuan_num = res.get("FWNUM", 0)
        ping_num = res.get("CMTNUM", 0)
        zan_num = res.get("LIKENUM", 0)
        res = {"zhuan": int(zhuan_num), "ping": int(ping_num), "zan": int(zan_num)}
        return res
    except Exception as e:
        traceback.print_exc()
        return None


async def get_coroutines(mids):
    global session
    session = aiohttp.ClientSession()
    try:
        scrape_list = [asyncio.ensure_future(req_url(mid)) for mid in mids]
        list_value = await asyncio.gather(*scrape_list)
        print(list_value)
    finally:
        await session.close()  # avoid the "Unclosed client session" warning

if __name__ == '__main__':
    need_analysis = []
    query_type = {}
    # first pass: map each trace id to its query_type from the analysis log
    with open("/data1/shilin5/statistics_py/wisanalysis-009.log") as f:
        lines = f.readlines()
        for line in lines:
            pattern = r"\[(\d+)\].*query_type is (\d+)"
            match = re.search(pattern, line)
            if match:
                result = (match.group(1), match.group(2))
                query_type[result[0]] = result[1]

    # second pass: collect the mid lists for typical_viewpoint queries of type 1
    with open("/data1/shilin5/statistics_py/materialclean-009.log") as f:
        lines = f.readlines()
        for line in lines:
            pattern = re.compile(r'\[(.*?)\]')
            group = pattern.findall(line)
            trace_id = group[0]
            if trace_id in query_type and query_type[trace_id] == '1':
                question = group[1]
                mids = str(group[2]).replace('\'', "").replace(' ', '').split(',')
                if question == "typical_viewpoint":
                    if mids == ['']:
                        continue
                    need_analysis.append(mids)

    # sample 50 mid lists, flatten them, and cap at 1000 mids for the test run
    sample_lists = sample(need_analysis, 50)
    mids = [mid for mids in sample_lists for mid in mids]
    mids = mids[:1000]
    # deprecated since Python 3.10; see the asyncio.run() variant below
    asyncio.get_event_loop().run_until_complete(get_coroutines(mids))
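
On Python 3.7+, asyncio.run() is the recommended entry point (the get_event_loop()/run_until_complete pattern is deprecated since 3.10), and creating the session with async with closes it automatically. A sketch of that variant, reusing the demo's req_url and global session; on 3.10+ the module-level semaphore binds lazily to the running loop, so it works unchanged:

async def main(mids):
    global session
    # async with guarantees the session is closed even if gather raises
    async with aiohttp.ClientSession() as s:
        session = s
        results = await asyncio.gather(*(req_url(mid) for mid in mids))
        print(results)

# drop-in replacement for the run_until_complete(...) line above
asyncio.run(main(mids))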

A useful post about aiohttp:
https://cuiqingcai.com/202272.html

posted @ 2024-03-26 23:10  wsl-hitsz