Python 通过 asyncio 协程来下载图片
Python 通过 asyncio 协程的方式来下载图片
""" 以协程的方式下载图片 """ import os import sys import asyncio import aiohttp import traceback sys.path.append(os.path.abspath(os.path.dirname(__file__) + '/' + '..' + '/..')) from db_models import OverseasProductDetail from urllib.parse import urlparse dst_base_path = "/backup/bak38/t_dataset/overseas_products_data/accessories_categories" # 限制最大并发数 semaphore = asyncio.Semaphore(20) def get_filename_from_url(url): parsed = urlparse(url) return os.path.basename(parsed.path) async def download_img(session, url, save_path, file_name): """ 协程方式下载图片或视频 """ try: async with semaphore: if not os.path.exists(save_path): os.makedirs(save_path) save_img_path = os.path.join(save_path, file_name) if os.path.exists(save_img_path): return async with session.get(url, timeout=aiohttp.ClientTimeout(total=15)) as resp: if resp.status == 200: content = await resp.read() with open(save_img_path, "wb") as f: f.write(content) except Exception as e: print(f"下载失败: {url} 错误: {e}") async def process_product(session, overseas_product_detail): try: id = overseas_product_detail.id product_id = overseas_product_detail.product_id product_photos = overseas_product_detail.product_photos product_detail = overseas_product_detail.product_detail if not product_photos or not product_detail: return keyword = overseas_product_detail.keyword chinese_name = overseas_product_detail.chinese_name product_title = product_detail.get("product_title") about_product = product_detail.get("about_product") country = product_detail.get("country", "") product_videos = product_detail.get("product_videos") dst_path = os.path.join(dst_base_path, f"{keyword}|{chinese_name}", country, product_id) os.makedirs(dst_path, exist_ok=True) product_title_path = os.path.join(dst_path, "product_title.txt") if not os.path.exists(product_title_path): with open(product_title_path, "w") as f: f.write(f"{product_title}") about_product_path = os.path.join(dst_path, "about_product.txt") if not os.path.exists(about_product_path): with 
open(about_product_path, "w") as f: f.write(f"{about_product}") tasks = [] for photo_url in product_photos: file_name = get_filename_from_url(photo_url) tasks.append(download_img(session, photo_url, dst_path, file_name)) for video in product_videos: video_url = video.get("video_url") file_name = get_filename_from_url(video_url) tasks.append(download_img(session, video_url, dst_path, file_name)) await asyncio.gather(*tasks) OverseasProductDetail.set_download_img(overseas_product_detail_id=id, download_img=1) print(f"{dst_path} 下载完成") except Exception as e: print(e) traceback.print_exc() async def main(): page = 1 search_num = 0 async with aiohttp.ClientSession() as session: while True: overseas_product_details = OverseasProductDetail.get_products_by_page(page=page, page_size=30) if not overseas_product_details: break tasks = [] for detail in overseas_product_details: tasks.append(process_product(session, detail)) await asyncio.gather(*tasks) page += 1 search_num += 1 if __name__ == '__main__': asyncio.run(main())
 
                    
                 
                
            
         
 浙公网安备 33010602011771号
浙公网安备 33010602011771号