Python 爬虫之多线程爬取菜价
# Multi-threaded scraper for the Xinfadi market price API.
#
# Every result page is requested with a POST, and the records of each page
# are appended as rows to data.csv.
import csv
import threading
import time
from concurrent.futures import ThreadPoolExecutor, as_completed

import requests

# Output file and CSV writer shared by every worker thread.
# newline="" lets the csv module control line endings itself; without it
# each row is followed by an empty line on Windows.
f = open("data.csv", mode="w", encoding="utf-8", newline="")
csvwriter = csv.writer(f)

# csv.writer objects are not documented as thread-safe, so all writes to the
# shared writer are serialised through this lock.
_write_lock = threading.Lock()

# Record keys copied into each CSV row, in column order.
_FIELDS = (
    "prodCat",
    "prodName",
    "lowPrice",
    "avgPrice",
    "highPrice",
    "specInfo",
    "place",
    "unitInfo",
    "pubDate",
)

# Start time of the program, used for the final elapsed-time report.
now1 = time.time()


def download_one_page(url, headers, data, timeout=30):
    """Fetch one page of records and append them to the shared CSV file.

    Args:
        url: Endpoint that receives the POST request.
        headers: HTTP headers sent with the request.
        data: Form payload of the request (page size and page number).
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot block a worker thread forever.

    Raises:
        requests.RequestException: The request failed or timed out.
        ValueError: The response body was not valid JSON.
    """
    time.sleep(2)  # pause before every request to keep the crawl rate low
    resp = requests.post(url, headers=headers, data=data, timeout=timeout)
    print(time.strftime("%Y-%m-%d %H:%M:%S"), resp.status_code)

    # A response without a "list" key is treated as an empty page.
    records = resp.json().get("list") or []
    rows = [tuple(record.get(key) for key in _FIELDS) for record in records]

    # Write the whole page under the lock so that rows produced by different
    # threads are never interleaved.
    with _write_lock:
        csvwriter.writerows(rows)


def main():
    """Download every page through a thread pool and report the elapsed time."""
    url = "http://www.xinfadi.com.cn/getPriceData.html"
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.39"
        ),
        "Referer": "http://www.xinfadi.com.cn/priceDetail.html",
    }

    # The file is closed only after the pool has shut down, i.e. after every
    # worker has finished writing.
    with f, ThreadPoolExecutor(5) as pool:
        # Pass the callable and its arguments separately: calling the function
        # here would run it in the main thread and submit its return value.
        pending = {
            pool.submit(
                download_one_page,
                url,
                headers,
                {"limit": "20", "current": page},
            ): page
            for page in range(1, 13845)
        }
        # Report each page when it has actually finished, not when it was
        # queued; a failed page is reported and does not stop the others.
        for future in as_completed(pending):
            page = pending[future]
            try:
                future.result()
            except (requests.RequestException, ValueError) as exc:
                print("第", page, "页下载失败:", exc)
            else:
                print("第", page, "页下载完毕")

    # Record the finish time and print the total duration in seconds.
    now2 = time.time()
    print("全部下载完毕,耗时:", int(now2 - now1), "秒")


if __name__ == "__main__":
    main()
开源改变生活

浙公网安备 33010602011771号