urllib3爬取流程
利用urllib3爬取百度图片首页图片
# Download the thumbnail images from a Baidu image-search result page with
# urllib3 and store them as numbered .jpg files in the "meinv" directory.
import os
import re

import urllib3
from fake_useragent import UserAgent

user_agent = UserAgent()
ua = user_agent.random

# Send the same randomized User-Agent on every request, including the initial
# search-page request (the original only sent it for the image downloads).
headers = {
    "user-agent": ua
}

# 1. Locate the target data: the search result page for the keyword "meinv".
url = 'https://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&sf=1&fmq=&pv=&ic=0&nc=1&z=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&fm=index&pos=history&word=meinv'

# 2. Request the page and decode the response body as text.
manager = urllib3.PoolManager()
r = manager.request('GET', url, headers=headers)
text = r.data.decode('utf-8')

# 3. Extract every thumbnail URL embedded in the page with a regular expression.
html_img = re.findall(r'"thumbURL":"(.*?)"', text)

# Create the output directory once, before the loop; exist_ok avoids the
# check-then-create race of os.path.exists() followed by os.mkdir().
os.makedirs("meinv", exist_ok=True)

# 4. Download each image and write the raw bytes to "meinv/<index>.jpg".
for index, value in enumerate(html_img):
    r = manager.request('GET', value, headers=headers)
    if r.status != 200:
        # Skip failed downloads instead of saving an error page as a .jpg.
        continue
    img_file = os.path.join("meinv", str(index) + ".jpg")
    with open(img_file, "wb") as f:
        f.write(r.data)

浙公网安备 33010602011771号