Python简单爬取Amazon图片-其他网站相应修改链接和正则
简单爬取Amazon图片信息
这是一个简单的模板,如果需要爬取其他网站图片信息,更改URL和正则表达式即可
import os
import re

import requests


def down_pics(html):
    """Download every image matched in a fetched page into ``./images_amazon``.

    The page text is scanned with a regular expression for
    ``<img alt="..." src="..." height="...">`` tags; each captured ``src`` URL
    is downloaded and written to ``<cwd>/images_amazon/<n>.jpg``, where ``n``
    counts successfully downloaded images starting from 0.

    Args:
        html: A response-like object exposing the page markup as ``.text``
            (the script passes the result of ``requests.get``).

    Returns:
        None. Images are written to disk and progress is printed to stdout.
    """
    # Create the output folder on demand instead of requiring the user to
    # make it by hand; os.path.join keeps the path valid on every platform.
    save_dir = os.path.join(os.getcwd(), 'images_amazon')
    os.makedirs(save_dir, exist_ok=True)

    url_pics = re.findall('<img alt=".*?" src="(.*?)" height="', str(html.text), re.S)
    print(url_pics)

    i = 0
    for each in url_pics:
        print('正在下载第' + str(i) + '张图片,图片地址:' + str(each))
        try:
            pic = requests.get(each, timeout=10)
            # Treat HTTP error responses as failures so an error page is
            # never saved to disk under a .jpg name.
            pic.raise_for_status()
        except requests.exceptions.RequestException:
            # RequestException also covers read timeouts and HTTP errors,
            # which the narrower ConnectionError does not.
            print('错误!当前图片无法下载')
            continue
        path = os.path.join(save_dir, str(i) + '.jpg')
        i += 1
        print(path)
        with open(path, 'wb') as file:
            file.write(pic.content)

    # If the pattern captures more than the image URL (several groups),
    # re.findall returns a list of tuples; pick the URL element of each match.
    # Example for a pattern capturing (price, image):
    #
    #     url_pics = re.findall('"price": "(.*?)".*?"image": "(.*?)",', str(html.text), re.S)
    #     for j, match in enumerate(url_pics):
    #         pic = requests.get(match[1], timeout=10)  # match[1] is the image URL
    #         ...save pic.content as str(j) + '.jpg' exactly as above...


if __name__ == '__main__':
    url = 'https://www.amazon.com/Best-Sellers-Womens-Clothing/zgbs/fashion/1040660/ref=zg_bs_nav_2_7147440011'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36'}
    # headers must be passed by keyword: the second positional parameter of
    # requests.get is `params`, which would put the dict in the query string.
    result = requests.get(url, headers=headers, timeout=10)
    down_pics(result)

浙公网安备 33010602011771号