Python简单爬取Amazon图片-其他网站相应修改链接和正则

简单爬取Amazon图片信息

这是一个简单的模板,如果需要爬取其他网站图片信息,更改URL和正则表达式即可

 1 import requests
 2 import re
 3 import os
 4 def down_pics(html):
 5     cwd = os.getcwd()
 6     i=0
 7     url_pics = re.findall('<img alt=".*?" src="(.*?)" height="', str(html.text), re.S)
 8     print(url_pics)
 9     for each in url_pics:
10         print('正在下载第' + str(i) + '张图片,图片地址:' + str(each))
11         try:
12             pic = requests.get(each, timeout=10)
13         except requests.exceptions.ConnectionError:
14             print('错误!当前图片无法下载')
15             continue
16         dir = cwd + '\\images_amazon\\' + str(i) + '.jpg'#运行路径下自己手动新建一个images_amazon文件加,存放图片
17         i+=1
18         print(dir)
19         with open(dir, 'wb') as file:
20             file.write(pic.content)
21     #这个部分主要是解释一下,如果返回的url_pics不仅仅是图片链接,还有其他信息,如何下载图片
22     # url_pics = re.findall('"price": "(.*?)".*?"image": "(.*?)",', str(html.text), re.S)
23     # print(url_pics)
24     # print('找到shecharme_bestseller产品,现在开始下载图片……')
25     # for j in range(len(url_pics)):
26     #     # print(url_pics[0][j])
27     #     print('正在下载第' + str(j) + '张图片,图片地址:' + str(url_pics[j][1]))
28     #     try:
29     #         pic = requests.get(url_pics[j][1], timeout=10)
30     #     except requests.exceptions.ConnectionError:
31     #         print('错误!当前图片无法下载')
32     #         continue
33     #     dir = cwd + '\\images_amazon\\' + str(j) + '.jpg'
34     #
35     #     print(dir)
36     #     with open(dir, 'wb') as file:
37     #         file.write(pic.content)
if __name__ == '__main__':
    # Listing page to scrape; change this URL (and the regex used in
    # down_pics) to target a different site.
    url = 'https://www.amazon.com/Best-Sellers-Womens-Clothing/zgbs/fashion/1040660/ref=zg_bs_nav_2_7147440011'
    # Browser-like User-Agent sent with the page request.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36'}
    # headers must be passed by keyword: the second positional parameter of
    # requests.get is `params`, which would put the User-Agent in the query
    # string instead of the HTTP headers. The timeout stops the script from
    # hanging forever on an unresponsive server.
    result = requests.get(url, headers=headers, timeout=10)
    down_pics(result)

 

posted @ 2020-04-19 10:07  白月如初12138  阅读(869)  评论(0)    收藏  举报