1 """
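The key is to find the correct request URL and then work out the pattern
behind its query parameters; comparing a few captured URLs is enough to
derive that pattern.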
3 """
4
5 import requests
6 from fake_useragent import UserAgent
7 import os
8 from lxml import etree
9 import json
10 import time
11
# A random User-Agent string is picked once at start-up and reused for every
# request made by this script.
headers = {'User-Agent': UserAgent().random}

# Search term; it is also used as the name of the output directory.
keyword = "周星驰"
# URL template for the async image endpoint. The placeholders are filled, in
# order, with: the search term, the `first` offset and the `relp` value.
url = 'https://cn.bing.com/images/async?q={}&first={}&count=35&relp={}&tsc=ImageHoverTitle&mmasync=1'

# Create the output directory if needed and make it the working directory so
# that downloaded files can be written with bare file names.
# `exist_ok=True` avoids the check-then-create race of exists() + mkdir().
os.makedirs(keyword, exist_ok=True)
os.chdir(keyword)

# Paging state: `first` and `relp` are advanced together in steps of 35
# (the URL requests count=35 per page).
first = 35
relp = 35
# 1-based index of the next image; used as the output file name.
count = 1
24
# Seconds to wait on a single HTTP request before giving up, so a stalled
# connection cannot hang the script forever.
REQUEST_TIMEOUT = 10

# Download loop: fetch one result page per iteration, save every thumbnail it
# lists, then advance the offsets. Stops once `first` exceeds 35 * 5.
while True:
    try:
        # Fetch one page of results and collect the JSON metadata stored in
        # the "m" attribute of every <a class="iusc"> element.
        page = requests.get(
            url.format(keyword, first, relp),
            headers=headers,
            timeout=REQUEST_TIMEOUT,
        )
        page.raise_for_status()
        html = etree.HTML(page.content.decode())
        pics_url = html.xpath("//a[@class='iusc']/@m")
    except Exception as e:
        # Best-effort: report the failure and treat the page as empty. The
        # offsets below still advance, so a persistently failing page can no
        # longer cause an endless retry loop.
        print(e)
        pics_url = []

    for pic_meta in pics_url:
        try:
            # 'turl' is the image URL field read from the metadata JSON.
            pic_url = json.loads(pic_meta)['turl']
            pic_resp = requests.get(
                pic_url, headers=headers, timeout=REQUEST_TIMEOUT
            )
            # Do not save an HTTP error body as if it were an image.
            pic_resp.raise_for_status()

            with open('{}.jpg'.format(count), "wb") as f:
                f.write(pic_resp.content)
        except Exception as e:
            # Skip only the broken image; the rest of the page is still
            # processed. `count` is not advanced, so numbering stays dense.
            print(e)
            continue

        print('第{}张已下载完成'.format(count))

        count += 1
        # Small pause between downloads to avoid hammering the server.
        time.sleep(0.5)

    # Always move on to the next page, whether or not this one succeeded.
    first += 35
    relp += 35

    if first > 35 * 5:
        break