import os
import re
import requests
from urllib.parse import urlencode
from multiprocessing import Pool  # used to fetch result pages in parallel
from requests.exceptions import RequestException
from hashlib import md5

def page_get(url):
    """Fetch a URL and return the response body, or None on failure."""
    try:
        html = requests.get(url)
        if html.status_code == 200:
            return html.text
        return None
    except RequestException:
        print('Request failed')
        return None
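
# page_html builds the query for Baidu's image-search JSON endpoint
# (image.baidu.com/search/acjson). 'pn' is the result offset, 'rn' the page
# size (30 here), and 'queryWord'/'word' the search keyword ("清晰图片",
# i.e. "clear images"); the remaining fields appear to be carried over
# as-is from a captured browser request.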
def page_html(pn):
    data = {
        'tn': 'resultjson_com',
        'ipn': 'rj',
        'ct': 201326592,
        'is': '',
        'fp': 'result',
        'queryWord': '清晰图片',
        'cl': 2,
        'lm': -1,
        'ie': 'utf-8',
        'oe': 'utf-8',
        'adpicid': '',
        'st': -1,
        'z': 0,
        'ic': 0,
        'hd': 0,
        'latest': 0,
        'copyright': 0,
        'word': '清晰图片',
        's': '',
        'se': '',
        'tab': '',
        'width': 1920,
        'height': 1080,
        'face': '',
        'istype': '',
        'qc': '',
        'nc': 1,
        'fr': '',
        'expermode': '',
        'force': '',
        'pn': pn,
        'rn': 30,
        'gsm': '1e',
        '1561179768452': ''
    }
    url = 'https://image.baidu.com/search/acjson?' + urlencode(data)
    html = page_get(url)
    if html is None:  # request failed; skip this page
        return
    for item in page_re(html):
        print(item)
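
# page_re scrapes the fromPageTitle/thumbURL/middleURL/hoverURL fields
# straight out of the raw response text with a regex rather than a JSON
# parser, then downloads each thumbnail and yields its title and URL.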
def page_re(html):
    pattern = re.compile('.*?fromPageTitle":"(.*?)",.*?thumbURL":"(.*?)",.*?middleURL":"(.*?)",.*?hoverURL":"(.*?)",', re.S)
    results = re.findall(pattern, html)
    for item in results:
        write_to(item[1])  # item[1] is the thumbURL
        yield {
            'title': item[0],
            'img': item[1]
        }

def write_to(url):
    try:
        html = requests.get(url)
        if html.status_code == 200:
            save_img(html.content)
        return None
    except RequestException:
        print('Request failed')
        return None
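
# save_img names each file by the MD5 hash of its bytes, so the same image
# downloaded more than once is written to disk only once.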
def save_img(content):
    file_path = '{0}/{1}.{2}'.format(r'C:\Users\Administrator\Desktop\img', md5(content).hexdigest(), 'jpg')
    if not os.path.exists(file_path):  # skip duplicates already on disk
        with open(file_path, 'wb') as f:
            f.write(content)
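
# main fans the ten page offsets (0, 30, ..., 270) out across a process pool
# instead of fetching them serially.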
def main():
    pool = Pool()
    pool.map(page_html, [i * 30 for i in range(10)])
    pool.close()
    pool.join()

if __name__ == '__main__':
    main()