#! /usr/bin/python
# -*- coding: UTF-8 -*-
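# Crawl the image list pages on www.shuaia.net, collect each picture's
# detail-page link, then download the pictures to a local folder.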
from bs4 import BeautifulSoup
from urllib.request import urlretrieve
import requests
import time
import os
class Download(object):
    def __init__(self):
        # Pretend to be a regular browser so the site does not reject the requests.
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"
        }

    def list_url(self, num):
        """Collect the download link address of every image on pages 1 through num."""
        list_url = []
        for i in range(1, num + 1):
            if i == 1:
                url = 'http://www.shuaia.net/meinv/'
            else:
                # Later list pages follow the index_<n>.html pattern; use the loop index i.
                url = 'http://www.shuaia.net/meinv/index_%d.html' % i
            req = requests.get(url=url, headers=self.headers)
            req.encoding = 'utf-8'
            html = req.text
            bf = BeautifulSoup(html, 'lxml')
            targets_url = bf.find_all(class_="item-img")
            for each in targets_url:
                # Store "<image title>=<detail page URL>"; split on the first '=' later.
                list_url.append(each.img.get('alt') + '=' + each.get('href'))
            time.sleep(3)  # pause between list pages to go easy on the server
        print("Finished collecting link addresses")
        # print(list_url)
        return list_url

    def download_img(self, list_url):
        """Download each image referenced by the collected detail-page links."""
        # Make sure the target folder exists; urlretrieve fails on a missing path.
        os.makedirs('D:\\PycharmProjects1\\images', exist_ok=True)
        for each_img in list_url:
            # Split only on the first '=' in case the URL itself contains one.
            img_info = each_img.split('=', 1)
            targets_url = img_info[1]
            filename = img_info[0] + '.jpg'
            print('Downloading: ' + filename)
            # Fetch the detail page, then pull the real image URL out of its content block.
            img_req = requests.get(url=targets_url, headers=self.headers)
            img_req.encoding = 'utf-8'
            img_html = img_req.text
            img_bf_1 = BeautifulSoup(img_html, 'lxml')
            img_url = img_bf_1.find_all('div', class_='wr-single-content')
            img_bf_2 = BeautifulSoup(str(img_url), 'lxml')
            img_url_all = img_bf_2.img.get('src')
            urlretrieve(url=img_url_all, filename='D:\\PycharmProjects1\\images\\' + filename)
            time.sleep(10)  # throttle downloads between images

if __name__ == '__main__':
    dow = Download()
    list_url = dow.list_url(2)
    dow.download_img(list_url)