from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
url1 = 'http://www.netbian.com' # Site to crawl
path = 'chromedriver.exe' # Path to the ChromeDriver executable
# Create the Chrome options object
chrome_options = Options()
# Do not load images
prefs = {"profile.managed_default_content_settings.images":2}
chrome_options.add_experimental_option("prefs", prefs)
# Start the browser; this single instance is shared by all crawl functions
browser = webdriver.Chrome(path, options=chrome_options)
# Category page URLs, filled by getFenglei()
fengleis = []
# One {'img_url': ..., 'title': ...} dict per image, filled by getData()
data = []
def getFenglei():
    """Collect category page URLs from the site header into ``fengleis``.

    Opens ``url1`` and reads the ``href`` of links ``a[2]`` and ``a[3]`` found
    under the first list item of the header menu.  Each address is printed
    and, when present, appended to the module-level ``fengleis`` list.

    Raises:
        NoSuchElementException: if one of the expected links is not on the
            page.
    """
    browser.get(url1)
    for i in range(2, 3 + 1):
        # find_element(By.XPATH, ...) works on both Selenium 3 and 4, whereas
        # the find_element_by_* helpers were removed in Selenium 4.3.
        link = browser.find_element(
            By.XPATH, '//*[@id="header"]/div[1]/ul/li[1]/div/a[%d]' % i
        )
        fenlei = link.get_attribute('href')
        print(fenlei)
        # A link without an href would later break URL building in page().
        if fenlei:
            fengleis.append(fenlei)
# Fetch image data
def getData(url):
    """Collect full-size image URLs and titles from one listing page.

    For each of the first two thumbnails on ``url`` this follows the
    thumbnail link to its detail page, then the download link to the
    full-size page, and appends ``{'img_url': ..., 'title': ...}`` to the
    module-level ``data`` list.  Thumbnails that link to
    ``http://pic.netbian.com/`` are skipped, and so are entries for which an
    expected element cannot be found.

    Args:
        url: Address of the listing page to scan.
    """
    # Number of thumbnails taken from each page: li[1] and li[2].
    for i in range(1, 3):
        # Reload the listing every time: the previous iteration navigated
        # away to the detail and download pages.
        browser.get(url)
        try:
            # Address the thumbnail links to (the detail page).
            li_a = browser.find_element(
                By.XPATH, '//*[@id="main"]/div[3]/ul/li[%d]//a' % i
            ).get_attribute('href')
            if li_a == 'http://pic.netbian.com/':
                continue
            print(li_a)
            browser.get(li_a)  # open the detail page
            download = browser.find_element(
                By.XPATH, '//*[@id="main"]/div[3]/div/div/a'
            ).get_attribute('href')
            print(download)  # address of the full-size image page
            browser.get(download)
            big_img = browser.find_element(
                By.XPATH, '//*[@id="endimg"]/tbody/tr/td/a/img'
            )
            img_url = big_img.get_attribute('src')
            title = big_img.get_attribute('title')
        except NoSuchElementException as e:
            # One malformed entry must not abort the whole crawl, because
            # downloads only start after every page has been visited.
            print(e)
            continue
        print(img_url, title)
        data.append({'img_url': img_url, 'title': title})
# Pagination
def page(url):
    """Crawl the first nine listing pages of one category.

    The first page is the category address itself; pages 2 to 9 are
    addressed as ``<url>index_<n>.htm``, e.g.
    http://www.netbian.com/feizhuliu/index_2.htm.

    Args:
        url: Category address that the page file name is appended to.
    """
    # Page 1 has no index suffix.
    getData(url)
    # Remaining pages: index_2.htm .. index_9.htm
    for page_no in range(2, 10):
        getData(url + 'index_%d.htm' % page_no)
def downImg():
    """Download every image recorded in ``data`` into the ``./all`` folder.

    Each entry's ``img_url`` is fetched with a browser-like User-Agent and
    saved as ``./all/<title>.jpg``.  Download is best-effort: a network or
    file error on one image is printed and the loop moves on to the next.
    """
    import os
    import re

    import requests

    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'
    }
    # open() does not create missing directories, so make the target first.
    os.makedirs('./all', exist_ok=True)
    for i in data:
        # A missing or empty title would otherwise give 'None.jpg' / '.jpg'.
        title = i.get('title') or 'untitled'
        # Replace characters that are not allowed in file names (notably on
        # Windows) so the title cannot break the path.
        filename = re.sub(r'[\\/:*?"<>|]', '_', title)
        try:
            # The timeout keeps one stalled connection from hanging the run.
            resp = requests.get(i.get('img_url'), headers=headers, timeout=30)
            # Do not save an HTTP error page as if it were an image.
            resp.raise_for_status()
            # Download path
            with open('./all/%s.jpg' % filename, 'wb') as f:
                f.write(resp.content)
            print('正在下载' + title)
        except (requests.RequestException, OSError) as e:
            print(e)
            continue
def run():
    """Crawl every category, then download the collected images.

    Fills ``fengleis`` via ``getFenglei()``, crawls each category with
    ``page()``, and finally saves the images with ``downImg()``.
    """
    getFenglei()  # collect the category URLs
    for url in fengleis:  # crawl one category at a time
        page(url)
    # Download the images
    downImg()


# Always shut the browser down, even when the crawl fails, so the Chrome and
# chromedriver processes do not linger after the script ends.
try:
    run()
finally:
    browser.quit()