# coding: utf-8
import os
import re
import time
import unittest
from urllib.request import *

from bs4 import BeautifulSoup
from selenium import webdriver
# Test class
class baidupic(unittest.TestCase):
    """Drive Baidu image search in PhantomJS and download the images found.

    The single test method opens the Baidu image search page, submits a
    fixed keyword, scrolls the result page so more results are loaded,
    extracts the image metadata from the page source with a regular
    expression and saves every image that can be downloaded to disk.
    """

    # Captures, in order: image URL, file extension, width, height.
    # re.S lets ".*?" span line breaks inside the page source.
    _IMG_PATTERN = re.compile(
        r'data-objurl="(.*?)" data-thumburl=".*?" data-fromurl=".*?" '
        r'data-fromurlhost=".*?" data-ext="(.*?)" data-saved=".*?" '
        r'data-pi=".*?" data-specialtype=".*?" data-cs=".*?" '
        r'data-width="(.*?)" data-height="(.*?)" data-hostname=',
        re.S,
    )

    # Scrolls the window to the bottom of the document.
    _SCROLL_JS = "window.scrollTo(0, document.body.scrollHeight)"

    # How many times to scroll to the bottom; the more scrolls, the more
    # images end up in the page source and get downloaded.
    _SCROLL_COUNT = 2

    # Destination path template, filled with (running index, extension).
    _SAVE_PATH_TEMPLATE = "d:\\mm\\%s.%s"

    def setUp(self):
        """Start the PhantomJS driver, skipping the test if it cannot start."""
        try:
            self.dv = webdriver.PhantomJS()
        except Exception as exc:
            # Any start-up failure (e.g. the PhantomJS binary is not
            # installed) means this environment cannot run the test at all;
            # report it as a skip with the reason rather than as an error.
            self.skipTest("PhantomJS webdriver could not be started: %r" % (exc,))

    def test_getPic(self):
        """Search Baidu images, then download every image that was found.

        Downloads are best-effort: a failing image is reported and skipped,
        and the file index only advances after a successful save so the
        saved files are numbered consecutively.
        """
        dv = self.dv
        dv.get("http://image.baidu.com/")
        dv.find_element_by_id("kw").send_keys("美女")
        dv.find_element_by_class_name("s_btn").click()
        time.sleep(1)

        # Scroll to the bottom repeatedly, pausing after each scroll.
        for _ in range(self._SCROLL_COUNT):
            dv.execute_script(self._SCROLL_JS)
            time.sleep(1)

        # Pull (url, extension, width, height) tuples out of the page source.
        items = self._IMG_PATTERN.findall(dv.page_source)

        index = 1
        for url, ext, width, height in items:
            print("图片地址:%s\r\n类型:%s\r\n宽度:%s\r\n高度:%s\r\n " % (url, ext, width, height))
            try:
                self.saveImg(url, self._SAVE_PATH_TEMPLATE % (index, ext))
            except Exception as exc:
                # Keep going on a bad image, but say why it was skipped.
                # Unlike a bare "except", this lets Ctrl-C stop the run.
                print("skipped %s: %s" % (url, exc))
                continue
            index += 1

    def saveImg(self, imgURL, fileName, timeout=30):
        """Download ``imgURL`` and write its bytes to ``fileName``.

        Args:
            imgURL: URL of the image to fetch.
            fileName: Path of the file to write; its parent directory is
                created when it does not exist yet.
            timeout: Seconds to wait on the connection before giving up.

        Raises:
            Exception: Whatever ``urlopen``, ``os.makedirs`` or ``open``
                raise (for example ``OSError`` or ``ValueError``) is
                propagated to the caller.
        """
        # Read the whole body before touching the disk so that a failed
        # download never leaves an empty file behind.
        with urlopen(imgURL, timeout=timeout) as response:
            data = response.read()

        folder = os.path.dirname(fileName)
        if folder:
            os.makedirs(folder, exist_ok=True)

        with open(fileName, "wb") as out:
            out.write(data)

    def tearDown(self):
        """Shut the browser driver down after the test."""
        self.dv.quit()