Python 自动化库 Selenium 的使用

基本使用

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions
from selenium.webdriver.support.wait import WebDriverWait

# Start a Chrome browser session controlled through WebDriver.
browser = webdriver.Chrome()
try:
    # get() loads a web page in the current browser session.
    browser.get('https://www.baidu.com')
    # Look the element up by its id. find_element(By.ID, ...) is the
    # locator-based equivalent of find_element_by_id(); the trailing
    # underscore keeps the name from shadowing the built-in input().
    input_ = browser.find_element(By.ID, 'kw')
    # send_keys() simulates typing into the element.
    input_.send_keys('Python')
    # Keys is a set of special key codes; ENTER = '\ue007'.
    input_.send_keys(Keys.ENTER)
    # WebDriverWait takes the WebDriver instance and a timeout in seconds.
    wait = WebDriverWait(browser, 10)
    # until() keeps calling the given condition with the driver until its
    # return value is not false.
    wait.until(
        # presence_of_element_located() expects the element to exist in the
        # page's DOM (it need not be visible) and returns the WebElement
        # once the locator finds it. By is the set of supported locator
        # strategies; By.ID = "id".
        expected_conditions.presence_of_element_located(
            (By.ID, 'content_left')
        )
    )
    print(
        browser.current_url,      # URL of the current page
        browser.get_cookies(),    # cookies visible in the current session, as dicts
        browser.page_source,      # source of the current page
        sep='\n'
    )

finally:
    # Close the current window, whether or not the steps above succeeded.
    browser.close()

声明浏览器对象

from selenium import webdriver

# Some of the browsers Selenium supports. Each call creates a separate
# driver object for the named browser.
browser1 = webdriver.Chrome()
browser2 = webdriver.Firefox()
browser3 = webdriver.Edge()
# NOTE(review): PhantomJS support is believed to have been dropped from
# newer Selenium releases — confirm against the installed version.
browser4 = webdriver.PhantomJS()
browser5 = webdriver.Safari()

# 完成浏览器对象的初始化并将其赋值为 browser 对象

访问页面

from selenium import webdriver

browser = webdriver.Chrome()
try:
    # get() requests the web page.
    browser.get('https://www.taobao.com')
    # page_source holds the source code of the loaded page.
    print(browser.page_source)
finally:
    # Close the window even if loading or printing the page fails, so the
    # browser is not left open.
    browser.close()

查找节点

拿淘宝网为例

from selenium import webdriver

browser = webdriver.Chrome()
try:
    browser.get('https://www.taobao.com')
    # Look up the same node (id='q' in the Taobao page source) in three
    # ways: by ID, by CSS selector and by XPath.
    input_first = browser.find_element_by_id('q')
    input_second = browser.find_element_by_css_selector('#q')
    input_third = browser.find_element_by_xpath('//*[@id="q"]')
    print(
        'ID方式查找节点:\t', input_first,
        'CSS选择器方式查找节点:\t', input_second,
        'XPath方式查找节点:\t', input_third,
        sep='\n'
    )
finally:
    # Close the window even if one of the lookups above raises, so the
    # browser is not left open.
    browser.close()

# 输出：
ID方式查找节点:    
<selenium.webdriver.remote.webelement.WebElement (session="1ec980e4cd9be81c212a1b2285039dd9", element="0.49282688108570993-1")>
CSS选择器方式查找节点:    
<selenium.webdriver.remote.webelement.WebElement (session="1ec980e4cd9be81c212a1b2285039dd9", element="0.49282688108570993-1")>
XPath方式查找节点:    
<selenium.webdriver.remote.webelement.WebElement (session="1ec980e4cd9be81c212a1b2285039dd9", element="0.49282688108570993-1")>
# 输出均为WebElement 类型

获取节点的方法

# 获取单个节点的方法：
find_element_by_id
find_element_by_name
find_element_by_xpath
find_element_by_link_text
find_element_by_partial_link_text
find_element_by_tag_name
find_element_by_class_name
find_element_by_css_selector

find_element()方法查找单个节点

from selenium import webdriver
from selenium.webdriver.common.by import By
 
browser = webdriver.Chrome()
try:
    browser.get('https://www.taobao.com')
    # find_element() takes a locator strategy from By plus the value to
    # search for, and returns a single node.
    input_first = browser.find_element(By.ID, 'q')
    print(input_first)
finally:
    # Close the window even if the page load or the lookup fails.
    browser.close()


# find_element_by_id(id)就等价于find_element(By.ID, id)，二者得到的结果完全一致。
# By是一个支持的定位策略集
# 注意：find_element_by_* 系列方法在 Selenium 4 中已被弃用，并自 4.3.0 版本起移除，新代码应统一使用 find_element(By.XXX, value)。

find_elements()方法查找多个节点

from selenium import webdriver
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
try:
    browser.get('https://www.taobao.com')

    # Collect the same set of nodes twice: once through the dedicated
    # CSS-selector helper and once through the generic find_elements().
    list1 = browser.find_elements_by_css_selector('.service-bd li')
    list2 = browser.find_elements(By.CSS_SELECTOR, '.service-bd li')

    print(
        'find_elements_by_css_selector()方法:', list1,
        'find_elements()方法:', list2,
        sep='\n'
    )
finally:
    # Close the window even if the page load or a lookup fails.
    browser.close()

# 输出：
find_elements_by_css_selector()方法:
[<selenium.webdriver.remote.webelement.WebElement (session="4bcb567fe9900ef1ec4336651fc12a1d", element="0.257017382611505-1")>, 
......
<selenium.webdriver.remote.webelement.WebElement (session="4bcb567fe9900ef1ec4336651fc12a1d", element="0.257017382611505-16")>]
find_elements()方法:
[<selenium.webdriver.remote.webelement.WebElement (session="4bcb567fe9900ef1ec4336651fc12a1d", element="0.257017382611505-1")>, 
......
 <selenium.webdriver.remote.webelement.WebElement (session="4bcb567fe9900ef1ec4336651fc12a1d", element="0.257017382611505-16")>]

find_elements_by_css_selector() 和 find_elements() 两种写法的输出结果一样。

节点交互

from selenium import webdriver
import time

browser = webdriver.Chrome()
browser.get('https://www.taobao.com')

text_field = browser.find_element_by_id('q')
# Simulate typing "iPhone" on the keyboard.
text_field.send_keys('iPhone')
# Pause for one second before continuing.
time.sleep(1)
# Clear what was typed, then type "iPad" instead.
text_field.clear()
text_field.send_keys('iPad')
# Find a node whose class is "tb-bg" and simulate a mouse click on it.
browser.find_element_by_class_name('tb-bg').click()

Selenium驱动浏览器来执行一些操作

动作链

from selenium import webdriver
from selenium.webdriver import ActionChains

browser = webdriver.Chrome()
browser.get('http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')
# Switch into the frame named 'iframeResult' before looking for the nodes.
browser.switch_to.frame('iframeResult')

# The node to drag, and the node to drop it onto.
draggable = browser.find_element_by_css_selector('#draggable')
droppable = browser.find_element_by_css_selector('#droppable')

# Build the action chain, queue a drag-and-drop (hold the left mouse button
# on the source element, move to the target element, release), and then
# perform() every stored action.
ActionChains(browser).drag_and_drop(draggable, droppable).perform()

拖拽前
拖拽后

执行JavaScript

利用 execute_script()方法将进度条下拉到最底部，然后弹出 alert提示框。

from selenium import webdriver

browser = webdriver.Chrome()
browser.get('http://www.zhihu.com/explore')
# execute_script() runs the given JavaScript in the page: first scroll to
# the very bottom of the document ...
browser.execute_script('window.scrollTo(0, document.body.scrollHeight)')
# ... then pop up an alert box, as the section text describes.
browser.execute_script('alert("To Bottom")')

获取节点信息

获取属性

get_attribute()方法可以获取属性

from selenium import webdriver

browser = webdriver.Chrome()
try:
    browser.get('http://www.zhihu.com/explore')

    logo = browser.find_element_by_id('zh-top-inner')
    # Print the WebElement itself, then the value of its "class" attribute
    # as returned by get_attribute().
    print(
        logo,
        logo.get_attribute('class'),
        sep='\n'
    )
finally:
    # Close the window even if the page load or the lookup fails.
    browser.close()

# 输出：
<selenium.webdriver.remote.webelement.WebElement (session="7f325513a2f34aaa95612698d78817e6", element="0.5056570582847388-1")>
zg-wrap modal-shifting clearfix

获取文本值

text属性可以获取文本值

相当于 Beautiful Soup 的 get_text()方法、 pyquery 的 text()方法

from selenium import webdriver

browser = webdriver.Chrome()
try:
    browser.get('http://www.zhihu.com/explore')

    zhihu = browser.find_element_by_class_name('zu-top-link-logo')
    # The text attribute gives the node's text content.
    print(zhihu.text)
finally:
    # Close the window even if the page load or the lookup fails.
    browser.close()

# 输出：
知乎

获取id、位置、标签名和大小

可以通过 id、location、tag_name 和 size 属性，分别获取节点的 id、位置、标签名和大小

from selenium import webdriver

browser = webdriver.Chrome()
try:
    browser.get('http://www.zhihu.com/explore')

    input_ = browser.find_element_by_class_name('zu-top-add-question')
    # Print, one per line, the node's id, location, tag name and size.
    print(
        input_.id,
        input_.location,
        input_.tag_name,
        input_.size,
        sep='\n'
    )
finally:
    # Close the window even if the page load or the lookup fails.
    browser.close()

# 输出：
0.9009302916784063-1
{'x': 849, 'y': 7}
button
{'height': 32, 'width': 66}

切换Frame

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
browser = webdriver.Chrome()
browser.get('http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')

# switch_to.frame() moves the driver into the named child frame.
browser.switch_to.frame('iframeResult')
try:
    # Try to find the logo from inside the child frame.
    logo = browser.find_element_by_class_name('logo')
except NoSuchElementException:
    # Raised when the element cannot be found.
    print('NO LOGO')

# Go back to the parent frame and look for the logo again from there.
browser.switch_to.parent_frame()
parent_logo = browser.find_element_by_class_name('logo')
print(parent_logo)
print(parent_logo.text)

# 输出：
NO LOGO
<selenium.webdriver.remote.webelement.WebElement (session="d24c9d62b8c5882adec32f3ed55b5d7b", element="0.9706135395535092-2")>
RUNOOB.COM

延时等待

隐式等待

from selenium import webdriver

browser = webdriver.Chrome()
# implicitly_wait() sets an implicit wait. The default is 0 seconds; with a
# value of 10, a node that is not found right away is looked for again for a
# while before giving up. This kind of wait is easily affected by how long
# the page takes to load.
browser.implicitly_wait(10)
browser.get('https://www.zhihu.com/explore')
# Named input_ (not input) so the built-in input() is not shadowed.
input_ = browser.find_element_by_class_name('zu-top-add-question')
print(input_)

显式等待

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

browser = webdriver.Chrome()
browser.get('https://www.taobao.com/')

# Explicit wait with a 10-second timeout. until() keeps calling the given
# condition with the driver until its return value is not false.
waiter = WebDriverWait(browser, 10)
input_locator = (By.ID, 'q')
button_locator = (By.CSS_SELECTOR, '.btn-search')

# presence_of_element_located: the node has appeared.
located_input = waiter.until(EC.presence_of_element_located(input_locator))
# element_to_be_clickable: the node can be clicked.
clickable_button = waiter.until(EC.element_to_be_clickable(button_locator))

print(located_input, clickable_button, sep='\n')

# 输出：
<selenium.webdriver.remote.webelement.WebElement (session="497bc36663dd6ed41d7c59bd6a51982f", element="0.8346683456577526-1")>
<selenium.webdriver.remote.webelement.WebElement (session="497bc36663dd6ed41d7c59bd6a51982f", element="0.8346683456577526-2")>

显式等待的条件有很多，如下

class title_is(object):
    """Expectation that checks the title of a page.

    title is the expected title, which must be an exact match.

    Returns True if the title matches, False otherwise.
    """


class title_contains(object):
    """Expectation that checks the title contains a case-sensitive substring.

    title is the fragment of the title that is expected.

    Returns True when the title matches, False otherwise.
    """


class presence_of_element_located(object):
    """Expectation that checks an element is present on the DOM of a page.

    Presence does not necessarily mean that the element is visible.

    locator is used to find the element.

    Returns the WebElement once it is located.
    """


class url_contains(object):
    """Expectation that checks the current url contains a case-sensitive substring.

    url is the fragment of the url that is expected.

    Returns True when the url matches, False otherwise.
    """


class url_matches(object):
    """Expectation that checks the current url against a pattern.

    pattern is the expected pattern, which must be an exact match.

    Returns True if the url matches, False otherwise.
    """


class url_to_be(object):
    """Expectation that checks the current url.

    url is the expected url, which must be an exact match.

    Returns True if the url matches, False otherwise.
    """


class url_changes(object):
    """Expectation that checks the current url differs from a given url.

    url is the url to compare against; the current url must not be an
    exact match for it.

    Returns True if the url is different, False otherwise.
    """


class visibility_of_element_located(object):
    """Expectation that checks an element is present on the DOM and visible.

    Visibility means that the element is not only displayed but also has a
    height and width that are greater than 0.

    locator is used to find the element.

    Returns the WebElement once it is located and visible.
    """


class visibility_of(object):
    """Expectation that checks an element already present on the DOM is visible.

    Visibility means that the element is not only displayed but also has a
    height and width that are greater than 0.

    element is the WebElement.

    Returns the (same) WebElement once it is visible.
    """


class presence_of_all_elements_located(object):
    """Expectation that checks at least one element is present on a web page.

    locator is used to find the element.

    Returns the list of WebElements once they are located.
    """


class visibility_of_any_elements_located(object):
    """Expectation that checks at least one element is visible on a web page.

    locator is used to find the element.

    Returns the list of WebElements once they are located.
    """


class visibility_of_all_elements_located(object):
    """Expectation that checks all elements are present on the DOM and visible.

    Visibility means that the elements are not only displayed but also have
    a height and width that are greater than 0.

    locator is used to find the elements.

    Returns the list of WebElements once they are located and visible.
    """


class text_to_be_present_in_element(object):
    """Expectation that checks the given text is present in the specified element.

    Takes a locator and the text.
    """


class text_to_be_present_in_element_value(object):
    """Expectation that checks the given text is present in the element's value.

    NOTE(review): the original sentence was cut off after "the element's";
    "value" is inferred from the class name — confirm.

    Takes a locator and the text.
    """


class frame_to_be_available_and_switch_to_it(object):
    """Expectation that checks the given frame is available to switch to.

    If the frame is available, the given driver is switched to the
    specified frame.
    """


class invisibility_of_element_located(object):
    """Expectation that checks an element is invisible or not present on the DOM.

    locator is used to find the element.
    """


class invisibility_of_element(invisibility_of_element_located):
    """Expectation that checks an element is invisible or not present on the DOM.

    element is either a locator (text) or a WebElement.
    """


class element_to_be_clickable(object):
    """Expectation that checks an element is visible and enabled.

    An element in that state is one you can click.
    """


class staleness_of(object):
    """Wait until an element is no longer attached to the DOM.

    element is the element to wait for.

    Returns False if the element is still attached to the DOM, True
    otherwise.
    """


class element_to_be_selected(object):
    """Expectation that checks the given element is selected.

    element is a WebElement object.
    """


class element_located_to_be_selected(object):
    """Expectation that the element to be located is selected.

    locator is a tuple of (by, path).
    """


class element_selection_state_to_be(object):
    """Expectation that checks if the given element is selected.

    element is a WebElement object.
    is_selected is a Boolean.
    """


class element_located_selection_state_to_be(object):
    """Expectation that locates an element and checks its selection state.

    The check is whether the element is in the selection state specified.

    locator is a tuple of (by, path).
    is_selected is a Boolean.
    """


class number_of_windows_to_be(object):
    """Expectation that the number of windows is a certain value."""


class new_window_is_opened(object):
    """Expectation that a new window will be opened.

    Opening it makes the number of window handles increase.
    """


class alert_is_present(object):
    """Expectation that an alert is present."""


def _find_element(driver, by):
    """Look up an element.

    Logs and re-raises ``WebDriverException`` if it is thrown.

    Only the docstring is reproduced in this listing; the function body is
    not shown.
    """

前进和返回

back()方法，后退到上一个页面。forward()方法，前进到下一个页面

import time
from selenium import webdriver

browser = webdriver.Chrome()
try:
    # Visit three pages in a row, pausing one second after each.
    browser.get('https://www.baidu.com/')
    time.sleep(1)
    browser.get('https://www.taobao.com/')
    time.sleep(1)
    browser.get('https://www.zhihu.com/')
    time.sleep(1)
    # Currently on https://www.zhihu.com/; going back lands on the previous
    # page, https://www.taobao.com/.
    browser.back()
    time.sleep(1)
    # Currently on https://www.taobao.com/; going forward lands on the next
    # page, https://www.zhihu.com/.
    browser.forward()
    time.sleep(1)
finally:
    # Close the window even if one of the navigation steps fails.
    browser.close()

Cookies

from selenium import webdriver

browser = webdriver.Chrome()
try:
    browser.get('https://www.zhihu.com/explore')
    # Print the cookies as they are after loading the page.
    print(browser.get_cookies())
    # Add one cookie, then print the cookies again.
    browser.add_cookie({'name': 'name', 'domain': 'www.zhihu.com', 'value': 'germey'})
    print(browser.get_cookies())
    # Delete every cookie, then print the cookies once more.
    browser.delete_all_cookies()
    print(browser.get_cookies())
finally:
    # Close the window even if one of the cookie operations fails.
    browser.close()

# 输出：
[{'domain': '.zhihu.com', 'expiry': 1579115127, 'httpOnly': False, 'name': '__utmz', 'path': '/', 'secure': False, 'value': '51854390.1563347127.1.1.utm......]
[{'domain': 'www.zhihu.com', 'expiry': 2194067127, 'httpOnly': False, 'name': 'name', 'path': '/', 'secure': True, 'value': 'germey'}, {'domain': '.zhihu.com......]
[]

选项卡管理

import time
from selenium import webdriver

browser = webdriver.Chrome()
browser.get('https://www.baidu.com')
# execute_script() runs the JavaScript passed in; window.open() opens a
# new tab.
browser.execute_script('window.open()')

# window_handles lists the handles of all tabs currently open.
print(browser.window_handles)

# switch_to.window() switches to the tab with the given handle; index 1 is
# the second handle in the list.
second_tab = browser.window_handles[1]
browser.switch_to.window(second_tab)

browser.get('https://www.taobao.com')
time.sleep(1)

# Switch back to the first handle in the list and load another page there.
first_tab = browser.window_handles[0]
browser.switch_to.window(first_tab)
browser.get('https://zhihu.com')
# Close the tab the driver is currently on.
browser.close()

# 输出：
['CDwindow-BBF992DA636EC22831C022F29A7F976A', 'CDwindow-37A0508493A023D6BC1393D11D5F4D9F']

异常处理

from selenium import webdriver
from selenium.common.exceptions import TimeoutException, NoSuchElementException

browser = webdriver.Chrome()
# A single outer try/finally covers both steps, so the window is closed even
# when get() fails with something other than TimeoutException.
try:
    try:
        browser.get('https://www.baidu.com')
    except TimeoutException:
        print('Time Out')
    try:
        browser.find_element_by_id('hello')
    except NoSuchElementException:
        # Raised when no element matches the lookup.
        print('No Element')
finally:
    browser.close()

# 输出：
No Element

posted @ 2019-07-17 15:41 LeeHua 阅读(475) 评论(0) 收藏举报

刷新页面返回顶部

Lee Hua's Blog

热爱编程 -- 写Bug