selenium+PhantomJS简单爬虫

#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
Created on 2017年10月19日
@author:
'''
import time
from selenium import webdriver
from selenium.common import exceptions

#注意:Python文件名或者包名不要命名为selenium,会导致无法导入

def main():
    """Load the index page in headless PhantomJS and print the banner markup.

    Starts PhantomJS from a hard-coded Windows executable path, opens the
    page, and prints the ``innerHTML`` of the element matched by the
    ``.banner`` CSS selector. A ``TimeoutException`` is printed instead of
    being propagated; any other exception still propagates to the caller.
    The browser is shut down in every case.
    """
    # Raw string so the backslashes in the Windows path are never parsed as
    # escape sequences (``\g`` and ``\p`` are invalid escapes in a normal
    # string literal and trigger a DeprecationWarning on Python 3.6+).
    browser = webdriver.PhantomJS(executable_path=r'C:\geckodriver\phantomjs.exe')
    try:
        browser.get(r'https://.com/index.shtml')
        html = browser.find_element_by_css_selector('.banner').get_attribute('innerHTML')
        print(html)
    except exceptions.TimeoutException as ex:
        print(ex)
    finally:
        # Always end the WebDriver session; without this the phantomjs.exe
        # process stays alive after the script finishes.
        browser.quit()

# Run the crawler only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()

 版本说明:

py3.6

selenium3.6

geckodriver-v0.19.0-win64

phantomjs-2.1.1-windows

posted on 2017-10-19 13:03  VincentZhu  阅读(414)  评论(0)  编辑  收藏  举报