- 编写爬虫时，有时会发现页面上能看到的数据却抓取不到：页面源代码中没有相应的内容。这类数据是动态加载的（由 JavaScript 在浏览器中生成），需要使用 selenium 模块驱动真实浏览器渲染页面后，再抓取这些动态数据。
-
![]()
-
from selenium import webdriver
from lxml import etree
from time import sleep
# Scrape link titles from a page whose content is loaded dynamically:
# let a real browser render the page, then parse the rendered source with lxml.

# Instantiate a browser object (passing the path to the browser driver binary).
bro = webdriver.Chrome(executable_path='./chromedriver.exe')
try:
    # Send the request; the browser loads the page.
    bro.get('http://www.mafengwo.cn/jd/10208/gonglve.html')
    # Grab the page source as currently held by the browser.
    page_text = bro.page_source
    # Parse the data.
    tree = etree.HTML(page_text)
    t_list = tree.xpath('/html/body/div[2]/div[5]/div/div[1]/ul/li')
    for item in t_list:
        # xpath() returns a list; skip <li> entries that have no titled link
        # instead of crashing with IndexError on the first one.
        titles = item.xpath('./a/@title')
        if titles:
            print(titles[0])
    sleep(5)
finally:
    # Always end the session, even if loading or parsing failed, so the
    # browser and driver process are not left running.
    bro.quit()
-
from selenium import webdriver
from lxml import etree
from time import sleep
# Browser automation demo: type a query into the site's search box, submit it,
# then navigate back to the previous page.

# Instantiate a browser object (passing the path to the browser driver binary).
bro = webdriver.Chrome(executable_path='./chromedriver.exe')
try:
    bro.get('http://www.mafengwo.cn/jd/10208/gonglve.html')
    # Locate the search input by its element id and type the query into it.
    search = bro.find_element_by_id('_j_head_search_input')
    search.send_keys('四川美术学院')
    # Locate the submit control by id and click it to run the search.
    submit = bro.find_element_by_id('_j_head_search_link')
    submit.click()
    sleep(2)
    # Go one step back in the browser history.
    bro.back()
    sleep(5)
finally:
    # Always end the session, even if an element lookup or click raised,
    # so the browser and driver process are not left running.
    bro.quit()
- selenium模拟登录
-
![]()
-
from selenium import webdriver
from time import sleep
# Simulated login: fill in the Qzone account/password form and submit it.
bro = webdriver.Chrome(executable_path='chromedriver.exe')
try:
    bro.get('https://qzone.qq.com/')
    # The elements below are looked up after switching into the 'login_frame'
    # frame; lookups only search the currently selected frame.
    bro.switch_to.frame('login_frame')
    # Presumably the control that switches to account/password login
    # (inferred from the element id) -- confirm against the page.
    switcher = bro.find_element_by_id('switcher_plogin')
    switcher.click()
    account = bro.find_element_by_id('u')
    account.send_keys('66666666')
    sleep(3)
    pswd = bro.find_element_by_id('p')
    pswd.send_keys('1111')
    sleep(2)
    sub = bro.find_element_by_id('login_button')
    sleep(2)
    sub.click()
    sleep(3)
finally:
    # The original ended with the truncated attribute access `bro.clos`, which
    # raises AttributeError and never closes the browser. quit() ends the
    # session and runs even when an earlier step fails.
    bro.quit()
- 无头浏览器（运行时不弹出浏览器窗口，在后台执行）与规避检测的实现
-
from selenium import webdriver
from time import sleep
from selenium.webdriver.chrome.options import Options #实现无头浏览器(后台执行,无可视化)
from selenium.webdriver import ChromeOptions #规避检测
# Headless Qzone login with automation-detection evasion.
#
# All settings go on ONE options object. The original built two objects and
# passed them as `chrome_options=` and `options=` together; in Selenium 3 the
# deprecated `chrome_options` argument replaces `options`, so the
# 'excludeSwitches' setting was silently discarded.
option = ChromeOptions()
# Headless browser: run in the background with no visible window.
option.add_argument('--headless')
option.add_argument('--disable-gpu')
# Detection evasion: exclude the 'enable-automation' switch.
option.add_experimental_option('excludeSwitches', ['enable-automation'])

bro = webdriver.Chrome(executable_path='chromedriver.exe', options=option)
try:
    bro.get('https://qzone.qq.com/')
    # The elements below are looked up after switching into the 'login_frame'
    # frame; lookups only search the currently selected frame.
    bro.switch_to.frame('login_frame')
    # Presumably the control that switches to account/password login
    # (inferred from the element id) -- confirm against the page.
    switcher = bro.find_element_by_id('switcher_plogin')
    switcher.click()
    account = bro.find_element_by_id('u')
    account.send_keys('15555555593')
    sleep(3)
    pswd = bro.find_element_by_id('p')
    pswd.send_keys('666')
    sleep(2)
    sub = bro.find_element_by_id('login_button')
    sleep(2)
    sub.click()
    print('over')
    sleep(3)
finally:
    # quit() rather than close(): close() only closes the current window,
    # while quit() ends the whole session. With no visible window, a leftover
    # browser/driver process would go unnoticed.
    bro.quit()