安装Selenium库以及下载浏览器补丁 并且爬取动态网页
1.安装Selenium库
conda install Selenium 即可
2.下载浏览器补丁
- 链接:https://pan.baidu.com/s/1y4KnEMXbgsEjEEPlbpxNqw 提取码:1j8m
- 下载好补丁,然后放在python同根目录下

3.爬取动态网页
from selenium import webdriver

# Launch Chrome and load the target page
driver = webdriver.Chrome()
driver.get('http://www.ptpress.com.cn/shopping/index')
# After the browser has rendered the page, page_source can be scraped
# just like a static page
data = driver.page_source
print(data)

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

# Wait up to 10 seconds for the target element to become clickable
wait = WebDriverWait(driver, 10)
comfirm_btn = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR,'body > div.classifySearch-p > div > div.classifySearchBar > div.allSearch > div.allSearch > a > i' if False else 'body > div.classifySearch-p > div > div.classifySearchBar > div.allSearch > a > i')))
print(comfirm_btn)

# Close the current browser window
driver.close()

driver = webdriver.Chrome()
driver.get('http://www.ptpress.com.cn/shopping/index')
# Open one extra browser tab
driver.execute_script('window.open()')
print(driver.window_handles)
# FIX: switch_to_window() was deprecated and removed in Selenium 4;
# the supported API is driver.switch_to.window(handle)
driver.switch_to.window(driver.window_handles[1])
driver.get('http://www.tipdm.com')
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()  # launch Chrome
# Must use the post-redirect search URL, not the home page URL,
# otherwise the search cannot be performed
driver.get('http://www.ptpress.com.cn/shopping/search?tag=search&searchName=')
# Wait up to 10 seconds for the search button to become clickable
wait = WebDriverWait(driver, 10)
comfirm_btn = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR,'#search > div.classifySearch-p > div > div.classifySearchBar > div.allSearch > a > i')))
# Trigger the search on the current page
comfirm_btn.click()
# Scroll the page to the bottom
driver.execute_script('window.scrollTo(0,document.body.scrollHeight)')
# Show an alert dialog in the page
driver.execute_script('alert("python 爬虫")')
#driver.close()
# Locate the search input box.
# FIX: find_element_by_css_selector()/find_element_by_xpath() were removed
# in Selenium 4; use find_element(By.<strategy>, locator) instead.
input_first = driver.find_element(By.CSS_SELECTOR, '#search > div.classifySearch-p > div > div.classifySearchBar > div.allSearch > input')
print(input_first)
input_second = driver.find_element(By.XPATH, '//*[@id="search"]/div[1]/div/div[1]/div[2]/input')
print(input_second)
# Type the query text into the located input box
input_first.send_keys('python编程')
lis = driver.find_element(By.CSS_SELECTOR, '#head > div > nav.head-nav-fl.fl')
print(lis)
# find_element(strategy, locator) — the generic locator API
lis0 = driver.find_element(By.CSS_SELECTOR,'#head > div > nav.head-nav-fl.fl')
print(lis0)
# End-to-end example of scraping a dynamic page: search, wait, then parse
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
import time

driver = webdriver.Chrome()
wait = WebDriverWait(driver, 10)
driver.get('http://www.ptpress.com.cn/shopping/search?tag=search&searchName=')
# FIX: find_element_by_css_selector() was removed in Selenium 4;
# use find_element(By.CSS_SELECTOR, locator) instead.
search_btn = driver.find_element(By.CSS_SELECTOR, '#search > div.classifySearch-p > div > div.classifySearchBar > div.allSearch > input')
search_btn.send_keys('python编程')
confirm_btn = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR,'#search > div.classifySearch-p > div > div.classifySearchBar > div.allSearch > a > i')))
confirm_btn.click()
time.sleep(5)  # give the result list time to render
html = driver.page_source
soup = BeautifulSoup(html, 'lxml')
book = soup.select('.bookList')
# Each result item holds alternating <a> tags: book name, then price
li = soup.select('.item > p > a')
name = [li[i].text for i in range(0, len(li), 2)]
print(name)
price = [li[i].text for i in range(1, len(li), 2)]
print(price)
浙公网安备 33010602011771号