Python爬虫 Day 3
一.Selenium剩余部分
1.元素交互操作
- 点击、清除
click
clear
- ActionChains
是一个动作链对象,需要把driver驱动传给它
动作链对象可以操作一系列设定好的动作
- frame的切换
- 执行js代码
# Selenium element-interaction demos.
#
# Four independent demos run one after another; each one opens its own Chrome
# session and always tears it down in a ``finally`` clause:
#   1. click / clear on a search box
#   2. ActionChains: instant drag-and-drop
#   3. ActionChains: drag in small steps
#   4. executing JavaScript in the page
import time

from selenium import webdriver  # drives the browser
from selenium.webdriver import ActionChains  # builds mouse-action chains (e.g. dragging a slider captcha image)
from selenium.webdriver.common.by import By  # locator strategies for find_element()
from selenium.webdriver.common.keys import Keys  # keyboard key constants

DROPPABLE_DEMO_URL = 'https://www.runoob.com/try/try.php?filename=jqueryui-api-droppable'

# ---------------------------------------------------------------------------
# Demo 1: click and clear
# ---------------------------------------------------------------------------
driver = webdriver.Chrome()
try:
    driver.implicitly_wait(10)
    driver.get('https://www.jd.com/')

    # Type a query into the search box (id="key").
    search_box = driver.find_element(By.ID, 'key')
    search_box.send_keys('围城')

    # Locate the search button by its class name and click it.
    search_button = driver.find_element(By.CLASS_NAME, 'button')
    search_button.click()
    time.sleep(3)

    # Look the box up again after the click (presumably because the page has
    # changed), empty it, then search for something else with ENTER.
    search_box = driver.find_element(By.ID, 'key')
    search_box.clear()
    time.sleep(1)
    search_box.send_keys('墨菲定律')
    search_box.send_keys(Keys.ENTER)
    time.sleep(10)
finally:
    # quit() ends the whole driver session; close() would only close the window.
    driver.quit()

# ---------------------------------------------------------------------------
# Demo 2: ActionChains - instant drag-and-drop
# ---------------------------------------------------------------------------
driver = webdriver.Chrome()
try:
    driver.implicitly_wait(10)
    driver.get(DROPPABLE_DEMO_URL)
    time.sleep(5)

    # driver.switch_to_frame() is deprecated; switch_to.frame() replaces it.
    driver.switch_to.frame('iframeResult')
    time.sleep(1)

    # An ActionChains object is bound to the driver it will act on.
    action = ActionChains(driver)

    source = driver.find_element(By.ID, 'draggable')  # block to drag
    target = driver.find_element(By.ID, 'droppable')  # block to drop onto

    # Approach 1: move the source onto the target in one go.
    # A chain is only queued until perform() is called.
    action.drag_and_drop(source, target).perform()
    time.sleep(10)
finally:
    driver.quit()

# ---------------------------------------------------------------------------
# Demo 3: ActionChains - drag step by step
# ---------------------------------------------------------------------------
driver = webdriver.Chrome()
try:
    driver.implicitly_wait(10)
    driver.get(DROPPABLE_DEMO_URL)
    time.sleep(5)

    driver.switch_to.frame('iframeResult')
    time.sleep(1)

    source = driver.find_element(By.ID, 'draggable')  # block to drag
    target = driver.find_element(By.ID, 'droppable')  # block to drop onto

    print(source.size)      # element size
    print(source.tag_name)  # tag name
    print(source.text)      # text content
    print(source.location)  # position: x and y

    # Horizontal distance between the two blocks.
    distance = target.location['x'] - source.location['x']

    # Press and hold the source block.
    ActionChains(driver).click_and_hold(source).perform()

    # Approach 2: move 2px at a time until the distance is covered.
    step = 2
    moved = 0
    while moved < distance:
        ActionChains(driver).move_by_offset(xoffset=step, yoffset=0).perform()
        moved += step
        time.sleep(0.1)

    # Release the held block.
    ActionChains(driver).release().perform()
    time.sleep(10)
finally:
    driver.quit()

# ---------------------------------------------------------------------------
# Demo 4: execute JavaScript
# ---------------------------------------------------------------------------
driver = webdriver.Chrome()
try:
    driver.implicitly_wait(10)
    driver.get('https://www.baidu.com/')
    driver.execute_script(
        ''' alert("浙江万里学院是浙江最牛逼的学院") '''
    )
    time.sleep(5)
finally:
    driver.quit()
其他操作
# Simulate the browser's back and forward navigation.
import time

from selenium import webdriver

browser = webdriver.Chrome()
try:
    # Visit three pages in a row so there is history to navigate.
    browser.get('https://www.baidu.com')
    browser.get('https://www.taobao.com')
    browser.get('http://www.sina.com.cn/')

    # Go back one history entry.
    browser.back()
    time.sleep(5)

    # Go forward again.
    browser.forward()
    time.sleep(3)
finally:
    # Always end the session, even if a navigation above raised.
    browser.quit()
爬取京东商品信息
# JD.com product scraper - beginner version.
#
# Searches JD for a keyword, reads every product card on the first result
# page and appends one record per product to jd.txt.
import time

from selenium import webdriver  # drives the browser
from selenium.webdriver.common.by import By  # locator strategies for find_element()
from selenium.webdriver.common.keys import Keys  # keyboard key constants

driver = webdriver.Chrome()
try:
    driver.implicitly_wait(10)

    # Open JD and submit the search with ENTER.
    driver.get('https://www.jd.com/')
    input_tag = driver.find_element(By.ID, 'key')
    input_tag.send_keys('墨菲定律')
    input_tag.send_keys(Keys.ENTER)
    time.sleep(5)

    good_list = driver.find_elements(By.CLASS_NAME, 'gl-item')

    # Open the output file once instead of once per product.
    with open('jd.txt', 'a', encoding='utf-8') as f:
        # enumerate supplies the record number used in the output template.
        for num, good in enumerate(good_list, start=1):
            print(good)

            # product name
            good_name = good.find_element(By.CSS_SELECTOR, '.p-name em').text
            print(good_name)

            # product link
            good_url = good.find_element(By.CSS_SELECTOR, '.p-name a').get_attribute('href')
            print(good_url)

            # product price
            good_price = good.find_element(By.CLASS_NAME, 'p-price').text
            print(good_price)

            # product review summary
            good_commit = good.find_element(By.CLASS_NAME, 'p-commit').text
            print(good_commit)

            good_content = ''' num:{} 商品名称:{} 商品链接:{} 商品价格:{} 商品评价:{} \n '''.format(num, good_name, good_url, good_price, good_commit)
            print(good_content)
            f.write(good_content)

    print('商品信息写入成功!')
finally:
    # quit() ends the whole driver session; close() would only close the window.
    driver.quit()
# JD.com product scraper - intermediate version.
#
# Same as the beginner version, but scrolls the result page down first so
# that more products get loaded, numbers the records, and finally clicks the
# "next page" button.
import time

from selenium import webdriver  # drives the browser
from selenium.webdriver.common.by import By  # locator strategies for find_element()
from selenium.webdriver.common.keys import Keys  # keyboard key constants

driver = webdriver.Chrome()
try:
    driver.implicitly_wait(10)

    # Open JD and submit the search with ENTER.
    driver.get('https://www.jd.com/')
    input_tag = driver.find_element(By.ID, 'key')
    input_tag.send_keys('墨菲定律')
    input_tag.send_keys(Keys.ENTER)
    time.sleep(5)

    # Scroll down 5000px, then give the products 5s to load.
    js_code = ''' window.scrollTo(0,5000) '''
    driver.execute_script(js_code)
    time.sleep(5)

    good_list = driver.find_elements(By.CLASS_NAME, 'gl-item')

    # Open the output file once instead of once per product.
    with open('jd.txt', 'a', encoding='utf-8') as f:
        for num, good in enumerate(good_list, start=1):
            print(good)

            # product name
            good_name = good.find_element(By.CSS_SELECTOR, '.p-name em').text
            print(good_name)

            # product link
            good_url = good.find_element(By.CSS_SELECTOR, '.p-name a').get_attribute('href')
            print(good_url)

            # product price
            good_price = good.find_element(By.CLASS_NAME, 'p-price').text
            print(good_price)

            # product review summary
            good_commit = good.find_element(By.CLASS_NAME, 'p-commit').text
            print(good_commit)

            good_content = ''' num:{} 商品名称:{} 商品链接:{} 商品价格:{} 商品评价:{} \n '''.format(num, good_name, good_url, good_price, good_commit)
            print(good_content)
            f.write(good_content)

    print('商品信息写入成功!')

    # Find the "next page" button and click it. This must be the singular
    # find_element: the plural form returns a list, which has no click().
    next_tag = driver.find_element(By.CLASS_NAME, 'pn-next')
    next_tag.click()
    time.sleep(10)
finally:
    # quit() ends the whole driver session; close() would only close the window.
    driver.quit()
# JD.com product scraper - full version.
#
# Searches JD for a keyword and walks through the result pages, appending one
# record per product to jd.txt.
import time

from selenium import webdriver  # drives the browser
from selenium.webdriver.common.by import By  # locator strategies for find_element()
from selenium.webdriver.common.keys import Keys  # keyboard key constants


def _format_good(num, good):
    """Return the output record for one product card element, numbered *num*."""
    good_name = good.find_element(By.CSS_SELECTOR, '.p-name em').text
    good_url = good.find_element(By.CSS_SELECTOR, '.p-name a').get_attribute('href')
    good_price = good.find_element(By.CLASS_NAME, 'p-price').text
    good_commit = good.find_element(By.CLASS_NAME, 'p-commit').text
    return ''' num:{} 商品名称:{} 商品链接:{} 商品价格:{} 商品评价:{} \n '''.format(num, good_name, good_url, good_price, good_commit)


def get_good(driver):
    """Scrape every result page reachable through the "next page" button.

    For each page: scroll down so more products load, write one record per
    product card to jd.txt (append mode), then click "next page". Record
    numbers keep counting across pages.

    The function loops instead of recursing and does NOT shut the driver
    down: the caller created the session and is responsible for ending it.

    Args:
        driver: a Selenium WebDriver already showing a JD search result page.
    """
    num = 1
    while True:
        time.sleep(5)

        # Scroll down 5000px, then give the products 5s to load.
        js_code = ''' window.scrollTo(0,5000) '''
        driver.execute_script(js_code)
        time.sleep(5)

        good_list = driver.find_elements(By.CLASS_NAME, 'gl-item')
        with open('jd.txt', 'a', encoding='utf-8') as f:
            for good in good_list:
                good_content = _format_good(num, good)
                print(good_content)
                f.write(good_content)
                num += 1

        print('商品信息写入成功!')

        # Find the "next page" button. The plural lookup returns an empty
        # list (rather than raising) when there is none, which ends the crawl.
        next_tags = driver.find_elements(By.CLASS_NAME, 'pn-next')
        if not next_tags:
            break
        next_tag = next_tags[0]

        # NOTE(review): assumes the site marks the last page's button with an
        # extra "disabled" class - confirm against the live page.
        if 'disabled' in (next_tag.get_attribute('class') or '').split():
            break

        next_tag.click()
        time.sleep(5)


if __name__ == '__main__':
    driver = webdriver.Chrome()
    try:
        driver.implicitly_wait(10)

        # Open JD, type the keyword into the search box and press ENTER.
        driver.get('https://www.jd.com/')
        input_tag = driver.find_element(By.ID, 'key')
        input_tag.send_keys('墨菲定律')
        input_tag.send_keys(Keys.ENTER)

        # Collect the product information page by page.
        get_good(driver)
    finally:
        # The single place where the session is ended.
        driver.quit()

浙公网安备 33010602011771号