Python爬虫 Day 3

一.Selenium剩余部分
1.元素交互操作
- 点击、清除
click
clear

- ActionChains
是一个动作链对象,需要把driver驱动传给它
动作链对象可以操作一系列设定好的动作

- frame的切换

- 执行js代码

'''
点击、清除
'''
from selenium import webdriver  # 用来驱动浏览器的
from selenium.webdriver import ActionChains  # 破解滑动验证码的时候用的 可以拖动图片
from selenium.webdriver.common.keys import Keys  # 键盘按键操作
import time

# Demo: type into a search box, click a button, then clear the box and
# submit a second query with the ENTER key.
driver = webdriver.Chrome()
try:
    # Element lookups below wait up to 10 seconds before failing.
    driver.implicitly_wait(10)
    driver.get('https://www.jd.com/')

    # Type the first query into the element with id "key".
    # (Named search_box rather than `input` so the builtin is not shadowed.)
    search_box = driver.find_element_by_id('key')
    search_box.send_keys('围城')

    # Find the search button by its class name and click it.
    search_button = driver.find_element_by_class_name('button')
    search_button.click()

    time.sleep(3)

    # Look the box up again instead of reusing the earlier handle.
    # NOTE(review): presumably the click loads a new page, which would make
    # the first reference stale - confirm against the live site.
    search_box = driver.find_element_by_id('key')
    search_box.clear()  # empty the input box

    time.sleep(1)

    # Second query, submitted with the ENTER key instead of the button.
    search_box.send_keys('墨菲定律')
    search_box.send_keys(Keys.ENTER)

    time.sleep(10)

finally:
    # quit() ends the whole WebDriver session; close() would only close the
    # current window.
    driver.quit()

'''
ActionChains
秒移
'''
from selenium import webdriver  # 用来驱动浏览器的
from selenium.webdriver import ActionChains  # 破解滑动验证码的时候用的 可以拖动图片
from selenium.webdriver.common.keys import Keys  # 键盘按键操作
import time

# Demo: drag one element onto another in a single ActionChains step.
driver = webdriver.Chrome()
try:
    # Element lookups below wait up to 10 seconds before failing.
    driver.implicitly_wait(10)
    driver.get('https://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')
    time.sleep(5)

    # Enter the frame named "iframeResult"; the lookups below run inside it.
    # driver.switch_to.frame() is the current spelling of the deprecated
    # driver.switch_to_frame().
    driver.switch_to.frame('iframeResult')
    time.sleep(1)

    # The action chain is bound to the driver it will control.
    action = ActionChains(driver)

    # Element to drag (id "draggable") and element to drop it on
    # (id "droppable").
    source = driver.find_element_by_id('draggable')
    target = driver.find_element_by_id('droppable')

    # Approach 1: move in one go.
    # drag_and_drop() only queues the action; perform() executes it.
    action.drag_and_drop(source, target).perform()
    time.sleep(10)

finally:
    # quit() ends the whole WebDriver session; close() would only close the
    # current window.
    driver.quit()

'''
ActionChains
一点一点移动
'''
from selenium import webdriver  # 用来驱动浏览器的
from selenium.webdriver import ActionChains  # 破解滑动验证码的时候用的 可以拖动图片
from selenium.webdriver.common.keys import Keys  # 键盘按键操作
import time

# Demo: drag one element towards another in small steps using
# click_and_hold / move_by_offset / release.
driver = webdriver.Chrome()
try:
    # Element lookups below wait up to 10 seconds before failing.
    driver.implicitly_wait(10)
    driver.get('https://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')
    time.sleep(5)

    # Enter the frame named "iframeResult"; the lookups below run inside it.
    # driver.switch_to.frame() is the current spelling of the deprecated
    # driver.switch_to_frame().
    driver.switch_to.frame('iframeResult')
    time.sleep(1)

    # Element to drag (id "draggable") and element to drop it on
    # (id "droppable").
    source = driver.find_element_by_id('draggable')
    target = driver.find_element_by_id('droppable')

    print(source.size)      # size
    print(source.tag_name)  # tag name
    print(source.text)      # text
    print(source.location)  # coordinates: x and y

    # Horizontal distance between the x coordinates of the two elements.
    distance = target.location['x'] - source.location['x']

    # Press and hold the mouse button on the source element.
    ActionChains(driver).click_and_hold(source).perform()

    # Approach 2: move a little at a time.
    # Each iteration performs one fresh chain that shifts the pointer by
    # `step` pixels to the right, until `distance` has been covered.
    step = 2
    moved = 0
    while moved < distance:
        ActionChains(driver).move_by_offset(xoffset=step, yoffset=0).perform()
        moved += step
        time.sleep(0.1)

    # Release the mouse button to drop the element.
    ActionChains(driver).release().perform()
    time.sleep(10)

finally:
    # quit() ends the whole WebDriver session; close() would only close the
    # current window.
    driver.quit()


'''
执行js代码
'''
from selenium import webdriver  # 用来驱动浏览器的
import time

# Demo: run a JavaScript snippet in the page through execute_script().
driver = webdriver.Chrome()
try:
    # Element lookups wait up to 10 seconds before failing.
    driver.implicitly_wait(10)
    driver.get('https://www.baidu.com/')
    # The script text is passed to the browser as-is; here it opens an alert.
    driver.execute_script(
        '''
        alert("浙江万里学院是浙江最牛逼的学院")
        '''
    )
    time.sleep(5)

finally:
    # quit() ends the whole WebDriver session; close() would only close the
    # current window.
    driver.quit()

其他操作

# 模拟浏览器的前进后退
import time
from selenium import webdriver

# Demo: load three pages in a row, then step through the browser history.
browser = webdriver.Chrome()
try:
    browser.get('https://www.baidu.com')
    browser.get('https://www.taobao.com')
    browser.get('http://www.sina.com.cn/')

    # Go back one entry in the history.
    browser.back()
    time.sleep(5)
    # Go forward again.
    browser.forward()
    time.sleep(3)
finally:
    # Always shut the browser down, even if a navigation step raised.
    # quit() ends the whole WebDriver session; close() would only close the
    # current window.
    browser.quit()

爬取京东商品信息

'''
初级版

'''
from selenium import webdriver  # 用来驱动浏览器的
from selenium.webdriver.common.keys import Keys  # 键盘按键操作
import time

# Basic version: search JD for a keyword and append every product found on
# the first result page to jd.txt.
driver = webdriver.Chrome()

try:
    # Element lookups below wait up to 10 seconds before failing.
    driver.implicitly_wait(10)
    # Open the JD home page.
    driver.get('https://www.jd.com/')

    # Type the keyword into the element with id "key" and press ENTER.
    input_tag = driver.find_element_by_id('key')
    input_tag.send_keys('墨菲定律')
    input_tag.send_keys(Keys.ENTER)

    time.sleep(5)

    # Record layout written to jd.txt for each product.
    good_template = '''
        num:{}
        商品名称:{}
        商品链接:{}
        商品价格:{}
        商品评价:{}
        \n
        '''

    good_list = driver.find_elements_by_class_name('gl-item')

    # Open the output file once for the whole page instead of once per item.
    with open('jd.txt','a',encoding='utf-8') as f:
        # enumerate() supplies the running record number; the original code
        # formatted `num` without ever defining it.
        for num, good in enumerate(good_list, 1):
            print(good)
            # Product name
            good_name = good.find_element_by_css_selector('.p-name em').text
            print(good_name)

            # Product link
            good_url = good.find_element_by_css_selector('.p-name a').get_attribute('href')
            print(good_url)

            # Product price
            good_price = good.find_element_by_class_name('p-price').text
            print(good_price)

            # Product review text
            good_commit = good.find_element_by_class_name('p-commit').text
            print(good_commit)

            good_content = good_template.format(num,good_name,good_url,good_price,good_commit)
            print(good_content)

            f.write(good_content)

    print('商品信息写入成功!')


finally:
    # quit() ends the whole WebDriver session; close() would only close the
    # current window.
    driver.quit()
'''
中级版

'''
from selenium import webdriver  # 用来驱动浏览器的
from selenium.webdriver.common.keys import Keys  # 键盘按键操作
import time

# Intermediate version: search JD for a keyword, scroll down so more products
# load, append every product on the page to jd.txt, then click "next page".
driver = webdriver.Chrome()

try:
    # Element lookups below wait up to 10 seconds before failing.
    driver.implicitly_wait(10)
    # Open the JD home page.
    driver.get('https://www.jd.com/')

    # Type the keyword into the element with id "key" and press ENTER.
    input_tag = driver.find_element_by_id('key')
    input_tag.send_keys('墨菲定律')
    input_tag.send_keys(Keys.ENTER)

    time.sleep(5)

    # Scroll the window to y = 5000 px.
    js_code = '''
    window.scrollTo(0,5000)
    '''
    driver.execute_script(js_code)

    # Give the products 5 seconds to load.
    time.sleep(5)

    # Record layout written to jd.txt for each product.
    good_template = '''
        num:{}
        商品名称:{}
        商品链接:{}
        商品价格:{}
        商品评价:{}
        \n
        '''

    good_list = driver.find_elements_by_class_name('gl-item')

    # Open the output file once for the whole page instead of once per item.
    with open('jd.txt','a',encoding='utf-8') as f:
        # Records are numbered from 1.
        for num, good in enumerate(good_list, 1):
            print(good)
            # Product name
            good_name = good.find_element_by_css_selector('.p-name em').text
            print(good_name)

            # Product link
            good_url = good.find_element_by_css_selector('.p-name a').get_attribute('href')
            print(good_url)

            # Product price
            good_price = good.find_element_by_class_name('p-price').text
            print(good_price)

            # Product review text
            good_commit = good.find_element_by_class_name('p-commit').text
            print(good_commit)

            good_content = good_template.format(num,good_name,good_url,good_price,good_commit)
            print(good_content)

            f.write(good_content)

    print('商品信息写入成功!')

    # Find the "next page" control and click it. This must be the singular
    # find_element_*: the plural find_elements_* returns a list, which has no
    # click() method.
    next_tag = driver.find_element_by_class_name('pn-next')
    next_tag.click()

    time.sleep(10)

finally:
    # quit() ends the whole WebDriver session; close() would only close the
    # current window.
    driver.quit()
'''
狂暴版

'''
from selenium import webdriver  # 用来驱动浏览器的
from selenium.webdriver.common.keys import Keys  # 键盘按键操作
import time

def get_good(driver, max_pages=None):
    """Append the products of the current result page, and of each following
    page, to ``jd.txt``.

    For every page: wait, scroll the window to y = 5000 px, wait again, read
    every element with class ``gl-item`` and append one formatted record per
    product to ``jd.txt``. Then click the element with class ``pn-next`` and
    repeat. Record numbers keep counting across pages.

    The driver is NOT closed here: the caller created it and is responsible
    for shutting it down.

    Args:
        driver: Selenium WebDriver already showing a result page.
        max_pages: Optional upper bound on the number of pages to process.
            ``None`` (the default) keeps going until no ``pn-next`` element
            is found.
    """
    # Script that scrolls the window to y = 5000 px.
    js_code = '''
        window.scrollTo(0,5000)
        '''
    # Record layout written to jd.txt for each product.
    good_template = '''
            num:{}
            商品名称:{}
            商品链接:{}
            商品价格:{}
            商品评价:{}
            \n
            '''

    num = 1         # running record number, shared by all pages
    pages_done = 0

    # A loop replaces the original self-recursion, which grew the call stack
    # by one frame per page and could only end by raising.
    while max_pages is None or pages_done < max_pages:
        time.sleep(5)
        driver.execute_script(js_code)

        # Give the products 5 seconds to load.
        time.sleep(5)
        good_list = driver.find_elements_by_class_name('gl-item')

        # Open the output file once per page instead of once per item.
        with open('jd.txt','a',encoding='utf-8') as f:
            for good in good_list:
                # Product name
                good_name = good.find_element_by_css_selector('.p-name em').text
                # Product link
                good_url = good.find_element_by_css_selector('.p-name a').get_attribute('href')
                # Product price
                good_price = good.find_element_by_class_name('p-price').text
                # Product review text
                good_commit = good.find_element_by_class_name('p-commit').text

                good_content = good_template.format(num,good_name,good_url,good_price,good_commit)
                print(good_content)
                f.write(good_content)
                num += 1

        print('商品信息写入成功!')

        pages_done += 1
        if pages_done == max_pages:
            break

        # The plural lookup returns an empty list (instead of raising) when
        # there is no "next page" control, which is the normal way out.
        # NOTE(review): if the site keeps a disabled "pn-next" element on the
        # last page this check will not notice it; pass max_pages to bound
        # the run - confirm against the live page.
        next_tags = driver.find_elements_by_class_name('pn-next')
        if not next_tags:
            break
        next_tags[0].click()

        time.sleep(5)

if __name__ == '__main__':
    # Entry point: open JD, search for the keyword, then hand the driver to
    # get_good() to collect the results.
    driver = webdriver.Chrome()
    try:
        # Element lookups wait up to 10 seconds before failing.
        driver.implicitly_wait(10)
        # Open the JD home page.
        driver.get('https://www.jd.com/')
        # Type the keyword into the search box on the home page and press
        # ENTER.
        input_tag = driver.find_element_by_id('key')
        input_tag.send_keys('墨菲定律')
        input_tag.send_keys(Keys.ENTER)
        # Collect the product information.
        get_good(driver)

    finally:
        # quit() ends the whole WebDriver session; close() would only close
        # the current window.
        driver.quit()

 

posted @ 2019-07-03 11:26  MerliahSwift  阅读(112)  评论(0)    收藏  举报