selenium库的使用

#能不能让我的程序连接到浏览器，让浏览器完成各种复杂操作，我们只接受最终结果，不然人家一加密，你人傻了
#selenium:自动化测试工具
#可以打开浏览器，像人一样操纵浏览器
#我们可以从selenium中直接提取网页中的各种信息
#环境搭建：pip install selenium -i https://pypi.tuna.tsinghua.edu.cn/simple （清华源）
#   下载浏览器驱动（教程：https://blog.csdn.net/weixin_38863166/article/details/105113041 ），把解压后的浏览器驱动放在python解释器所在的文件夹
 #让selenium启动浏览器
from selenium.webdriver import Chrome
 #1.创建浏览器对象
# Start a Chrome session, load the Baidu home page and print the page title.
web = Chrome()
home_url = 'http://www.baidu.com'
web.get(home_url)
page_title = web.title
print(page_title)


import time

from selenium.webdriver import Chrome
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

# Search lagou.com for "python" and print the name and salary of each listed job.
# The find_element_by_* / find_elements_by_* helpers are not available in current
# Selenium 4 releases, so every lookup goes through find_element(By.<strategy>, ...).

# 1. Create the browser object.
web = Chrome()
# 2. Open the site.
web.get('http://www.lagou.com')
# Locate an element by its absolute XPath and click it.
el = web.find_element(By.XPATH, '/html/body/div[10]/div[1]/div[2]/div[2]/div[1]/div/p[1]/a')
el.click()
# Give the browser a moment: the script runs much faster than the site's ajax requests.
time.sleep(1)
# Find the search box, type "python" and submit with ENTER.
search_box = web.find_element(By.XPATH, '/html/body/div[7]/div[1]/div[1]/div[1]/form/input[1]')
search_box.send_keys('python', Keys.ENTER)
time.sleep(1)
# Collect every div that holds one job entry, then extract the data from each.
div_list = web.find_elements(By.XPATH, '/html/body/div[1]/div[2]/div/div[2]/div[3]/div/div[1]/div')
for div in div_list:
    # First <a> inside the entry, looked up by tag name.
    job_name = div.find_element(By.TAG_NAME, 'a').text
    # Relative XPath from the entry div; the important part is counting the nested divs.
    job_price = div.find_element(By.XPATH, './div[1]/div[1]/div[2]/span').text
    print(job_name, job_price)

一次性选中并修改多个相同的变量

1.将光标置于要修改的变量名后面
2.多次按alt+j，这样就可以在相同的变量名后面添加光标
3.此时可以同时删除并修改这些已经选中的变量名了。

from  selenium.webdriver import Chrome
from selenium.webdriver.common.keys import Keys
import time
from selenium.webdriver.common.by import By
web = Chrome()
# The triple-quoted string below is a disabled example (a bare string expression, never
# executed) that opens a job posting in a new window, reads it, closes it and switches back.
'''web.get('http://www.lagou.com')
web.find_element(By.XPATH,'/html/body/div[10]/div[1]/div[2]/div[2]/div[1]/div/p[1]/a').click()
time.sleep(1)
web.find_element(By.XPATH,'/html/body/div[7]/div[1]/div[1]/div[1]/form/input[1]').send_keys('python',Keys.ENTER)
time.sleep(1)
web.find_element(By.XPATH,'//*[@id="jobList"]/div[1]/div[1]/div[1]/div[1]/div[1]/a').click()
#如何进入到新窗口中进行提取
#注意，在selenium眼中，新窗口是默认不切换过来的
web.switch_to.window(web.window_handles[1])#切换窗口
#在新窗口中提取信息
job_detail=web.find_element(By.XPATH,'//*[@id="job_detail"]/dd[2]/div').text
print(job_detail)
web.close()#关掉子窗口，视角还在原处
web.switch_to.window(web.window_handles[0])#视角回到第一个窗口中
print(web.find_element(By.XPATH,'//*[@id="jobList"]/div[1]/div[1]/div[1]/div[1]/div[1]/a').text)'''
# How to handle an iframe inside the page.
web.get('https://www.91kanju.com/vod-play/541-2-1.html')
# Order matters: locate the iframe, switch the driver into it, read the data,
# and only then switch back to the top-level document.
iframe = web.find_element(By.XPATH, 'iframe的xpath')
web.switch_to.frame(iframe)  # enter the iframe
# This lookup must run while the driver is still inside the iframe; the original
# switched back to the default content first, so the element could not be found.
tx = web.find_element(By.XPATH, 'iframe内的xpath').text
web.switch_to.default_content()  # return to the original top-level page
print(tx)

#只运行不弹窗：无头浏览器
from selenium.webdriver import Chrome
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.select import Select
from selenium.webdriver.common.keys import Keys
import time
from selenium.webdriver.common.by import By
# Run Chrome headless, step through every option of a drop-down list and print the
# table shown for each option.

# Prepare the browser options.
opt = Options()
opt.add_argument('--headless')  # no visible browser window
opt.add_argument('--disable-gpu')  # Chrome switches start with "--" (was "==disable-gpu")
web = Chrome(options=opt)  # apply the options to the browser
# TODO: fill in the target URL; it was left empty in the original notes.
web.get('')
# Locate the drop-down list element.
sel_el = web.find_element(By.XPATH, '下拉列表节点')
# Wrap the element so it can be driven as a drop-down menu.
sel = Select(sel_el)
# Switch through the options one by one; i is the index of each option.
for i in range(len(sel.options)):
    sel.select_by_index(i)  # select the i-th option
    time.sleep(2)  # wait for the network load triggered by the selection
    # Keep the located element: the original discarded it and then printed an
    # undefined name `table`.
    table = web.find_element(By.XPATH, 'table的xpath')
    print(table.text)  # print the table's text

print('ok')
# page_source is the HTML after data loading and JS execution. It has to be read
# while the session is still alive, i.e. before the browser is shut down.
html = web.page_source
# quit() ends the whole driver session; close() would only close the current window.
web.quit()

#验证码
#1.图像识别2.选择互联网上成熟的验证码破解工具：用超级鹰，软件id
#超级鹰识别超级鹰
from selenium.webdriver import Chrome
import time
from chaojiying import Chaojiying_Client
from selenium.webdriver.common.by import By
# Log in to chaojiying.com, solving the login captcha with the Chaojiying client.
web = Chrome()
web.get('http://www.chaojiying.com/user/login/')
# Handle the captcha: screenshot the captcha <img> element as PNG bytes.
img = web.find_element(By.XPATH, '/html/body/div[3]/div/div[3]/div[1]/form/div/img').screenshot_as_png
chaojiying = Chaojiying_Client('账号', '密码', '软件id')
# NOTE(review): 1902 is presumably the Chaojiying captcha-type code - confirm against
# the service's price/type table.
dic = chaojiying.PostPic(img, 1902)
verify_code = dic['pic_str']
# Fill in user name, password and the recognised captcha text.
web.find_element(By.XPATH, '/html/body/div[3]/div/div[3]/div[1]/form/p[1]/input').send_keys('账号')
web.find_element(By.XPATH, '/html/body/div[3]/div/div[3]/div[1]/form/p[2]/input').send_keys('密码')
web.find_element(By.XPATH, '/html/body/div[3]/div/div[3]/div[1]/form/p[3]/input').send_keys(verify_code)
# Click the login button.
web.find_element(By.XPATH, '/html/body/div[3]/div/div[3]/div[1]/form/p[4]/input').click()
# time.sleep() requires a duration; the original call without one raised TypeError.
# NOTE(review): 3 seconds is an arbitrary pause to let the login response load - adjust as needed.
time.sleep(3)

#登录12306
from selenium.webdriver import Chrome
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
import time
from selenium.webdriver.common.by import By
# Log in to 12306 and drag the slider verification.
# What to do when the site detects automation: in the F12 console,
# window.navigator.webdriver evaluates to true.
# Chrome < 88: when the browser starts (before any page content is loaded), inject JS
# into every new document to hide navigator.webdriver. Disabled example kept as a bare
# string; the CDP method name and `Object.defineProperty` were misspelled and are corrected.
'''web=Chrome()
web.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument",{
    "source":"""
    navigator.webdriver = undefined
     Object.defineProperty(navigator,'webdriver',{
        get: () => undefined
    })
   """
})
web.get(xxxx)'''
# Chrome > 88
option = Options()
# option.add_experimental_option('excludeSwitches',['enable-automation'])
# Keep the page from identifying the session as automated. The switch name was
# misspelled "disabLe" in the original.
option.add_argument('--disable-blink-features=AutomationControlled')
web = Chrome(options=option)
web.get('https://kyfw.12306.cn/otn/resources/login.html')
time.sleep(3)
# Enter user name and password.
# NOTE(review): these look like real credentials committed to source - move them to
# environment variables or a local config file and rotate the password.
web.find_element(By.XPATH, '/html/body/div[1]/div[2]/div[2]/div[1]/div[1]/div[1]/input').send_keys('wzc886')
web.find_element(By.XPATH, '/html/body/div[1]/div[2]/div[2]/div[1]/div[1]/div[2]/input').send_keys('021122wwwWWW')
# Click the login button.
web.find_element(By.XPATH, '//*[@id="J-login"]').click()
# Drag the slider.
time.sleep(1)
# The element to drag.
btn = web.find_element(By.XPATH, '/html/body/div[1]/div[4]/div[2]/div[2]/div/div/div[2]/div/div[1]/span')
# Drag it 341 px to the right with no vertical movement.
ActionChains(web).drag_and_drop_by_offset(btn, 341, 0).perform()

posted @ 2022-03-25 16:30 wzc6 阅读(145) 评论(0) 收藏举报

刷新页面返回顶部

wzc6

selenium库的使用

公告