# Selenium study notes (selenium学习)
import time

import requests
from selenium import webdriver
from selenium.webdriver import ChromeOptions
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

# Value passed to webdriver.Chrome() as the chromedriver executable.
# The name was referenced by every browser helper but never defined, so each
# call failed with NameError.  'chromedriver' makes Selenium resolve the
# binary through PATH; replace it with an absolute path if needed.
chrome = 'chromedriver'

# Seconds to wait for an HTTP response before requests gives up.
REQUEST_TIMEOUT = 10

# Script injected into every new document so that pages reading
# `navigator.webdriver` see `undefined`.
_HIDE_WEBDRIVER_JS = (
    'Object.defineProperty(navigator, "webdriver", {get:()=>undefined})'
)


def _stealth_options(*, headless=False):
    """Return ChromeOptions with the automation switch and extension disabled.

    Args:
        headless: When true, also add the ``--headless`` argument.
    """
    options = ChromeOptions()
    if headless:
        options.add_argument('--headless')
    options.add_experimental_option('excludeSwitches', ['enable-automation'])
    options.add_experimental_option('useAutomationExtension', False)
    return options


def _hide_webdriver_flag(browser):
    """Register the ``navigator.webdriver`` override through the CDP.

    Must be called before ``browser.get`` so the script is in place when the
    first document is created; one registration covers later navigations.
    """
    browser.execute_cdp_cmd(
        'Page.addScriptToEvaluateOnNewDocument',
        {'source': _HIDE_WEBDRIVER_JS},
    )


def func():
    """Open Baidu, save a screenshot to ``baidu.png`` and quit the browser."""
    browser = webdriver.Chrome(chrome)
    try:
        browser.get('https://www.baidu.com')
        browser.save_screenshot('baidu.png')
        browser.close()  # closes the current window only
    finally:
        # quit() ends the whole session even if a step above raised.
        browser.quit()


def func2():
    """Tour of common WebDriver calls against movie.douban.com.

    Types a query, clicks search, prints the page source, the index of
    'keywords' in it and the current URL, then switches frame and scrolls to
    the bottom of the page.
    """
    # NOTE(review): the browser is never quit here, so the window and the
    # chromedriver process outlive the call - presumably intentional for
    # manual inspection; confirm.
    browser = webdriver.Chrome(chrome)
    browser.get('https://movie.douban.com')
    browser.maximize_window()

    # Type the query and press the search button.
    browser.find_element(By.XPATH, '//*[@id="inp-query"]').send_keys('周星驰')
    browser.find_element(
        By.XPATH,
        '//*[@id="db-nav-movie"]/div[1]/div/div[2]/form/fieldset/div[2]/input',
    ).click()

    # Page source, and a plain substring search inside it.
    print(browser.page_source)
    print(browser.page_source.find('keywords'))

    # Cookies of the current session (result is not used).
    browser.get_cookies()

    print(browser.current_url)

    # 'id name' looks like a placeholder for a real frame id/name - confirm.
    # switch_to.frame replaces the deprecated switch_to_frame.
    browser.switch_to.frame('id name')

    # Scroll to the bottom of the page.
    browser.execute_script('window.scrollTo(0,document.body.scrollHeight)')


def func3():
    """Fill in and submit the login form at www.pyttip.com."""
    # NOTE(review): the browser is left open after the click - presumably so
    # the result can be inspected by hand; confirm.
    browser = webdriver.Chrome(chrome)
    browser.maximize_window()
    browser.get('http://www.pyttip.com/user/login')

    # Account name, password, then the login button.
    browser.find_element(By.XPATH, '//*[@id="editor_name"]').send_keys('lianggege')
    browser.find_element(By.XPATH, '//*[@id="editor_pwd"]').send_keys('123')
    browser.find_element(
        By.XPATH, '//*[@id="apLogin"]/fieldset/div[3]/input'
    ).click()


class JdSpider:
    """Searches jd.com for '爬虫' and moves to the next result page."""

    def __init__(self):
        self.url = 'https://www.jd.com'
        self.browser = webdriver.Chrome(chrome)
        # Implicit wait: element lookups keep retrying for up to 10 seconds.
        self.browser.implicitly_wait(10)

    def get_page(self):
        """Run the search, click the 'pn-next' element, then quit the browser."""
        try:
            self.browser.get(self.url)
            self.browser.find_element(By.XPATH, '//*[@id="key"]').send_keys('爬虫')
            self.browser.find_element(
                By.XPATH, '//*[@id="search"]/div/div[2]/button'
            ).click()
            self.browser.find_element(By.CLASS_NAME, 'pn-next').click()
            self.browser.close()
        finally:
            # Release the driver even when a lookup or click fails.
            self.browser.quit()

    def main(self):
        """Entry point; delegates to :meth:`get_page`."""
        self.get_page()


def func4():
    """Search zhipin.com for 'python' with the automation markers hidden."""
    # NOTE(review): the browser is left open after the search is submitted -
    # presumably for manual inspection; confirm.
    browser = webdriver.Chrome(chrome, options=_stealth_options())
    # Registered once, before the first navigation; the original repeated the
    # same registration after the page had loaded, which added nothing.
    _hide_webdriver_flag(browser)

    browser.get('https://www.zhipin.com')
    browser.find_element(
        By.XPATH, '//*[@id="wrap"]/div[4]/div/div/div[1]/form/div[2]/p/input'
    ).send_keys('python')
    time.sleep(3)
    browser.find_element(
        By.XPATH, '//*[@id="wrap"]/div[4]/div/div/div[1]/form/button'
    ).send_keys(Keys.ENTER)


def func5():
    """Search jd.com for 'python' and print the text of each result item."""
    browser = webdriver.Chrome(chrome)
    try:
        _hide_webdriver_flag(browser)
        browser.get('https://www.jd.com')
        browser.maximize_window()

        # Type the query and press the search button.
        browser.find_element(By.XPATH, '//*[@id="key"]').send_keys('python')
        browser.find_element(
            By.XPATH, '//*[@id="search"]/div/div[2]/button'
        ).click()
        time.sleep(5)

        # Scroll to the bottom of the page.
        browser.execute_script('window.scrollTo(0,document.body.scrollHeight)')

        # The original matched the <ul> itself and printed the WebElement
        # repr; iterate the <li> children and print their text, as func6 does.
        # Assumes each product is an <li> directly under that <ul> - confirm
        # against the live page.
        items = browser.find_elements(By.XPATH, '//*[@id="J_goodsList"]/ul/li')
        for item in items:
            print(item.text)
    finally:
        browser.quit()


def func6():
    """Search zhipin.com for 'python' and print the text of each result row."""
    browser = webdriver.Chrome(chrome, options=_stealth_options())
    try:
        browser.get('https://www.zhipin.com')
        browser.maximize_window()

        # Type the query and press the search button.
        browser.find_element(
            By.XPATH,
            '//*[@id="wrap"]/div[4]/div/div/div[1]/form/div[2]/p/input',
        ).send_keys('python')
        browser.find_element(
            By.XPATH, '//*[@id="wrap"]/div[4]/div/div/div[1]/form/button'
        ).click()
        time.sleep(5)

        rows = browser.find_elements(By.XPATH, '//*[@id="main"]/div/div[2]/ul/li')
        for row in rows:
            print(row.text)
    finally:
        browser.quit()


def func7():
    """Download one track from music.163.com and write the bytes to disk."""
    api_url = 'http://music.163.com/song/media/outer/url?id=152428'
    header = {}
    # Without a timeout requests can block forever on a stalled connection.
    resp = requests.get(api_url, headers=header, timeout=REQUEST_TIMEOUT)
    # NOTE(review): '.mps' may be a typo for '.mp3'; file name kept unchanged.
    with open('朋友.mps', 'wb') as f:
        f.write(resp.content)


def func8():
    """Fetch http://www.baidu.com with browser-like headers and print the body."""
    # SECURITY(review): the Cookie value embeds a BDUSS session token.
    # Credentials should not be committed to source; kept verbatim here only
    # to preserve the request that is sent.
    # NOTE(review): Host names image.baidu.com while the URL targets
    # www.baidu.com - confirm which one is intended.
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Host': 'image.baidu.com',
        'Upgrade-Insecure-Requests': '1',
        'Cookie': 'BDqhfp=python3%E7%BD%91%E7%BB%9C%E7%88%AC%E8%99%AB%E5%BC%80%E5%8F%91%E5%AE%9E%E8%B7%B5%26%26NaN-1undefined%26%260%26%261; PSTM=1585531752; BAIDUID=CA59CB70CF87EDC55E83B1B18AC659EE:FG=1; BIDUPSID=A7FD529C72D6849145DB5CEB87BF005D; BDUSS=NWd2RHcHRrMXFRMHczMmNaSWxHa1NZUncyZGdNS1NpRUFla2RuWUw2ZmZEczVlRVFBQUFBJCQAAAAAAAAAAAEAAAC~o9uvbGlhbmdnZWdlaGNsAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAN-Bpl7fgaZeel; MCITY=-131%3A; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; BDRCVFR[feWj1Vr5u3D]=I67x6TjHwwYf0; delPer=0; PSINO=5; BDRCVFR[dG2JNJb_ajR]=mk3SLVN4HKm; BDRCVFR[-pGxjrCMryR]=mk3SLVN4HKm; H_PS_PSSID=; userFrom=www.baidu.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36',
    }
    response = requests.get(
        'http://www.baidu.com', headers=headers, timeout=REQUEST_TIMEOUT
    )
    print(response.text)


# Run the demo only when executed as a script, so importing this module does
# not fire a network request.
if __name__ == '__main__':
    func8()