Python 模拟登录万能法
用到的库有 selenium 和 requests。先通过 selenium 完成模拟登录,再将其获得的 Cookies 传入 requests,最终用 requests 抓取网站。这样做的优点是:既规避了 selenium 本身抓取速度慢的问题(因为仅用它来登录),又省去了用 requests 登录时手工构造 Cookies 的繁琐过程(因为 Cookies 直接取自 selenium)。
登录代码-知乎
from selenium import webdriver
from requests import Session
from time import sleep
# Log in to Zhihu through a Selenium-driven Chrome window, then copy the
# browser's cookies into a requests Session so that later requests made
# through `req` carry the logged-in cookies.
req = Session()
req.headers.clear()  # drop the default headers that requests sets on a new Session

chromePath = r'D:\Python Program\chromedriver.exe'
wd = webdriver.Chrome(executable_path= chromePath)

zhihuLogInUrl = 'https://www.zhihu.com/signin'
wd.get(zhihuLogInUrl)

# NOTE(review): find_element_by_xpath / executable_path are the Selenium 3
# API; newer Selenium releases use find_element(By.XPATH, ...) and a Service
# object instead -- confirm against the installed Selenium version.
# The absolute XPaths below are tied to the page layout and will break if the
# sign-in page markup changes.
wd.find_element_by_xpath('/html/body/div[1]/div/div[2]/div[2]/div[1]/div[1]/div[2]/span').click()
wd.find_element_by_xpath('/html/body/div[1]/div/div[2]/div[2]/form/div[1]/div[1]/input').send_keys('username')
wd.find_element_by_xpath('/html/body/div[1]/div/div[2]/div[2]/form/div[1]/div[2]/input').send_keys('password')
sleep(10)  # pause so the captcha can be typed in by hand
wd.find_element_by_xpath('/html/body/div[1]/div/div[2]/div[2]/form/div[2]/button').submit()
sleep(10)  # wait for the cookies to load after submitting

# Transfer every browser cookie (name/value pair) into the requests session.
cookies = wd.get_cookies()
for cookie in cookies:
    req.cookies.set(cookie['name'], cookie['value'])
18
1
from selenium import webdriver
from requests import Session
from time import sleep

# Log in to Zhihu through a Selenium-driven Chrome window, then copy the
# browser's cookies into a requests Session so that later requests made
# through `req` carry the logged-in cookies.
req = Session()
req.headers.clear()  # drop the default headers that requests sets on a new Session

chromePath = r'D:\Python Program\chromedriver.exe'
wd = webdriver.Chrome(executable_path= chromePath)

zhihuLogInUrl = 'https://www.zhihu.com/signin'
wd.get(zhihuLogInUrl)

# NOTE(review): find_element_by_xpath / executable_path are the Selenium 3
# API; newer Selenium releases use find_element(By.XPATH, ...) and a Service
# object instead -- confirm against the installed Selenium version.
wd.find_element_by_xpath('/html/body/div[1]/div/div[2]/div[2]/div[1]/div[1]/div[2]/span').click()
wd.find_element_by_xpath('/html/body/div[1]/div/div[2]/div[2]/form/div[1]/div[1]/input').send_keys('username')
wd.find_element_by_xpath('/html/body/div[1]/div/div[2]/div[2]/form/div[1]/div[2]/input').send_keys('password')
sleep(10)  # pause so the captcha can be typed in by hand
wd.find_element_by_xpath('/html/body/div[1]/div/div[2]/div[2]/form/div[2]/button').submit()
sleep(10)  # wait for the cookies to load after submitting

# Transfer every browser cookie (name/value pair) into the requests session.
cookies = wd.get_cookies()
for cookie in cookies:
    req.cookies.set(cookie['name'], cookie['value'])
浙公网安备 33010602011771号