第十关带验证码的登录爬取
方法一:走登录验证码逻辑,搞定验证码和一个每次都会变化的参数(captchaId),练技术。
点击查看代码
import requests
from lxml import etree
# Approach 1: log in through the captcha-protected form, then scrape the table
# from the page returned by the login request.  A Session is used so cookies
# set while fetching the login page and captcha are sent with the login POST.

# Seconds to wait for each HTTP request; requests has no default timeout and
# would otherwise block forever on a stalled connection.
REQUEST_TIMEOUT = 10

session = requests.Session()
url = 'https://www.spiderbuf.cn/playground/e02'
login_url = 'https://www.spiderbuf.cn/playground/e02/login'
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36"
}

# Fetch the login page to get the captcha image URL and the captchaId.
response = session.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
response.raise_for_status()
root = etree.HTML(response.text)
img_srcs = root.xpath('//img[@id="image"]/@src') if root is not None else []
captcha_ids = root.xpath('//input[@name="captchaId"]/@value') if root is not None else []
if not img_srcs or not captcha_ids:
    # Fail with a clear message instead of a bare IndexError on [0].
    raise SystemExit('captcha image or captchaId not found on the login page')

# The image src is page-relative ("./..."); rewrite it to an absolute URL.
src = 'https://www.spiderbuf.cn/playground' + img_srcs[0].replace('./', '/')
captchaId = captcha_ids[0]

# Download the captcha image so it can be opened and read by a human.
res = session.get(url=src, headers=headers, timeout=REQUEST_TIMEOUT)
res.raise_for_status()
with open('yzm.jpg', 'wb') as f:
    f.write(res.content)

yzm = input('输入验证码:')
data = {
    "username": "admin",
    "password": "123456",
    "captchaSolution": yzm,
    "captchaId": captchaId
}

# Send the login request.
login_res = session.post(url=login_url, headers=headers, data=data, timeout=REQUEST_TIMEOUT)
login_res.raise_for_status()

# Parse the data: every table row except the first (header) row.
html = etree.HTML(login_res.text)
trs = html.xpath('//table//tr')[1:] if html is not None else []
if not trs:
    # Without this check a rejected login would just print nothing.
    raise SystemExit('no table rows in the login response; the login may have failed')

for tr in trs:
    # First text node of each of the first five cells; '' when a cell has none.
    paiming, guzhi, xinxi, ceo, hangye = (
        (tr.xpath('./td[%d]/text()' % col) or [''])[0] for col in range(1, 6)
    )
    print(paiming, guzhi, xinxi, ceo, hangye)
方法二:直接携带登录后的 cookie 请求列表页
点击查看代码
import requests
from lxml import etree
# Fetch the list page directly, sending a fixed `admin` cookie instead of going
# through the login form.
# NOTE(review): the cookie value is presumably copied from a logged-in browser
# session and will need replacing once it expires -- confirm.

# Seconds to wait for the HTTP request; requests has no default timeout and
# would otherwise block forever on a stalled connection.
REQUEST_TIMEOUT = 10

url = 'https://www.spiderbuf.cn/playground/e02/list'
headers = {
    "cookie": "admin=d957506323983e22b4504f235941d976",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36"
}

response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
response.raise_for_status()

# Every table row except the first (header) row.
html = etree.HTML(response.text)
trs = html.xpath('//table//tr')[1:] if html is not None else []
if not trs:
    # Without this check a rejected cookie would just print nothing.
    raise SystemExit('no table rows in the response; the cookie may be invalid or expired')

for tr in trs:
    # First text node of each of the first five cells; '' when a cell has none.
    paiming, guzhi, xinxi, ceo, hangye = (
        (tr.xpath('./td[%d]/text()' % col) or [''])[0] for col in range(1, 6)
    )
    print(paiming, guzhi, xinxi, ceo, hangye)

浙公网安备 33010602011771号