(32) Scrapy login
import scrapy


class LogSpider(scrapy.Spider):
    name = 'log'
    allowed_domains = ['sxt.cn']
    # start_urls = ['http://sxt.cn/']

    def start_requests(self):
        # The login URL and credentials were left blank in the original note.
        url = ''
        form_data = {
            'user': '',
            'password': '',
        }
        # Submit the login form as a POST request and handle the result in parse().
        yield scrapy.FormRequest(url, formdata=form_data, callback=self.parse)

    def parse(self, response):
        print(response.text)
This version has errors; it is kept here only as a note.
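A pattern that often avoids such problems is scrapy.FormRequest.from_response, which parses the login <form> on the page and merges hidden fields (tokens, hashes) automatically before posting. A minimal sketch, assuming a login page URL and form field names that would need to be checked against the real site:

import scrapy


class FromResponseLogSpider(scrapy.Spider):
    # Hypothetical spider for illustration; the URL and field names are assumptions.
    name = 'log_from_response'
    start_urls = ['http://sxt.cn/login']  # assumed login page URL, replace with the real one

    def parse(self, response):
        # from_response() locates the form in the page and fills in any hidden
        # inputs it already contains, then merges our visible fields on top.
        yield scrapy.FormRequest.from_response(
            response,
            formdata={'user': 'your_username', 'password': 'your_password'},  # placeholders
            callback=self.after_login,
        )

    def after_login(self, response):
        # Print the page returned after the login POST to check whether it worked.
        print(response.text)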
import re

import scrapy


class LogSpider(scrapy.Spider):
    name = 'log'
    allowed_domains = ['ganji.com']
    start_urls = ['https://passport.ganji.com/login.php']

    def parse(self, response):
        # Pull the anti-CSRF token (__hash__) out of the login page source.
        hash_code = re.findall(r'"__hash__":"(.+)"', response.text)[0]
        # Locate the captcha image and request it, passing the token along in meta.
        img_url = response.xpath('''//div/label/img[@alt='图片验证码']/@src''').extract_first()
        yield scrapy.Request(response.urljoin(img_url), callback=self.parse_info,
                             meta={'hash_code': hash_code})

    def parse_info(self, response):
        hash_code = response.meta['hash_code']
        # Save the captcha image locally so it can be opened and typed in by hand.
        with open('yzm.jpg', 'wb') as f:
            f.write(response.body)
        code = input('请输入验证码')
        form_data = {
            'username': '15506306812',
            'password': 'L06306812',
            'setcookie': '0',
            'checkCode': code,
            'next': '/',
            'source': 'passport',
            '__hash__': hash_code,
        }
        url = 'https://passport.ganji.com/login.php?next=/'
        # POST the login form; the session cookie is kept by Scrapy's cookie middleware.
        yield scrapy.FormRequest(url, formdata=form_data, callback=self.after_login)

    def after_login(self, response):
        print(response.text)
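Because Scrapy's built-in CookiesMiddleware reuses the session cookies from the login POST, after_login can go on to crawl pages that require authentication. A minimal sketch of that follow-up step, meant to replace after_login in the spider above (the member-centre URL is hypothetical):

    def after_login(self, response):
        print(response.text)  # inspect the login result
        # The session cookie from the login POST is attached to this request
        # automatically by Scrapy's CookiesMiddleware.
        yield scrapy.Request('https://www.ganji.com/user/',  # hypothetical member-centre URL
                             callback=self.parse_user)

    def parse_user(self, response):
        # Only reachable with a valid login session.
        print(response.url, response.status)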
