验证码识别与模拟登录

一、验证码识别

这里的验证码识别基于线上打码平台:把验证码图片提交给平台,由平台返回识别结果

-打码平台:

  1.超级鹰(http://www.chaojiying.com/)

    -注册(用户中心身份)

    -登录(用户中心身份)

      -1.查询余额,余额不足时请充值

      -2.创建一个软件ID(899370)

      -3.下载示例代码

  2.云打码

  3.打码兔

示例代码

#!/usr/bin/env python
# coding:utf-8

import requests
from hashlib import md5

class Chaojiying_Client(object):
    """Small HTTP client for the Chaojiying captcha-recognition platform.

    Every request carries the account name, the MD5 hex digest of the
    password (field ``pass2``) and the software ID, plus the fixed headers
    built in ``__init__``.
    """

    # Endpoints used by the methods below.
    _UPLOAD_URL = 'http://upload.chaojiying.net/Upload/Processing.php'
    _REPORT_URL = 'http://upload.chaojiying.net/Upload/ReportError.php'

    def __init__(self, username, password, soft_id, timeout=60):
        """
        username: platform account name
        password: plain-text password (str); only its MD5 hex digest is kept and sent
        soft_id: software ID created in the platform's user center
        timeout: seconds passed to ``requests.post`` as its timeout;
                 ``None`` waits indefinitely (the behavior before this
                 parameter existed)
        """
        self.username = username
        self.password = md5(password.encode('utf8')).hexdigest()
        self.soft_id = soft_id
        self.timeout = timeout
        # Credentials merged into the form data of every request.
        self.base_params = {
            'user': self.username,
            'pass2': self.password,
            'softid': self.soft_id,
        }
        self.headers = {
            'Connection': 'Keep-Alive',
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
        }

    def _post(self, url, params, files=None):
        """POST *params* merged with the credentials to *url*; return the decoded JSON reply."""
        payload = dict(params)
        # Credentials are applied last, so they win over same-named keys in *params*.
        payload.update(self.base_params)
        response = requests.post(
            url,
            data=payload,
            files=files,
            headers=self.headers,
            timeout=self.timeout,  # without a timeout a stalled server blocks forever
        )
        return response.json()

    def PostPic(self, im, codetype):
        """
        Upload a captcha image for recognition.

        im: image bytes
        codetype: captcha type code, see http://www.chaojiying.com/price.html
        Returns the platform's JSON reply as a Python object.
        """
        return self._post(
            self._UPLOAD_URL,
            {'codetype': codetype},
            files={'userfile': ('ccc.jpg', im)},
        )

    def PostPic_base64(self, base64_str, codetype):
        """
        Upload a captcha image given as a base64 string.

        base64_str: base64-encoded image, sent in the ``file_base64`` form field
        codetype: captcha type code, see http://www.chaojiying.com/price.html
        Returns the platform's JSON reply as a Python object.
        """
        return self._post(
            self._UPLOAD_URL,
            {'codetype': codetype, 'file_base64': base64_str},
        )

    def ReportError(self, im_id):
        """
        Report a wrongly recognized captcha.

        im_id: image ID of the captcha being reported
        Returns the platform's JSON reply as a Python object.
        """
        return self._post(self._REPORT_URL, {'id': im_id})



#封装一个执行方法
def tranformImageCode(imagePath,imageType):
    """Recognize the captcha image stored at *imagePath* and return its text.

    imagePath: path of the captcha image file on disk
    imageType: captcha type code, forwarded to ``Chaojiying_Client.PostPic``
               (see http://www.chaojiying.com/price.html)
    Returns the ``pic_str`` field of the platform's JSON reply.
    Raises KeyError if the reply has no ``pic_str`` field.
    """
    # Placeholders: substitute your own account, password and the software ID
    # generated under "user center >> software ID".
    chaojiying = Chaojiying_Client('用户账号', '用户密码', '949287')
    # Context manager so the image file handle is closed after reading.
    with open(imagePath, 'rb') as image_file:
        im = image_file.read()
    return chaojiying.PostPic(im, imageType)["pic_str"]


# Raw string: the Windows path contains backslashes that must not be parsed as
# escape sequences (the plain literal triggers invalid-escape warnings).
print(tranformImageCode(r'D:\爬虫项目\Chaojiying_Python\chaojiying_Python/a.jpg',1902))

二、模拟登录

流程:

  -向点击登录按钮时请求的url发送post请求

  -处理请求参数

    -用户名

    -密码

    -验证码

    -其他防伪参数

# Simulated login to so.gushiwen.cn: fetch the login page, recognize its
# captcha, then submit the login form through the same session.
import os

# etree.HTML() and .xpath() below are lxml APIs; the snippet used `etree`
# without importing it.
from lxml import etree

# A single Session is used for every request so cookies persist between them
# (presumably the captcha is bound to the session — confirm against the site).
session = requests.Session()
url = "https://so.gushiwen.cn/user/login.aspx?from=http://so.gushiwen.cn/user/collect.aspx"
headers = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36"}

page_text = session.get(url=url,headers=headers).text

# Parse the captcha image address out of the login page.
tree = etree.HTML(page_text)
img_src = 'https://so.gushiwen.cn/' + tree.xpath('//*[@id="imgCode"]/@src')[0]
# Save the captcha image locally.
img_data = session.get(img_src,headers=headers).content
with open('./code.jpg','wb') as fp:
    fp.write(img_data)

# Recognize the captcha.
code_text = tranformImageCode('./code.jpg',1902)
print(code_text)

# Hidden form fields: prefer the values embedded in the page just fetched and
# fall back to the previously hard-coded ones when the page does not carry them.
viewstate = tree.xpath('//input[@name="__VIEWSTATE"]/@value')
viewstate_generator = tree.xpath('//input[@name="__VIEWSTATEGENERATOR"]/@value')

login_url = 'https://so.gushiwen.cn/user/login.aspx?from=http%3a%2f%2fso.gushiwen.cn%2fuser%2fcollect.aspx'  # URL the login form is POSTed to
data = {
    "__VIEWSTATE": viewstate[0] if viewstate else "npWNBT+tK2+N2DEleq19LTIIVsGC6RS0c4MT3S4qamIevyKoXBYgMUhuepzlZDtNDNiRaUW8eAF1rCN8XxmDuPXzRg8bsy7bOx0evDPWd1xGMWVga4lsQZu+YPeGc64TLcSF/tFeNNZQBPFj2poVjNm/9hs=",
    "__VIEWSTATEGENERATOR": viewstate_generator[0] if viewstate_generator else "C93BE1AE",
    "from": "http://so.gushiwen.cn/user/collect.aspx",
    # Account credentials come from the environment instead of being committed
    # in the source; the defaults are placeholders to replace with your own.
    "email": os.environ.get("GUSHIWEN_EMAIL", "用户账号"),
    "pwd": os.environ.get("GUSHIWEN_PWD", "用户密码"),
    "code": code_text,
    "denglu": "登录",
}

# Submit the login form and keep the returned page for inspection.
page_text_login = session.post(url=login_url,headers=headers,data=data).text
with open('./gushiwen.html','w',encoding='utf-8') as fp:
    fp.write(page_text_login)

 

posted @ 2023-05-31 10:02  小青年て  阅读(88)  评论(0)  编辑  收藏  举报