海南免税商城数据爬取(破解滑块验证码)

1. python部分

main.py

# -*- coding:utf-8 -*-

# @Time : 2024/4/15 21:24
# @Author : 快乐的小猴子
# @Version :
# @Function :
import subprocess
from functools import partial  # 专门用来固定参数的
subprocess.Popen = partial(subprocess.Popen, encoding="utf-8")
import execjs
import requests
import base64
"""
https://m.hltmsp.com/passport/login?backURL=%2F%2Fm.hltmsp.com%2Fuser
表单携带参数  sign
第一次的ident的值为时间戳  后续的ident的值为从响应里进行获取

思路
第一次登录请求会失败  并返回滑动验证
滑动验证通过后  再次进行登录  所以当前一共是4次请求
第一次请求
携带当前的参数 其中第一次参数中的ident为时间戳
第二次请求
请求当前的滑动验证码的接口 获取滑动验证的俩个图片  其中会返回加密后的ident和滑动验证码的高度y
第三次请求
将滑动验证后的x,y的值进行传递 当前参数中的ident为上次返回的
第四次请求
进行登录 将上面的ident获取 并进行传递

注意
会有一定概率失败 重新运行即可
"""
# Create the session object; every request in this script goes through it
session = requests.Session()
# Login endpoint URL
url = 'https://api.hltmsp.com/user/index/login'
# Headers sent with every request in this script
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36',
    'Sec-Ch-Ua-Platform': "Android",
    'Sec-Fetch-Site': 'same-site',
    'Referer': 'https://m.hltmsp.com/',
}
# Account credentials
mobile = ""  # fill in your own registered account
password = "" # fill in your own registered account
nonce_str = 'o5ja9e8rrf6foed8r7dfewdrfsd'    # random value
# Form fields of the login POST; sign, time_stamp and ident are filled in
# below from the values computed by the JS signing helper.
data = {
    "mobile": mobile,
    "password": password,
    "platformId": "1",
    "shop_id": "6",
    "token": "",
    "appid": "duty_h5",
    "nonce_str": nonce_str,
}
# Load the JS signing helpers and compile them so they can be called from Python
with open('mianshui.js', 'r', encoding='UTF-8') as f:
    js_code = f.read()
context = execjs.compile(js_code)
# Request 1: first login attempt. No ident is passed, so mianshui_login
# generates one from the current timestamp and returns it.
result = context.call('mianshui_login', mobile, password, nonce_str)
print(result, '第一次登录请求')
data['sign'] = result['sign']
data['time_stamp'] = result['time_stamp']
data['ident'] = result['ident']
# The response is deliberately discarded: per the notes at the top of the
# script this first attempt is expected to fail and ask for slider verification.
session.post(url, headers=headers, data=data)
# Reference sample of the plain-text string that is MD5-signed for the
# get-code request (a bare string expression; it has no runtime effect).
'appid=duty_h5&ident=1701331075944&nonce_str=ofweg8efj3j&platformId=1&shop_id=6&t=1701331075943&time_stamp=33893723382&app_key=hltmsp5615466'
# Request 2: compute sign / time_stamp / ident for the slider-captcha endpoint
result = context.call('mianshui_code')


print(result, '第二次请求-获取验证码')

params = {
    "platformId": "1",
    "shop_id": "6",
    # NOTE(review): mianshui_code signs the string with t = ident + 2, while
    # ident itself is sent here as t — confirm the server accepts this.
    "t": result['ident'],
    "ident": result['ident'],
    "appid": "duty_h5",
    # Same literal that mianshui_code hard-codes into its signed string
    "nonce_str": "ofweg8efj3j",
    "time_stamp": result['time_stamp'],
    "sign": result['sign']
}
code_url = 'https://api.hltmsp.com/slider/index/get-code'
res = session.get(code_url, headers=headers, params=params)
code_data = res.json()['data']
y = code_data['y']  # height (y) of the gap in the current slider captcha
ident = code_data['ident']  # ident to send back as a GET parameter after the slider has been solved
# The two slider-captcha images arrive base64-encoded;
# take the part after the first comma of each value as the base64 payload.
puzzle = code_data['puzzle'].split(',')[1]
watermark = code_data['watermark'].split(',')[1]
watermark_path = "watermark.png"
puzzle_path = "puzzle.png"
puzzle_data = base64.b64decode(puzzle)
# Write the decoded base64 captcha images to local files
with open(puzzle_path, 'wb') as f:
    f.write(puzzle_data)
watermark_data = base64.b64decode(watermark)
with open(watermark_path, 'wb') as f:
    f.write(watermark_data)
# Captcha recognition through the TuJian helper (tujian.py)
from tujian import base64_api
result = base64_api(uname='用户名', pwd='密码', img=puzzle_path, watermark=watermark_path, typeid=18)

# Assemble the slider position value as "<first comma-separated field of the
# recognition result>_<y>" (the first field is presumably the x offset).
# NOTE(review): base64_api returns the service's error message string when
# recognition fails, so a failure is not detected here — confirm and handle.
position = str(result.split(',')[0])+'_'+str(y)
check_params = {
    "position": position,
    "ident": ident,
    "platformId": "1",
    "shop_id": "6",
    "appid": "duty_h5",
    "nonce_str": nonce_str,
    # Sample values only; t, time_stamp and sign are filled in below:
    # "t": "1701333773050",
    # "time_stamp": "45063372338",
    # "sign": "2897A6ADD258E1789EBF4756CD9B7007"
}

# Template of the plain-text string that mianshui_check signs:
# 'appid=duty_h5&ident=${ident}&nonce_str=${nonce_str}&platformId=1&position=${position}&shop_id=6&t=1701334716993&time_stamp=${time_stamp}&app_key=hltmsp5615466'
check_url = 'https://api.hltmsp.com/slider/index/check'
# Request 3: sign the slider position together with the ident returned by get-code
result = context.call('mianshui_check', ident, nonce_str, position)
print(result, '第三次 发送验证码值')
check_params['sign'] = result['sign']
check_params['time_stamp'] = result['time_stamp']
check_params['t'] = result['time']
# Send the slider verification request
res = session.get(check_url, headers=headers, params=check_params)
print(res.json())


# Request 4: final login. The ident obtained from the captcha response is
# passed in, so mianshui_login signs with it instead of a fresh timestamp.
result = context.call('mianshui_login', mobile, password, data['nonce_str'], ident)
print(result, '第四次 进行最后登录')
# Overwrite the fields left over from the first login attempt
data['sign'] = result['sign']
data['time_stamp'] = result['time_stamp']
data['ident'] = ident
res = session.post(url, headers=headers, data=data)
print(res.json())

tujian.py

import base64
import json
import requests
"""
识别验证码图片 图鉴
"""

def _file_to_base64(path):
    """Return the content of the file at *path* as a base64-encoded string."""
    with open(path, 'rb') as f:
        return base64.b64encode(f.read()).decode()


def base64_api(uname, pwd, img, watermark, typeid, timeout=60):
    """Send a slider-captcha image pair to the ttshitu predict API.

    Args:
        uname: ttshitu account user name.
        pwd: ttshitu account password.
        img: path of the image file sent in the ``image`` field.
        watermark: path of the image file sent in the ``imageback`` field.
        typeid: recognition type id forwarded to the service unchanged.
        timeout: seconds to wait for the HTTP request before ``requests``
            raises a timeout error; without it a stalled connection would
            block the script forever.

    Returns:
        ``result["data"]["result"]`` from the JSON reply when its ``success``
        field is truthy; otherwise the reply's ``message`` string. Callers
        that parse the return value should account for the second case.

    Raises:
        OSError: if either image file cannot be opened.
    """
    data = {
        "username": uname,
        "password": pwd,
        "typeid": typeid,
        "image": _file_to_base64(img),
        "imageback": _file_to_base64(watermark),
    }
    # NOTE(review): the endpoint is plain http, so the credentials travel
    # unencrypted — confirm whether the service offers an https endpoint.
    response = requests.post("http://api.ttshitu.com/predict", json=data, timeout=timeout)
    result = json.loads(response.text)
    if result['success']:
        return result["data"]["result"]
    # Failure (e.g. the service reports it cannot handle the request right
    # now): hand the service's message back to the caller, as before.
    return result["message"]


if __name__ == "__main__":
    # Manual smoke test: recognise the captcha images saved in the working
    # directory and print the derived slider position.
    import os

    watermark = "watermark.png"
    puzzle = "puzzle.png"
    # Account credentials are read from the environment instead of being
    # committed to the source file.
    result = base64_api(
        uname=os.environ.get("TTSHITU_USERNAME", ""),
        pwd=os.environ.get("TTSHITU_PASSWORD", ""),
        img=puzzle,
        watermark=watermark,
        typeid=18,
    )
    print(result)
    print(type(result))
    # Assemble the slider position value; 1 stands in for the captcha's y value
    position = str(result.split(',')[0])+'_'+str(1)
    print(position)

mianshui.js

const CryptoJS = require('crypto-js');
/*
* 优化后的
* */

// Build the signed values for the login request.
//
// mobile, password, nonce_str - values placed into the signed string.
// i - ident to sign with; when falsy (first login) the current millisecond
//     timestamp is used instead, otherwise the ident passed in is used.
// Returns {sign, time_stamp, ident}.
function mianshui_login(mobile,password, nonce_str, i=null){
    const ident = i || new Date().getTime();
    // Offset "seconds since epoch" by a fixed constant and stringify it.
    const shifted = String(2022051288 + Math.floor(Date.now() / 1e3));
    // Random digit 1..9: how many trailing characters get rotated to the front.
    const cut = 1 + Math.floor(Math.random() * 9);
    const time_stamp = `${cut}${shifted.slice(-cut)}${shifted.slice(0, -cut)}`;
    // Plain text that is hashed: the fields in this fixed order plus the app key.
    const plain = [
        'appid=duty_h5',
        `ident=${ident}`,
        `mobile=${mobile}`,
        `nonce_str=${nonce_str}`,
        `password=${password}`,
        'platformId=1',
        'shop_id=6',
        `time_stamp=${time_stamp}`,
        'app_key=hltmsp5615466',
    ].join('&');
    const sign = CryptoJS.MD5(plain).toString().toUpperCase();
    console.log(sign)
    return {sign: sign, time_stamp: time_stamp, ident: ident}
}
/*'appid=duty_h5&ident=1701325763030&mobile=17333333333&nonce_str=wob2whdd9f91f6degffcf4eftr&password=17333333333&platformId=1&shop_id=6&time_stamp=97233770513&app_key=hltmsp5615466'*/


// Build the signed values for the slider-check request.
//
// ident     - ident value placed into the signed string.
// nonce_str - nonce placed into the signed string.
// position  - slider position string placed into the signed string.
// Returns {sign, time_stamp, time}; `time` is the exact millisecond
// timestamp that was signed as `t`, so the caller must send it as `t`.
function mianshui_check(ident, nonce_str, position){
    // Take ONE timestamp and use it both inside the signed string and in the
    // return value. Reading the clock twice could yield two different
    // milliseconds, making the returned `time` disagree with the signature.
    const time = Date.now();
    // Offset "seconds since epoch" by a fixed constant and stringify it.
    let t = 2022051288 + Math.floor(time / 1e3) + "";
    // Random digit 1..9: how many trailing characters get rotated to the front.
    let n = Math.floor(9 * Math.random()) + 1;
    let time_stamp = "" + n + t.substring(t.length - n, t.length) + t.substring(0, t.length - n)
    let o = `appid=duty_h5&ident=${ident}&nonce_str=${nonce_str}&platformId=1&position=${position}&shop_id=6&t=${time}&time_stamp=${time_stamp}&app_key=hltmsp5615466`
    const sign = CryptoJS.MD5(o).toString().toUpperCase();
    console.log(sign)
    return {sign:sign, time_stamp: time_stamp, time:time}
}
// Build the signed values for the "get slider captcha" request.
//
// Takes no arguments: ident is the current millisecond timestamp and the
// nonce is the fixed literal embedded in the signed string below.
// Note that the signed string uses t = ident + 2.
// Returns {sign, time_stamp, ident}.
function mianshui_code(){
    const ident = Date.now();
    // Offset "seconds since epoch" by a fixed constant and stringify it.
    const shifted = String(2022051288 + Math.floor(Date.now() / 1e3));
    // Random digit 1..9: how many trailing characters get rotated to the front.
    const cut = 1 + Math.floor(Math.random() * 9);
    const time_stamp = `${cut}${shifted.slice(-cut)}${shifted.slice(0, -cut)}`;
    const plain = [
        'appid=duty_h5',
        `ident=${ident}`,
        'nonce_str=ofweg8efj3j',
        'platformId=1',
        'shop_id=6',
        `t=${ident + 2}`,
        `time_stamp=${time_stamp}`,
        'app_key=hltmsp5615466',
    ].join('&');
    const sign = CryptoJS.MD5(plain).toString().toUpperCase();
    console.log(sign)
    return {sign: sign, time_stamp: time_stamp, ident: ident}
}

说明:
在 Terminal 控制台执行 npm install crypto-js 命令,安装 mianshui.js 所依赖的 crypto-js 加密库;如需更换 npm 镜像,可参考"js逆向部分"中关于将 npm 更换为国内镜像的随笔文章。

posted @ 2024-04-22 22:26  生而自由爱而无畏  阅读(3)  评论(0)    收藏  举报