需要逆向执行的js中有window、document时,使用jsdom。(pyexecjs、nodejs、jsdom)

在爬取网站数据时,当需要逆向执行带有window、document等语句的js代码时,需要用到jsdom。

1,npm install jsdom  //安装jsdom(jsdom是Node.js的npm包,需用npm安装而不是pip;Python端另需 pip install PyExecJS requests)

2,编写js文件内容,demo.js    此为网上例子内容,未验证。

// Load jsdom (a Node.js package) and take its JSDOM constructor.
const jsdom = require("jsdom");
const { JSDOM } = jsdom;
// Build a DOM from an HTML string. The page embeds a packed
// `eval(function (p, a, c, k, e, d) {...})` script; the functions defined
// further down in this file (f1, findDimensions, reload) use the same names
// that appear in that script's keyword table.
// NOTE(review): no options object is passed to JSDOM here; per the jsdom docs
// embedded scripts only run when `runScripts` is set, so the packed script is
// presumably just inert markup in this DOM — confirm.
const dom = new JSDOM('<html><head></head><body>开启js,刷新吧,\n' +
    '<script>\n' +
    '    eval(function (p, a, c, k, e, d) {\n' +
    '        e = function (c) {\n' +
    '            return (c < a ? "" : e(parseInt(c / a))) + ((c = c % a) > 35 ? String.fromCharCode(c + 29) : c.toString(36))\n' +
    '        };\n' +
    '        if (!\'\'.replace(/^/, String)) {\n' +
    '            while (c--) d[e(c)] = k[c] || e(c);\n' +
    '            k = [function (e) {\n' +
    '                return d[e]\n' +
    '            }];\n' +
    '            e = function () {\n' +
    '                return \'\\\\w+\'\n' +
    '            };\n' +
    '            c = 1;\n' +
    '        }\n' +
    '        ;\n' +
    '        while (c--) if (k[c]) p = p.replace(new RegExp(\'\\\\b\' + e(c) + \'\\\\b\', \'g\'), k[c]);\n' +
    '        return p;\n' +
    '    }(\'l E(a){5 b=9.q;5 c=b.G("; ");M(5 i=0;i<c.D;i++){5 d=c[i].G("=");j(a==d[0]){g d[1]}}g""}m=E(\\\'m\\\');5 7="Q+/=";K=R.N;l r(a){5 b,i,k;5 c,e,n;k=a.D;i=0;b="";O(i<k){c=a.o(i++)&P;j(i==k){b+=7.8(c>>2);b+=7.8((c&p)<<4);b+="==";s}e=a.o(i++);j(i==k){b+=7.8(c>>2);b+=7.8(((c&p)<<4)|((e&A)>>4));b+=7.8((e&B)<<2);b+="=";s}n=a.o(i++);b+=7.8(c>>2);b+=7.8(((c&p)<<4)|((e&A)>>4));b+=7.8(((e&B)<<2)|((n&S)>>6));b+=7.8(n&L)}g b}l H(){5 w=f.10||9.v.z||9.u.z;5 h=f.11||9.v.C||9.u.C;j(w*h<=12){g I}5 x=f.13;5 y=f.Z;j(x+w<=0||y+h<=0||x>=f.t.W||y>=f.t.V){g I}g X}l J(){j(H()){}Y{5 a="";a="T="+r(m.U(1,3))+"; F=/";9.q=a;a="e="+r(m)+"; F=/";9.q=a;f.14(K)}}J();\', 62, 67, \'|||||var||encoderchars|charAt|document|||||c2|window|return|||if|len|function|session|c3|charCodeAt|0x3|cookie|f1|break|screen|body|documentElement||||clientWidth|0xf0|0xf|clientHeight|length|getCookie|path|split|findDimensions|true|reload|url|0x3f|for|href|while|0xff|ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789|location|0xc0|c1|substr|height|width|false|else|screenY|innerWidth|innerHeight|120000|screenX|open\'.split(\'|\'), 0, {}))\n' +
    '</script>\n' +
    '</body></html>');
// Expose the jsdom window/document under the browser-style names that
// findDimensions() reads. These are assigned without a declaration, so they
// become implicit globals (this only works in non-strict code).
window = dom.window;
document = window.document;
// function getCookie(a)
// {
//     var b= "session=0b45495fefe1528103982a7f043f10be;";
//     var c=b.split("; ");
//     for(var i=0;i<c.length;i++) {
//         var d=c[i].split("=");
//         if(a==d[0]){
//             return d[1]
//         }
//     }
//     return""
// }
// session=getCookie('session');
// Base64 alphabet used by f1(): 64 symbol characters followed by '='.
const encoderchars="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";
// Address of the exam page. Declared explicitly: the original assigned to an
// undeclared identifier, which creates an implicit global and throws a
// ReferenceError in strict mode / ES modules.
const url='http://datamining.comratings.com/exam';
/**
 * Base64-encode a string using the `encoderchars` alphabet.
 *
 * Input is consumed in groups of up to three UTF-16 code units; the bit masks
 * applied below only ever use the low 8 bits of each unit. A trailing group
 * of one or two units is padded with "==" or "=" respectively.
 *
 * @param {string} a - Text to encode.
 * @returns {string} The Base64 text ("" for empty input).
 */
function f1(a){
    const total = a.length;
    const out = [];
    for (let start = 0; start < total; start += 3) {
        const remaining = total - start;

        const first = a.charCodeAt(start) & 0xff;
        const symbol1 = encoderchars.charAt(first >> 2);
        if (remaining === 1) {
            // Lone final unit: two symbols plus two padding characters.
            out.push(symbol1, encoderchars.charAt((first & 0x3) << 4), "==");
            break;
        }

        const second = a.charCodeAt(start + 1);
        const symbol2 = encoderchars.charAt(((first & 0x3) << 4) | ((second & 0xf0) >> 4));
        if (remaining === 2) {
            // Two final units: three symbols plus one padding character.
            out.push(symbol1, symbol2, encoderchars.charAt((second & 0xf) << 2), "=");
            break;
        }

        const third = a.charCodeAt(start + 2);
        out.push(
            symbol1,
            symbol2,
            encoderchars.charAt(((second & 0xf) << 2) | ((third & 0xc0) >> 6)),
            encoderchars.charAt(third & 0x3f)
        );
    }
    return out.join("");
}
/**
 * Check the window geometry exposed by the global `window` / `document`.
 *
 * Width and height come from window.innerWidth/innerHeight, falling back to
 * document.documentElement and then document.body client sizes when the
 * previous value is falsy.
 *
 * @returns {boolean} true when width * height <= 120000, or when the window
 *   rectangle (screenX/screenY plus size) lies completely outside the bounds
 *   given by window.screen; false otherwise.
 */
function findDimensions(){
    let width = window.innerWidth;
    if (!width) {
        width = document.documentElement.clientWidth;
    }
    if (!width) {
        width = document.body.clientWidth;
    }

    let height = window.innerHeight;
    if (!height) {
        height = document.documentElement.clientHeight;
    }
    if (!height) {
        height = document.body.clientHeight;
    }

    if (width * height <= 120000) {
        return true;
    }

    const left = window.screenX;
    const top = window.screenY;
    // Off-screen test: entirely left of / above the origin, or starting at or
    // beyond the right / bottom edge of the screen.
    return left + width <= 0
        || top + height <= 0
        || left >= window.screen.width
        || top >= window.screen.height;
}
/**
 * Build the `c1` / `c2` cookie text derived from a session value.
 *
 * Unlike the browser-side original (kept in the packed page script), this
 * version does not write document.cookie or open a window; it only returns
 * the computed text so the caller can set the cookies itself.
 *
 * @param {string} session - Session value to encode.
 * @returns {string} Text of the form "c1=<f1 of chars 1..3>; c2=<f1 of session>;".
 */
function reload(session){
    // slice(1, 4) selects the same three characters as the deprecated
    // substr(1, 3). Locals are declared explicitly: the original assigned to
    // an undeclared `a1`, which leaks a global and throws in strict mode.
    const c1 = f1(session.slice(1, 4));
    const c2 = f1(session);
    return `c1=${c1}; c2=${c2};`;
}

 

3、逆向代码:用Python(requests + execjs)调用demo.js中的reload函数

import requests
# import js2py
import execjs
 
 
 
# print(str_content)
 
# Fetch the exam page once to obtain the cookies the server hands out.
res = requests.get('http://datamining.comratings.com/exam')
cookie = res.cookies
print(cookie.values(), type(cookie.values()))

# Read demo.js through a context manager so the file handle is closed, then
# call its reload() with the first cookie value.
with open(r"demo.js", encoding='utf8') as js_file:
    js_source = js_file.read()
str_content = execjs.compile(js_source).call('reload', cookie.values()[0])
print(str_content)

# reload() returns "c1=<value>; c2=<value>;". Split each pair on the FIRST '='
# only, because Base64 values may themselves end in '=' padding.
c1_part, c2_part = str_content.split(';')[:2]
name1, _, value1 = c1_part.strip().partition('=')
value2 = c2_part.split('c2=')[1]
cookie.set(name1, value1)
cookie.set('c2', value2)

res3 = requests.get('http://datamining.comratings.com/exam3', cookies=cookie)
# Mode must be 'w': the original opened the file with 'r', so f.write() raised
# io.UnsupportedOperation (and open() failed if the file did not exist yet).
with open('content_exam.html', 'w', encoding='utf8') as f:
    f.write(res3.text)

 

posted on 2021-02-21 10:23  东门乱弹琴  阅读(1713)  评论(0)    收藏  举报

导航