需要逆向执行的js中有window、document时,使用jsdom。(pyexecjs、nodejs、jsdom)
在爬取网站数据时,当需要逆向执行带有window、document等语句的js代码时,需要用到jsdom。
1,npm install jsdom //安装jsdom(jsdom 是 Node.js 包,需要用 npm 安装,而不是 pip;Python 端另需 pip install PyExecJS)
2,编写js文件内容,demo.js 此为网上例子内容,未验证。
// Load jsdom and take its JSDOM constructor.
const jsdom = require("jsdom");
const { JSDOM } = jsdom;
// Build a DOM from the HTML of the target site's challenge page.
// The inline <script> is a packed `eval(function (p, a, c, k, e, d) {...})`
// payload; its word list names getCookie, f1, findDimensions and reload,
// which are the same names as the plain functions defined later in this file.
// NOTE(review): no `runScripts` option is passed to JSDOM here, so the inline
// script is presumably never executed by jsdom and the DOM only serves as a
// source of `window` / `document` objects — confirm against the jsdom docs.
const dom = new JSDOM('<html><head></head><body>开启js,刷新吧,\n' +
'<script>\n' +
' eval(function (p, a, c, k, e, d) {\n' +
' e = function (c) {\n' +
' return (c < a ? "" : e(parseInt(c / a))) + ((c = c % a) > 35 ? String.fromCharCode(c + 29) : c.toString(36))\n' +
' };\n' +
' if (!\'\'.replace(/^/, String)) {\n' +
' while (c--) d[e(c)] = k[c] || e(c);\n' +
' k = [function (e) {\n' +
' return d[e]\n' +
' }];\n' +
' e = function () {\n' +
' return \'\\\\w+\'\n' +
' };\n' +
' c = 1;\n' +
' }\n' +
' ;\n' +
' while (c--) if (k[c]) p = p.replace(new RegExp(\'\\\\b\' + e(c) + \'\\\\b\', \'g\'), k[c]);\n' +
' return p;\n' +
' }(\'l E(a){5 b=9.q;5 c=b.G("; ");M(5 i=0;i<c.D;i++){5 d=c[i].G("=");j(a==d[0]){g d[1]}}g""}m=E(\\\'m\\\');5 7="Q+/=";K=R.N;l r(a){5 b,i,k;5 c,e,n;k=a.D;i=0;b="";O(i<k){c=a.o(i++)&P;j(i==k){b+=7.8(c>>2);b+=7.8((c&p)<<4);b+="==";s}e=a.o(i++);j(i==k){b+=7.8(c>>2);b+=7.8(((c&p)<<4)|((e&A)>>4));b+=7.8((e&B)<<2);b+="=";s}n=a.o(i++);b+=7.8(c>>2);b+=7.8(((c&p)<<4)|((e&A)>>4));b+=7.8(((e&B)<<2)|((n&S)>>6));b+=7.8(n&L)}g b}l H(){5 w=f.10||9.v.z||9.u.z;5 h=f.11||9.v.C||9.u.C;j(w*h<=12){g I}5 x=f.13;5 y=f.Z;j(x+w<=0||y+h<=0||x>=f.t.W||y>=f.t.V){g I}g X}l J(){j(H()){}Y{5 a="";a="T="+r(m.U(1,3))+"; F=/";9.q=a;a="e="+r(m)+"; F=/";9.q=a;f.14(K)}}J();\', 62, 67, \'|||||var||encoderchars|charAt|document|||||c2|window|return|||if|len|function|session|c3|charCodeAt|0x3|cookie|f1|break|screen|body|documentElement||||clientWidth|0xf0|0xf|clientHeight|length|getCookie|path|split|findDimensions|true|reload|url|0x3f|for|href|while|0xff|ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789|location|0xc0|c1|substr|height|width|false|else|screenY|innerWidth|innerHeight|120000|screenX|open\'.split(\'|\'), 0, {}))\n' +
'</script>\n' +
'</body></html>');
// Publish jsdom's window/document as real globals so that code written for a
// browser (e.g. findDimensions below) can reference `window` and `document`.
// Assigning through globalThis has the same effect as the former undeclared
// assignments, but does not throw a ReferenceError under strict mode.
globalThis.window = dom.window;
globalThis.document = dom.window.document;
// function getCookie(a)
// {
// var b= "session=0b45495fefe1528103982a7f043f10be;";
// var c=b.split("; ");
// for(var i=0;i<c.length;i++) {
// var d=c[i].split("=");
// if(a==d[0]){
// return d[1]
// }
// }
// return""
// }
// session=getCookie('session');
// Alphabet used by f1: the 64 standard Base64 symbols followed by "=".
const encoderchars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";
// Address of the challenge page. Declared explicitly: the former undeclared
// assignment created an implicit global and throws in strict mode.
const url = 'http://datamining.comratings.com/exam';
/**
 * Encodes a string with the Base64-style scheme of the challenge script.
 *
 * Characters are consumed in groups of three; each group yields four symbols
 * looked up in `encoderchars`. A trailing group of one or two characters is
 * padded with "==" or "=" respectively.
 *
 * @param {string} a - text to encode
 * @returns {string} encoded text ("" for an empty input)
 */
function f1(a) {
  const total = a.length;
  let encoded = "";
  for (let pos = 0; pos < total; pos += 3) {
    // Only the low 8 bits of the first character take part in the encoding.
    const first = a.charCodeAt(pos) & 0xff;
    encoded += encoderchars.charAt(first >> 2);
    if (pos + 1 == total) {
      // One leftover character: two symbols plus double padding.
      encoded += encoderchars.charAt((first & 0x3) << 4);
      encoded += "==";
      break;
    }
    const second = a.charCodeAt(pos + 1);
    encoded += encoderchars.charAt(((first & 0x3) << 4) | ((second & 0xf0) >> 4));
    if (pos + 2 == total) {
      // Two leftover characters: three symbols plus single padding.
      encoded += encoderchars.charAt((second & 0xf) << 2);
      encoded += "=";
      break;
    }
    const third = a.charCodeAt(pos + 2);
    encoded += encoderchars.charAt(((second & 0xf) << 2) | ((third & 0xc0) >> 6));
    encoded += encoderchars.charAt(third & 0x3f);
  }
  return encoded;
}
/**
 * Reports whether the current window looks suspicious to the challenge script.
 *
 * The viewport size is read from `window.innerWidth` / `innerHeight`, falling
 * back to `document.documentElement` and then `document.body` client sizes
 * when the preceding value is falsy.
 *
 * @returns {boolean} true when the viewport area is at most 120000, or when
 *   the window lies entirely off the left/top edge or starts at or beyond the
 *   screen's width/height; false otherwise.
 */
function findDimensions() {
  const viewWidth = window.innerWidth
    || document.documentElement.clientWidth
    || document.body.clientWidth;
  const viewHeight = window.innerHeight
    || document.documentElement.clientHeight
    || document.body.clientHeight;

  const isTooSmall = viewWidth * viewHeight <= 120000;
  if (isTooSmall) {
    return true;
  }

  const left = window.screenX;
  const top = window.screenY;
  const isOffScreen = left + viewWidth <= 0
    || top + viewHeight <= 0
    || left >= window.screen.width
    || top >= window.screen.height;
  return isOffScreen;
}
/**
 * Builds the `c1` / `c2` cookie values derived from the session cookie.
 *
 * The packed page script writes these as two separate `document.cookie`
 * assignments (each with `path=/`) and then opens the page again. This version
 * only computes the values and returns them in one string, so that an external
 * caller can parse them and attach the cookies to its own request.
 *
 * @param {string} session - value of the `session` cookie
 * @returns {string} text of the form "c1=<encoded>; c2=<encoded>;"
 */
function reload(session){
// c1 encodes only the three characters starting at index 1; c2 encodes the
// whole session value.
const c1Value = f1(session.substr(1,3));
const c2Value = f1(session);
// Kept in a local: the previous undeclared `a1` leaked an implicit global and
// throws a ReferenceError in strict mode.
return "c1=" + c1Value + "; c2=" + c2Value + ";";
}
3、逆向代码
import requests
# import js2py
import execjs

# First request: the server answers with the challenge page and a `session` cookie.
res = requests.get('http://datamining.comratings.com/exam')
cookie = res.cookies
print(cookie.values(), type(cookie.values()))

# Compile demo.js and call its reload() with the session value. The file is
# read inside a `with` block so the handle is closed after reading.
with open(r"demo.js", encoding='utf8') as js_file:
    js_source = js_file.read()
str_content = execjs.compile(js_source).call('reload', cookie.values()[0])
# str_content = execjs.eval(open(r"demo.js", encoding='utf8').read())
print(str_content)

# reload() returns "c1=<value>; c2=<value>;". Split each pair only on its first
# separator, because the encoded values may themselves end in "=" padding.
c1_pair, c2_pair = str_content.split(';')[:2]
name1, value1 = c1_pair.split('=', 1)
value2 = c2_pair.split('c2=', 1)[1]
# name2 = str_content.split(';')[1].split('=')[0]

cookie.set(name1, value1)
# res2 = requests.get('http://datamining.comratings.com/exam', cookies=cookie)
cookie.set('c2', value2)

# Second request: fetch the protected page with session, c1 and c2 cookies set.
res3 = requests.get('http://datamining.comratings.com/exam3', cookies=cookie)

# Mode must be 'w': the file is written to (it was previously opened with 'r',
# which makes f.write() fail).
with open('content_exam.html', 'w', encoding='utf8') as f:
    f.write(res3.text)
浙公网安备 33010602011771号