爬取某某药监局数据

某某药监局的数据反爬非常厉害,怎么解决呢?这里提供一点思路。

(function () {
    'use strict';

    // Listing page that is (re)loaded when the current page did not render.
    const LIST_PAGE_URL = "http://app1.xxxxx/data_nmpa/face3/base.jsp?tableId=41&tableName=TABLE41&title=%D2%A9%C6%B7%BE%AD%D3%AA%C6%F3%D2%B5&bcId=152911863995882985662523838679";

    // How often the watchdog runs, in milliseconds (30 seconds).
    const CHECK_INTERVAL_MS = 30000;

    /**
     * Watchdog tick: looks for the `#content` element and, when it is absent,
     * navigates the window back to the listing page. Logs the outcome either way.
     */
    const reloadIfContentMissing = () => {
        const content = document.getElementById('content');
        // `== null` covers both null and undefined; the loose `== ''` test is
        // carried over from the original check (it compares the element's
        // string form with the empty string).
        const isMissing = content == null || content == '';
        if (!isMissing) {
            console.log('页面成功加载,不需要刷新');
            return;
        }
        console.log('页面没有成功加载,需要刷新');
        window.location.href = LIST_PAGE_URL;
    };

    setInterval(reloadIfContentMissing, CHECK_INTERVAL_MS);
})();

(function() {
'use strict';

// Your code here...


// Resume point: getCount() returns whatever is stored in localStorage under
// 'MyCount', i.e. a string or null. Parse it into a real integer so the loop
// condition and the increment work on numbers; anything missing, non-numeric
// or below 1 restarts the crawl from page 1 instead of silently never running.
const storedCount = getCount();
console.log('初始count = ' + storedCount);
let count = Number.parseInt(storedCount, 10);
if (!Number.isInteger(count) || count < 1) {
    count = 1;
}

// Last page number to visit (inclusive upper bound of the crawl loop).
const total = 45000;

// start() is async; surface a rejection instead of leaving the promise floating.
start().catch(function (err) {
    console.error('抓取循环异常终止', err);
});

/**
 * Main crawl loop. While the shared `count` has not passed `total`, waits
 * five seconds and then processes the current page through getPages().
 *
 * @returns {Promise<void>} settles once `count <= total` no longer holds.
 */
async function start(){
    console.log('开始');

    // Re-evaluated on every pass because `count` is advanced elsewhere.
    const hasRemainingPages = () => count <= total;

    while (hasRemainingPages()) {
        console.log('进入循环');
        console.log(`count = ${count}`);
        await sleep(5000);
        await getPages();
    }
}

/**
 * Processes one result page, then returns to the listing page.
 *
 * Steps:
 *  1. Writes the shared `count` into the `#goInt` input and clicks the first
 *     jump button (the image input inside `#content`).
 *  2. Waits five seconds, then runs getLinks() only if hasLinks() reports
 *     that the page actually rendered result links.
 *  3. Calls backHome() every five seconds until it reports success.
 *
 * If the paging controls are not in the DOM (page not rendered), steps 1-2
 * are skipped rather than throwing, so the crawl loop is not killed by a
 * TypeError and the back-to-home retry in step 3 still runs.
 *
 * @returns {Promise<void>}
 */
async function getPages(){
    console.log('进入翻页函数');

    const pageInput = document.getElementById('goInt');
    const jumpButtons = document.querySelectorAll("#content input[src = 'images/dataanniu_11.gif']");

    if (pageInput != null && jumpButtons.length > 0) {
        console.log('给页数赋值: ' + count);
        pageInput.value = count;

        console.log('点击跳转页数');
        jumpButtons[0].click();

        await sleep(5000);

        // Only harvest links when the result list actually loaded.
        const pageLoaded = await hasLinks();
        if (pageLoaded) {
            await getLinks();
        }
    } else {
        // Controls missing: nothing to click on this page, go straight to the reload.
        console.log('翻页控件未找到,跳过本次翻页并返回首页重试');
    }

    // Return to the listing page; keep retrying until backHome() succeeds.
    let back = false;
    while (back === false) {
        await sleep(5000);
        back = await backHome();
    }
}

/**
 * Clicks every result link on the current page, one after another.
 *
 * Links are taken from the table directly under `#content`; when that query
 * matches nothing, the table nested one `div` deeper is used instead. After
 * each click hasDetails() is consulted; while it reports false the same link
 * is clicked again after an 8-second pause, for at most 8 extra attempts.
 * A 5-second pause follows every link. Once all links are done the shared
 * `count` is incremented and persisted through setCount().
 *
 * @returns {Promise<void>}
 */
async function getLinks(){
    console.log('进入链接函数');
    const primaryLinks = document.querySelectorAll('#content > table:nth-child(2) > tbody > tr a');
    const fallbackLinks = document.querySelectorAll('#content > div > table:nth-child(2) > tbody > tr a');
    const links = primaryLinks.length > 0 ? primaryLinks : fallbackLinks;

    for (let idx = 0; idx < links.length; idx++) {
        const link = links[idx];

        console.log('点击链接');
        link.click();
        let loaded = await hasDetails();

        // Detail view did not show up: click the same link again, up to 8 more times.
        for (let attempt = 1; loaded === false && attempt <= 8; attempt++) {
            console.log('点击链接后没有正常加载,进行再次尝试,尝试次数= ' + attempt);
            await sleep(8000);
            link.click();
            loaded = await hasDetails();
        }

        await sleep(5000);
    }

    // Page finished: advance and persist the resume point.
    count ++;
    await setCount(count);
}

/**
 * Reports whether the current page shows any result links.
 *
 * Looks in the table directly under `#content` first and, when that query
 * matches nothing, in the table nested one `div` deeper.
 *
 * @returns {Promise<boolean>} true when at least one link was found.
 */
async function hasLinks(){
    const primaryLinks = document.querySelectorAll('#content > table:nth-child(2) > tbody > tr a');
    const fallbackLinks = document.querySelectorAll('#content > div > table:nth-child(2) > tbody > tr a');
    const links = primaryLinks.length > 0 ? primaryLinks : fallbackLinks;
    return links.length > 0;
}

/**
 * Reports whether a detail view is present, i.e. whether any centre-aligned
 * table exists inside an element with class `listmain`.
 *
 * @returns {Promise<boolean>} true when at least one such table was found.
 */
async function hasDetails(){
    const detailTables = document.querySelectorAll(".listmain table[align='center']");
    return detailTables.length > 0;
}


/**
 * Navigates the window back to the listing page, waits five seconds, and
 * then checks whether the `#content` element is present.
 *
 * @returns {Promise<boolean>} true when `#content` was found after the wait.
 */
async function backHome(){
    console.log('返回首页');
    window.location.href = "http://app1.xxxx/data_nmpa/face3/base.jsp?tableId=41&tableName=TABLE41&title=%D2%A9%C6%B7%BE%AD%D3%AA%C6%F3%D2%B5&bcId=152911863995882985662523838679";
    await sleep(5000);

    const content = document.getElementById('content');
    // `== null` covers null and undefined; the loose `== ''` test is carried
    // over from the original check.
    const isMissing = content == null || content == '';
    return !isMissing;
}

/**
 * Reads the saved page counter from localStorage (key 'MyCount').
 *
 * @returns {string|null} the stored value, or null when nothing is stored;
 *     the caller is responsible for supplying a default.
 */
function getCount(){
    const storage = window.localStorage;
    return storage.getItem('MyCount');
}

/**
 * Saves the page counter to localStorage under the key 'MyCount'.
 *
 * @param {*} count - value to store.
 * @returns {Promise<void>}
 */
async function setCount(count){
    const storage = window.localStorage;
    storage.setItem('MyCount', count);
}

/**
 * Logs that a wait has started and resolves after the given delay.
 *
 * @param {number} ms - delay in milliseconds, passed to setTimeout.
 * @returns {Promise<void>} resolves (with no value) once the timer fires.
 */
async function sleep(ms) {
    console.log('进入等待');
    return new Promise(function (resolve) {
        setTimeout(resolve, ms);
    });
}
})();

 

posted @ 2021-03-13 12:08  jockshu  阅读(88)  评论(0)    收藏  举报