// pa  (stray text from the source web page; commented out so the script can load)

var Nightmare = require("nightmare");
const fs = require("fs");

/**
 * Append a chunk of scraped text to ./log.text.
 *
 * Wraps the callback-style fs.appendFile in a promise so that a write failure
 * travels down the caller's promise chain instead of being thrown from inside
 * an fs callback, where no `.catch` can see it.
 *
 * @param {string} text - Text to append.
 * @returns {Promise<void>} Resolves once the append has completed; rejects
 *   with the fs error otherwise.
 */
const appendToLog = function (text) {
  return new Promise(function (resolve, reject) {
    fs.appendFile("./log.text", text, function (err) {
      if (err) {
        reject(err);
        return;
      }
      resolve();
    });
  });
};

/**
 * Scrape one listing page and append its rows to ./log.text.
 *
 * Loads `index_<page>.html`, waits for the `.footer-layout` element, then
 * collects every child of the first `.article-list` element as
 * `'<href>'-'<title>'-'<reading>'||` and appends the concatenation to the log.
 *
 * @param {number|string} page - Page index interpolated into the URL.
 * @returns {Promise<void>} Settles when the scrape and the log write are done.
 *   Failures are logged with console.error and not re-thrown, so the returned
 *   promise resolves either way.
 */
const open = function (page) {
  const nightmare = Nightmare({
    show: false,
    // Both timeouts are 100 minutes, expressed in milliseconds.
    executionTimeout: 1000 * 60 * 100,
    waitTimeout: 1000 * 60 * 100,
  });

  return nightmare
    .goto(`https://china.findlaw.cn/laodongfa/shangyemimi/index_${page}.html`)
    .wait(".footer-layout")
    .evaluate(function () {
      // This function uses `document`, i.e. it is executed against the loaded
      // page; it must stay self-contained and only return plain data.
      console.log("执行函数");
      const rows = document.querySelectorAll(".article-list")[0].children;
      let serialized = "";
      for (let i = 0; i < rows.length; i++) {
        try {
          const href = rows[i].children[0].href;
          const text = rows[i].children[1].children[0].innerText;
          // NOTE(review): the "." is unescaped, so this matches
          // digit(s) + any one character + digit(s) and needs at least three
          // characters; shorter counts make match() return null and the row
          // is skipped below. Confirm against the page markup before changing.
          const reading =
            rows[i].children[1].children[1].children[1].textContent.match(
              /\d+.\d+/
            )[0];
          serialized += `'${href}'-'${text}'-'${reading}'||`;
        } catch (e) {
          // Best effort: a child that lacks the expected link/title/count
          // structure is skipped rather than failing the whole page.
        }
      }
      return serialized;
    })
    .end()
    .then(function (result) {
      return appendToLog(result);
    })
    .catch(function (error) {
      console.error("Search failed:", error);
    });
};

/**
 * Create a promise that settles after a delay.
 *
 * @param {number} time - Delay in milliseconds, handed to setTimeout.
 * @returns {Promise<undefined>} Resolves (with no value) when the timer fires.
 */
function sleep(time) {
  const startTimer = function (done) {
    setTimeout(done, time);
  };
  return new Promise(startTimer);
}

/**
 * Crawl listing pages 1 through 37 one at a time and report the elapsed time.
 *
 * The log file is truncated synchronously before anything else, so the
 * truncation cannot land after a page has already appended to it. Pages are
 * then chained: each `open(page)` call starts only after the previous step
 * has settled, followed by a 3 second pause before the next page. If `open`
 * returns a promise it is waited on; if it returns nothing, the pause alone
 * spaces the requests out.
 *
 * @returns {Promise<void>} Resolves after the last page has been handled and
 *   the timer has been printed. Unexpected errors in the chain are logged and
 *   stop the crawl; they do not reject the returned promise.
 * @throws {Error} Synchronously, if ./log.text cannot be truncated.
 */
function run() {
  const LAST_PAGE = 37;
  const DELAY_MS = 3000;

  fs.writeFileSync("./log.text", "");

  console.time("爬取耗时:");

  let chain = Promise.resolve();
  // `let page` gives every iteration its own binding, so each queued
  // callback sees the page number it was created for.
  for (let page = 1; page <= LAST_PAGE; page++) {
    chain = chain.then(() => open(page));
    if (page < LAST_PAGE) {
      // No pause is needed after the final page.
      chain = chain.then(() => sleep(DELAY_MS));
    }
  }

  return chain
    .catch((error) => {
      console.error("Crawl aborted:", error);
    })
    .then(() => {
      // Stop the timer only once the whole chain has settled, so it measures
      // the crawl rather than the time needed to queue it.
      console.timeEnd("爬取耗时:");
    });
}

run();

// Blog-page footer (not part of the script): posted @ 2024-01-05 14:03 by lambertlt; reads (13), comments (0), bookmark, report