Node.js 后端DOM操作

前提

与浏览器DOM具有一致接口的JsDOM https://github.com/jsdom/jsdom
与jQuery有类似接口的CheerIO https://github.com/cheeriojs/cheerio
都可以做DOM操作

JSDOM

文档:https://airbnb.io/enzyme/docs/guides/jsdom.html

一段抓取网页数据的代码:

const fs = require('fs');
const { JSDOM } = require('jsdom');
const jquery = require('jquery');
const { log, table, } = console;

/**
 * Load a page with jsdom, extract the thread title and first post body, and
 * hand a lightly cleaned-up text version to `callback`.
 *
 * The selectors (`a#thread_subject`, `div#postlist ... .t_fsz`, `td.t_f`,
 * `p.attnm > a`) are specific to the page layout this scraper was written
 * for -- presumably a forum thread page; verify against the target site.
 *
 * @param {string} url - Address of the page to load via `JSDOM.fromURL`.
 * @param {(result: string) => void} callback - Receives the extracted text.
 *     Not invoked when loading or extraction fails.
 * @returns {Promise<void>} Settles once the work is finished. Failures
 *     (load errors, unexpected page structure, errors thrown by `callback`)
 *     are logged with `console.log` and do not reject the promise.
 */
function get(url, callback) {
    // Start inside a promise chain so synchronous throws and asynchronous
    // rejections from JSDOM.fromURL both reach the single .catch below.
    // A try/catch around the call would only see the synchronous ones.
    return Promise.resolve()
        .then(() => JSDOM.fromURL(url))
        .then(jsenv => {
            const $ = jquery(jsenv.window);
            const title = $('a#thread_subject')[0];
            const ct = $('div#postlist > div > table .t_fsz')[0];
            const body = ct ? ct.querySelector('td.t_f') : null;
            if (!title || !body) {
                // Fail with a descriptive message instead of an opaque
                // "cannot read property of undefined" TypeError.
                throw new Error('Unexpected page structure, nothing to extract: ' + url);
            }

            let result = title.innerHTML + '\n\n\n' + body.innerHTML;
            result = result
                // Drop <br> tags and non-breaking-space entities.
                .replace(/(?:<br\s*>|&nbsp;)/g, '')
                // Turn images into [img]src[/img] markers.
                .replace(/<img src="([^"]*)" .*>/g, '[img]$1[/img]')
                // Replace <font>...</font> runs (and trailing newlines) with one newline.
                .replace(/<font.*>.*<\/font>\n*/g, '\n')
                // Remove opening anchor tags.
                .replace(/<a.*>\n*/g, '')
                // Remove closing anchor tags and collapse doubled newlines.
                .replace(/(?:<\/a>\n|\n\n)/g, '\n');

            // The attachment link is optional: append it only when present
            // rather than swallowing every exception around the lookup.
            const attachment = ct.querySelector('p.attnm > a');
            if (attachment) {
                result += '\n\n下载地址:' + attachment.href;
            }
            callback(result);
        })
        .catch(e => {
            console.log(e);
        });
}

END

posted @ 2020-02-11 14:21  develon  阅读(1354)  评论(0编辑  收藏  举报