前置条件:
1. 已安装 Node.js
2. 已安装 cheerio(npm i cheerio)
3. 了解 jQuery 选择器的基础知识
var https = require('https')
var cheerio = require('cheerio')
// Address of the Jianshu home page that the script downloads
var url = 'https://www.jianshu.com/'
/**
 * Extract note records from fetched page markup.
 *
 * Every `.content` element found beneath a `.note-list` element is treated
 * as one note. Iterating the notes (instead of the `.note-list` container,
 * as earlier versions did) keeps a list that holds several notes from
 * collapsing into a single record whose fields are the concatenated text of
 * all of them.
 *
 * @param {string} html - Markup of the downloaded page.
 * @returns {Array<{author: string, title: string, abstract: string}>}
 *   One record per note, in document order; empty when nothing matches.
 */
function filterSection(html) {
  // Parse the downloaded markup so it can be queried with selectors
  const $ = cheerio.load(html)
  const sectionData = []

  $('.note-list').find('.content').each((index, element) => {
    const content = $(element)
    // .text() keeps the markup's indentation and line breaks, so trim each field
    sectionData.push({
      // Author block of the note
      author: content.find('.author').find('.info').text().trim(),
      // Note title
      title: content.find('.title').text().trim(),
      // Short summary of the note body
      abstract: content.find('.abstract').text().trim()
    })
  })

  return sectionData
}
/**
 * Write every note record to the console.
 *
 * Each record is logged with a single console.log call as author, title and
 * abstract, each followed by a newline.
 *
 * @param {Array<{author: *, title: *, abstract: *}>} sectionData - Records to print.
 */
function print(sectionData) {
  // Build the text block for one record: three fields, newline after each
  const formatNote = function ({ author, title, abstract }) {
    return author + '\n' + title + '\n' + abstract + '\n'
  }

  sectionData.forEach(function (note) {
    console.log(formatNote(note))
  })
}
// Download the page, collect the response body, then extract and print the notes
https.get(url, (res) => {
  // Let the stream decode UTF-8 itself: appending raw Buffer chunks would
  // corrupt any multi-byte character that straddles a chunk boundary
  res.setEncoding('utf8')

  let html = ''
  res.on('data', (chunk) => {
    html += chunk
  })
  res.on('end', () => {
    print(filterSection(html))
  })
  // Failures while the response body is being read
  res.on('error', (err) => {
    console.log('错误', err.message)
  })
}).on('error', (err) => {
  // Failures on the request itself (DNS lookup, refused connection, TLS) are
  // emitted here; without this listener they would terminate the process
  console.log('错误', err.message)
})