const sp = require('superagent');
const cheerio = require('cheerio');
const fs = require('fs')
// URL of the page that getContent() requests and scrapes
const BASE_URL = 'http://www.ddxsku.com/';
/**
 * Downloads a single book cover and stores it as `./img/<index>.png`.
 *
 * Any failure (missing URL, failed request, unexpected body, write error) is
 * logged and swallowed so that one bad image does not abort the whole run.
 *
 * @param {{link?: string, name: string, image?: string}} book - Scraped book record.
 * @param {number} index - Position of the book in the scraped list; used as the file name.
 * @returns {Promise<void>} Resolves once the image is written or the failure is logged.
 */
async function saveBookImage(book, index) {
  if (!book.image) {
    console.log(`Image ${index} skipped: no image URL found`);
    return;
  }

  try {
    // Resolve against the site root so relative or protocol-relative `src`
    // values can still be requested.
    const imageUrl = new URL(book.image, BASE_URL).href;
    const res = await sp.get(imageUrl);

    // NOTE(review): superagent is expected to buffer image responses into a
    // Buffer in `res.body`; any other body shape cannot be written as binary.
    if (!Buffer.isBuffer(res.body)) {
      console.log(`Image ${index} skipped: response body is not binary (${imageUrl})`);
      return;
    }

    // The body is already fully buffered in memory, so a single writeFile is
    // enough and, unlike an un-ended write stream, closes the file handle.
    await fs.promises.writeFile(`./img/${index}.png`, res.body);
    console.log(`Image ${index} saved (${imageUrl})`);
  } catch (err) {
    console.log(`Image ${index} failed: ${err.message}`);
  }
}

/**
 * Scrapes the book list from BASE_URL.
 *
 * Steps:
 *   1. Request the page and save the raw HTML to `./index.html` (best effort).
 *   2. Parse the HTML with cheerio and collect link / name / image for every
 *      `#s_dd dd` element.
 *   3. Save the collected records as JSON to `./test.txt`.
 *   4. Download every cover image into `./img/<index>.png`.
 *
 * @returns {Promise<void>} Resolves after all files have been written.
 * @throws Rejects if the page request fails, or if `./test.txt` or the
 *   `./img` directory cannot be written.
 */
async function getContent () {
  // 1. Send the request and get the HTML string.
  const res = await sp.get(BASE_URL);

  // Keep a copy of the raw HTML; a failure here is not fatal for scraping.
  try {
    await fs.promises.writeFile('./index.html', res.text);
    console.log('HTML保存成功');
  } catch (err) {
    console.log('HTML保存失败', err.message);
  }

  // 2. Load the HTML into cheerio.
  const $ = cheerio.load(res.text);

  // 3. Extract one record per book entry.
  const books = $('#s_dd dd').toArray().map((el) => {
    const entry = $(el);
    return {
      link: entry.find('a').eq(0).attr('href'),
      name: entry.find('a').eq(1).text(),
      image: entry.find('img').eq(0).attr('src'),
    };
  });

  // Write the complete list once, after collection, instead of reopening
  // (and truncating) the file for every book.
  await fs.promises.writeFile('./test.txt', JSON.stringify(books));

  // Create the output directory once, before any download starts.
  await fs.promises.mkdir('./img', { recursive: true });

  // Downloads run in parallel; awaiting them keeps getContent() pending until
  // every image has been handled.
  await Promise.all(books.map((book, index) => saveBookImage(book, index)));
}
// Entry point: run the scraper and report any failure instead of leaving the
// returned promise unhandled.
getContent().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});