'use strict';
let request = require('request')
let cheerio = require('cheerio'); //爬虫
let iconv = require('iconv-lite'); //处理gbk编码的网页
let Entities = require('html-entities').XmlEntities;
let entities = new Entities();
const fs = require('fs')
const path = require('path')
const host = 'http://www.quanshuwang.com/shuku/'
const db = uniCloud.database()
const collection = db.collection('repiles-book')
const dbCom = db.command
exports.main = async (event, context) => {
// 开始抓取首页链接
let indexArr = []
//发送请求获取页面内容
var body = await requestFn()
var $ = cheerio.load(body);
//兼容网页编码格式
if ($('meta[charset]').attr('charset') == 'utf-8') { //如果网页是utf-8的编码
} else { //如果网页是gbk的编码
body = iconv.decode(body, 'gbk'); //转换gbk编码的页面内容
$ = cheerio.load(body);
}
//处理网页数据 获取排行列表
let list = $('.yd-book-content .tab-item').find('.yd-book-item')
for (var i = 0; i < list.length; i++) {
let href = list.eq(i).find('a').attr("href")
//获取书的id
let index = href.indexOf('_') + 1
let index2 = href.lastIndexOf('.')
let bookId = href.slice(index, index2) //书ID
//获取书的封面
let bookImageSrc = list.eq(i).find('img').attr("src")
//获取书的标题 注意使用html-entities解码
let bookName = entities.decode(list.eq(i).find('h2').html())
//获取书的作者 注意使用html-entities解码
let bookAuthor = entities.decode(list.eq(i).find('.dl-horizontal-inline p').html())
console.log('书的封面:' + bookImageSrc);
console.log('书的标题:' + bookName);
console.log('书的作者:' + bookAuthor);
console.log('书的id:' + bookId);
let obj={
bookImageSrc,
bookName,
bookAuthor,
bookId,
}
saveContent(obj)
}
console.log('新增文章数量:', indexArr.length);
// 循环抓取每个新文章详情页
// if (indexArr.length > 0) {
// for (let i = 0; i < indexArr.length; i++) {
// let href = list.eq(indexArr[i]).attr("href")
// let imgSrc = list.eq(indexArr[i]).find('img').attr('src')
// let title = list.eq(indexArr[i]).find('.title').text()
// await saveArticle(href, title, imgSrc)
// }
// }
//返回数据给客户端
return event
};
function saveContent(obj) {
//获取要写入文件的文件夹路径(以书名当文件夹)
let pathBook=path.join(__dirname, `../../bookData/${obj.bookName}`)
//判断书名文件夹是否存在,不存在则创建
if (!fs.existsSync(pathBook)) {
fs.mkdirSync(pathBook)
}
//写入json文件
fs.writeFile(`${pathBook}/chapter1.json`, JSON.stringify(obj), 'utf-8', err => {
if (err) throw err
})
}
function requestFn() {
return new Promise((resolve, reject) => {
request({
url: host,
encoding: null //设置抓取页面时不要对数据做任何转换
}, function(err, res, body) {
if (err) {
reject(err)
} else {
resolve(body)
}
});
})
}