Node_初步了解(4)小爬虫

 1 var http=require('http');
 2 var cheerio=require('cheerio');
 3 var url='http://www.cnblogs.com/Lwd-linux/archive/2017/01.html';
 4 
 5 //获取
 6 function filterChapters(html){
 7     var $=cheerio.load(html);
 8 
 9     var chapters=$('.entrylistItem');
10 
11     var courseData=[];
12     chapters.each(function(){
13         var chapter=$(this);
14         var chapterTitle=chapter.find('.entrylistItemTitle').text();
15         var summary=chapter.find('.c_b_p_desc').text();
16         var chapterData={
17             chapterTitle:chapterTitle,
18             summary:summary
19         };
20         courseData.push(chapterData);
21     })
22 
23     
24     return courseData;
25 }
/**
 * Print every extracted entry to the console.
 *
 * For each record the title is logged first, then the summary; each value
 * is logged with a trailing newline appended, so console.log's own line
 * break leaves an empty line after it.
 *
 * @param {Array<{chapterTitle: *, summary: *}>} courseData - Records to print.
 */
function printCourseInfo(courseData){
    // Fields are read and logged one at a time, in this order.
    var fields=['chapterTitle','summary'];

    courseData.forEach(function(entry){
        fields.forEach(function(key){
            console.log(entry[key]+'\n');
        });
    });
}
35 
36 
// Download the archive page, then extract and print its entries.
http.get(url,function(res){
    // A redirect or error response does not contain the entry list; report
    // it instead of silently parsing the wrong body.
    if(res.statusCode<200||res.statusCode>=300){
        console.log('获取数据出错!','HTTP '+res.statusCode);
        res.resume(); // consume the response data to free up memory
        return;
    }

    // Decode at the stream level: concatenating raw Buffer chunks converts
    // each chunk to a string on its own, which corrupts a multi-byte UTF-8
    // character that happens to be split across two chunks.
    res.setEncoding('utf8');

    var html='';

    res.on('data',function(chunk){
        html+=chunk;
    });

    res.on('end',function(){
        printCourseInfo(filterChapters(html));
    });

    // Failures while the body is being read are emitted on the response.
    res.on('error',function(err){
        console.log('获取数据出错!',err.message);
    });
}).on('error',function(err){
    // Connection-level failures (DNS lookup, refused connection, ...) are
    // emitted on the request object returned by http.get, not on the
    // response, so the listener has to be attached here.
    console.log('获取数据出错!',err.message);
});

 

posted @ 2017-07-18 11:14  忍冬。  阅读(137)  评论(0)    收藏  举报