利用 PhantomJS 伪装 Android 浏览器，解析页面上的 URL

 

 /**
  * PhantomJS script: opens the URL given on the command line while sending an
  * Android browser User-Agent string, then prints the href of every <a>
  * element found on the page, one per line.
  *
  * Usage: phantomjs crawler.js <SOME-URL>
  *
  * Exit status: 0 once the links have been printed; 1 when the URL argument
  * is missing or the page could not be loaded.
  *
  * NOTE(review): ES5 syntax (var, function expressions) is kept as in the
  * original script; confirm the target PhantomJS version before modernising.
  */
 var page = require('webpage').create();
 var system = require('system');

 // User-Agent string sent with every request so the server sees an Android browser.
 var ANDROID_USER_AGENT = 'Mozilla/5.0 (Linux; U; Android 4.2.2; zh-cn; Galaxy Nexus Build/JDQ39) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30';

 /**
  * Collects the href of every anchor in the current document.
  * Runs inside the page context via page.evaluate, so it must not reference
  * any variable from this script's scope.
  *
  * @returns {string[]} non-empty href values in document order
  */
 function collectLinks()
 {
     var hrefs = [];
     var anchors = document.getElementsByTagName("a");
     for (var i = 0; i < anchors.length; i++)
     {
         // Anchors without an href attribute report an empty string; skip
         // them so the output contains URLs only.
         if (anchors[i].href)
         {
             hrefs.push(anchors[i].href);
         }
     }
     return hrefs;
 }

 // system.args[0] is the script name, so a length of 1 means no URL was given.
 if (system.args.length === 1)
 {
     console.log('Usage: phantomjs crawler.js <SOME-URL>');
     // Stop here instead of falling through to page.open() with an undefined URL.
     phantom.exit(1);
 }
 else
 {
     var url = system.args[1];

     page.settings.userAgent = ANDROID_USER_AGENT;
     // Only the markup is needed to extract links, so image loading is turned off.
     page.settings.loadImages = false;

     page.open(url, function (status)
     {
         if (status !== 'success')
         {
             // Report the failure on stderr so stdout carries URLs only, and
             // signal it to the caller through a non-zero exit status.
             system.stderr.writeLine('Failed to load ' + url);
             phantom.exit(1);
             return;
         }

         // Fall back to an empty list if evaluate() did not hand back an array.
         var links = page.evaluate(collectLinks) || [];
         for (var i = 0; i < links.length; ++i)
         {
             console.log(links[i]);
         }
         phantom.exit(0);
     });
 }

 

posted @ 2014-04-28 09:08  sheeta  阅读(741)  评论(0)    收藏  举报