PHP cURL 抓取

 1 <?php
 2 
 3  set_time_limit(0); // Remove the script execution time limit (0 = no limit) so slow batches of requests are not cut off.
 4 function curl_multi($urls) {  
 5     if (!is_array($urls) or count($urls) == 0) {  
 6         return false;  
 7     }   
 8     $num=count($urls);  
 9     $curl = $curl2 = $text = array();  
10     $handle = curl_multi_init();  
11     function createCh($url) {  
12         $ch = curl_init();  
13         curl_setopt ($ch, CURLOPT_URL, $url);  
14         curl_setopt ($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko');//设置头部  
15         curl_setopt ($ch, CURLOPT_REFERER, $url); //设置来源  
16         curl_setopt ($ch, CURLOPT_ENCODING, "gzip"); // 编码压缩  
17         curl_setopt ($ch, CURLOPT_RETURNTRANSFER, 1);  
18         curl_setopt ($ch, CURLOPT_FOLLOWLOCATION, 1);//是否采集301、302之后的页面  
19         curl_setopt ($ch, CURLOPT_MAXREDIRS, 5);//查找次数,防止查找太深  
20         curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE); // 对认证证书来源的检查  
21         curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, FALSE); // 从证书中检查SSL加密算法是否存在         
22         curl_setopt ($ch, CURLOPT_TIMEOUT, 20);  
23         curl_setopt ($ch, CURLOPT_HEADER, 0);//输出头部  
24         return $ch;  
25     }  
26     foreach($urls as $k=>$v){  
27         $url=$urls[$k];  
28         $curl[$k] = createCh($url);  
29         curl_multi_add_handle ($handle,$curl[$k]);  
30     }  
31     $active = null;  
32     do {  
33         $mrc = curl_multi_exec($handle, $active);  
34     } while ($mrc == CURLM_CALL_MULTI_PERFORM);  
35   
36     while ($active && $mrc == CURLM_OK) {  
37         if (curl_multi_select($handle) != -1) {  
38             usleep(100);  
39         }  
40         do {  
41             $mrc = curl_multi_exec($handle, $active);  
42         } while ($mrc == CURLM_CALL_MULTI_PERFORM);  
43     }   
44   
45     foreach ($curl as $k => $v) {  
46         if (curl_error($curl[$k]) == "") {  
47             $text[$k] = (string) curl_multi_getcontent($curl[$k]);   
48         }  
49         curl_multi_remove_handle($handle, $curl[$k]);  
50         curl_close($curl[$k]);  
51     }   
52     curl_multi_close($handle);  
53     return $text;  
54 }  
// Demo run: request the same page eight times in parallel and dump the
// array returned by curl_multi().
$urls = array_fill(0, 8, 'http://www.baidu.com');
$res = curl_multi($urls);
print_r($res);

 

posted @ 2017-09-21 09:34  wujunbin  阅读(262)  评论(0)  编辑  收藏  举报