php curl,爬虫

 1 <?php
 2 
 3 class grabInfo{
 4     public $content;
 5     /*Init 初始化 */
 6     public function __construct($url){
 7         $this->ch=curl_init();
 8         curl_setopt($this->ch,CURLOPT_URL,$url);
 9          curl_setopt($this->ch, CURLOPT_RETURNTRANSFER,1); 
10         curl_setopt($this->ch,CURL_HEADER,1); 
11     }
12 /*get Image Information 保存图片,*/
13     protected function _getImgInfo(){
14         $flagImg='/<\s*img\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i';
15         preg_match_all($flagImg,$this->content,$imgArr);
16         $imgInfo=getimagesize($imgArr[2][0]);
17         $fpath=$imgArr[2][0];
18         $this->_getPath($fpath);
19         $imgCreate=str_replace('/', 'createfrom', $imgInfo['mime']);
20         imagepng($img,$fpath.$imgArr[2][0]);
21     }
22 /*create Folder path 创建图片存在路径*/
23     protected function _getPath($fpath='a/b.jpg'){
24         $fpath=str_replace('\\','/',$fpath);
25         $fpath=substr($fpath, 0,strrpos($fpath, '/'));
26         $fpathArr=explode('/', $fpath);
27         foreach ($fpathArr as $key => $value) {    
28             $folderPath.=$value.'/';
29             if(!file_exists($value)){    
30                 mkdir($folderPath);
31                 chmod($folderPath,0777);    
32             }
33         }
34     }
35 /*save fileString to file爬虫页面的内容保存到本地*/
36     protected function _getString(){
37         $filePath='myGrab.php';
38         $handle=fopen($filePath,'w+');
39         fwrite($handle, $this->content);
40         fclose($handle);
41 
42     }
43 /*all operation用户调用的*/
44     public function curlExec(){
45         $this->content=curl_exec($this->ch);
46         $this->_getString();
47         $this->_getImgInfo();
48     }
49 }
50 
// Demo run: grab the local test page, which stores its markup and processes
// the first image it references.
$grabObj = new grabInfo('127.0.0.1/test.php');
$grabObj->curlExec();

这是我自己刚看,刚写的,有不足的地方,请见谅。

posted @ 2013-01-28 14:24  尹少爷  阅读(1777)  评论(0)  编辑  收藏  举报