curl采集封装
1.
<?php
/**
 * Thin wrapper around the PHP cURL extension for fetching pages via GET/POST.
 *
 * Every request shares the same header list, user agent, referer, response
 * compression setting, optional cookie jar file and optional SOCKS5 proxy.
 */
class cURL
{
    /** @var array Raw HTTP header lines sent with every request. */
    public $headers = array();

    /** @var string Value sent as the User-Agent header. */
    public $user_agent;

    /** @var string Value passed to CURLOPT_ENCODING (e.g. 'gzip'). */
    public $compression;

    /** @var string Path of the file used both to read and to store cookies. */
    public $cookie_file;

    /** @var string Proxy address; when non-empty it is used as a SOCKS5 proxy. */
    public $proxy;

    /** @var bool Whether the cookie file is attached to requests. */
    public $cookies;

    /** @var string cURL error message of the most recent request, '' on success. */
    public $last_error = '';

    /** @var string|null Value sent as the Referer header. */
    protected $refer;

    /**
     * Set up default headers, user agent and (optionally) the cookie file.
     *
     * @param bool   $cookies     Enable the cookie file for all requests.
     * @param string $cookie      Path of the cookie file; created when missing.
     * @param string $compression Value for CURLOPT_ENCODING.
     * @param string $proxy       Accepted for signature compatibility but not
     *                            applied (the assignment was commented out in
     *                            the original code); use setProxy() instead.
     */
    public function __construct($cookies = TRUE, $cookie = 'cookies.txt', $compression = 'gzip', $proxy = '')
    {
        $this->headers[] = 'Accept: image/gif, image/x-bitmap, image/jpeg, image/pjpeg';
        $this->headers[] = 'Connection: Keep-Alive';
        $this->headers[] = 'CLIENT-IP:208.165.188.173';
        $this->headers[] = 'X-FORWARDED-FOR:208.165.188.172';
        $this->user_agent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36';
        $this->compression = $compression;
        $this->cookies = $cookies;

        if ($this->cookies) {
            $this->cookie($cookie);
        }
    }

    /**
     * Append one raw header line (e.g. 'X-Token: abc') to the header list.
     *
     * @param string $headers Header line to add.
     */
    public function setHeader($headers)
    {
        // The original appended the bare constant `headers` instead of the argument.
        $this->headers[] = $headers;
    }

    /**
     * Set the Referer header value used by subsequent requests.
     *
     * @param string $refer
     */
    public function setRefer($refer)
    {
        $this->refer = $refer;
    }

    /**
     * Set the proxy address used by subsequent requests (treated as SOCKS5).
     *
     * @param string $proxy
     */
    public function setProxy($proxy)
    {
        $this->proxy = $proxy;
    }

    /**
     * Select the cookie file, creating an empty one when it does not exist.
     *
     * Calls error() (which terminates the script) when the file cannot be created.
     *
     * @param string $cookie_file Path of the cookie file.
     */
    public function cookie($cookie_file)
    {
        if (!file_exists($cookie_file)) {
            $handle = fopen($cookie_file, 'w');
            if ($handle === false) {
                $this->error("The cookie file could not be opened. \nMake sure this directory has the correct permissions");
            } else {
                // Close the handle returned by fopen(); the original passed the path string to fclose().
                fclose($handle);
            }
        }

        $this->cookie_file = $cookie_file;
    }

    /**
     * Perform a GET request.
     *
     * On failure the cURL error is echoed (as in the original) and stored in $last_error.
     *
     * @param string $url Target URL; surrounding whitespace is ignored.
     * @return string|false Response body, or false on failure.
     */
    public function get($url)
    {
        return $this->sendRequest($url, null, true);
    }

    /**
     * Perform a POST request.
     *
     * On failure the cURL error is stored in $last_error; nothing is echoed.
     *
     * @param string       $url  Target URL; surrounding whitespace is ignored.
     * @param array|string $data Form fields (URL-encoded with http_build_query)
     *                           or an already encoded request body.
     * @return string|false Response body, or false on failure.
     */
    public function post($url, $data)
    {
        // http_build_query() only accepts arrays/objects; pass a ready-made body through untouched.
        $body = (is_array($data) || is_object($data)) ? http_build_query($data) : (string) $data;

        return $this->sendRequest($url, $body, false);
    }

    /**
     * Print an HTML error box and terminate the script.
     *
     * @param string $error Message to display; inserted into the HTML as-is.
     */
    public function error($error)
    {
        echo "<center><div \nstyle='width:500px;border: 3px solid #FFEEFF; padding: 3px; background-color: #FFDDFF;font-family: verdana; font-size: 10px'><b>cURL Error</b><br>$error</div></center>";
        die;
    }

    /**
     * Configure a cURL handle with the shared options and execute it.
     *
     * @param string      $url        Target URL.
     * @param string|null $post_body  Encoded POST body, or null for a GET request.
     * @param bool        $echo_error Echo the cURL error message on failure.
     * @return string|false Response body, or false on failure.
     */
    private function sendRequest($url, $post_body, $echo_error)
    {
        $this->last_error = '';

        // Trim once so the handle and the https check below see the same value.
        $url = trim($url);
        $process = curl_init($url);
        if ($process === false) {
            $this->last_error = 'curl_init() failed';
            return false;
        }

        curl_setopt($process, CURLOPT_HTTPHEADER, $this->headers);
        curl_setopt($process, CURLOPT_HEADER, 0);
        curl_setopt($process, CURLOPT_USERAGENT, $this->user_agent);
        curl_setopt($process, CURLOPT_REFERER, $this->refer); // request origin
        curl_setopt($process, CURLOPT_ENCODING, $this->compression);
        curl_setopt($process, CURLOPT_TIMEOUT, 30);
        curl_setopt($process, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($process, CURLOPT_FOLLOWLOCATION, 1);

        if ($post_body !== null) {
            curl_setopt($process, CURLOPT_POSTFIELDS, $post_body);
            curl_setopt($process, CURLOPT_POST, 1);
        }

        if ($this->cookies) {
            curl_setopt($process, CURLOPT_COOKIEFILE, $this->cookie_file);
            curl_setopt($process, CURLOPT_COOKIEJAR, $this->cookie_file);
        }

        if ($this->proxy) {
            curl_setopt($process, CURLOPT_PROXY, $this->proxy); // proxy address
            curl_setopt($process, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5);
        }

        // Scheme check is case-insensitive and does not require the mbstring extension.
        if (strncasecmp($url, 'https', 5) === 0) {
            // SECURITY NOTE(review): certificate and host verification are switched
            // off for https URLs, as in the original; responses can be tampered
            // with in transit. Kept for backward compatibility.
            curl_setopt($process, CURLOPT_SSL_VERIFYPEER, false);
            curl_setopt($process, CURLOPT_SSL_VERIFYHOST, false);
        }

        $return = curl_exec($process);
        if ($return === false) {
            $this->last_error = curl_error($process);
            if ($echo_error) {
                echo 'Curl error: ' . $this->last_error;
            }
        }

        curl_close($process);

        return $return;
    }
}
?>
面向对象:
<?php
/**
 * Object-oriented wrapper that configures and executes one cURL request and
 * keeps the response body and HTTP status code for later retrieval.
 *
 * Usage: construct with a URL, optionally call the set*() / useAuth()
 * methods, call createCurl(), then read getHttpStatus(), getError() or cast
 * the object to a string to obtain the response body.
 */
class mycurl
{
    // NOTE(review): this User-Agent appears to be missing its closing parenthesis;
    // left unchanged because the value is sent verbatim at runtime.
    protected $_useragent = 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1';
    protected $_url;
    protected $_followlocation;
    protected $_timeout;
    protected $_maxRedirects;
    protected $_cookieFileLocation = './cookie.txt';
    protected $_post;
    protected $_postFields;
    protected $_referer = "http://www.midnightvip.com";
    // Not read or written by any method of this class.
    protected $_session;
    protected $_webpage;
    protected $_includeHeader;
    protected $_noBody;
    protected $_status;
    // Stored by the constructor but not applied to the cURL handle.
    protected $_binaryTransfer;
    // cURL error message of the last createCurl() call, '' when it succeeded.
    protected $_error = '';

    public $authentication = 0;
    public $auth_name = '';
    public $auth_pass = '';

    /**
     * Store the request configuration; no network activity happens here.
     *
     * @param string $url            Request URL, e.g. https://www.baidu.com
     * @param bool   $followlocation Follow "Location:" redirects (CURLOPT_FOLLOWLOCATION).
     * @param int    $timeOut        Request timeout in seconds (CURLOPT_TIMEOUT).
     * @param int    $maxRedirecs    Maximum number of redirects to follow (CURLOPT_MAXREDIRS).
     * @param bool   $binaryTransfer Deprecated; stored but has no effect.
     * @param bool   $includeHeader  Include the response headers in the output (CURLOPT_HEADER).
     * @param bool   $noBody         Request headers only, without a body (CURLOPT_NOBODY).
     */
    public function __construct($url, $followlocation = true, $timeOut = 30, $maxRedirecs = 4, $binaryTransfer = false, $includeHeader = false, $noBody = false)
    {
        $this->_url = $url;
        $this->_followlocation = $followlocation;
        $this->_timeout = $timeOut;
        $this->_maxRedirects = $maxRedirecs;
        $this->_noBody = $noBody;
        $this->_includeHeader = $includeHeader;
        $this->_binaryTransfer = $binaryTransfer;

        // Default cookie file lives next to this source file.
        $this->_cookieFileLocation = dirname(__FILE__) . '/cookie.txt';
    }

    /**
     * Enable or disable sending credentials (CURLOPT_USERPWD) with the request.
     *
     * @param bool $use True to send auth_name/auth_pass.
     */
    public function useAuth($use)
    {
        $this->authentication = $use ? 1 : 0;
    }

    /**
     * Set the user name used when authentication is enabled.
     *
     * @param string $name User name, e.g. admin
     */
    public function setName($name)
    {
        $this->auth_name = $name;
    }

    /**
     * Set the password used when authentication is enabled.
     *
     * @param string $pass Password
     */
    public function setPass($pass)
    {
        $this->auth_pass = $pass;
    }

    /**
     * Set the content of the HTTP "Referer:" header.
     *
     * @param string $referer Referer value, e.g. www.baidu.com
     */
    public function setReferer($referer)
    {
        $this->_referer = $referer;
    }

    /**
     * Set the file used both to load cookies and to save them after the request.
     *
     * @param string $path Cookie file location
     */
    public function setCookiFileLocation($path)
    {
        $this->_cookieFileLocation = $path;
    }

    /**
     * Switch the request to POST and set the data to send.
     *
     * @param array|string $postFields Data handed to CURLOPT_POSTFIELDS unchanged,
     *                                 e.g. array('username' => '123', 'password' => '123456')
     */
    public function setPost($postFields)
    {
        $this->_post = true;
        $this->_postFields = $postFields;
    }

    /**
     * Set the User-Agent header value.
     *
     * @param string $userAgent
     */
    public function setUserAgent($userAgent)
    {
        $this->_useragent = $userAgent;
    }

    /**
     * Execute the request and store the response body and HTTP status code.
     *
     * @param string|null $url Optional URL replacing the configured one. The
     *                         default sentinel 'nul' (or null) keeps the
     *                         currently configured URL.
     */
    public function createCurl($url = 'nul')
    {
        if ($url !== null && $url != 'nul') {
            $this->_url = $url;
        }

        $this->_error = '';

        $s = curl_init();
        if ($s === false) {
            $this->_webpage = false;
            $this->_status = null;
            $this->_error = 'curl_init() failed';
            return;
        }

        curl_setopt($s, CURLOPT_URL, $this->_url);
        // An empty "Expect:" header stops cURL from sending "Expect: 100-continue".
        curl_setopt($s, CURLOPT_HTTPHEADER, array('Expect:'));
        curl_setopt($s, CURLOPT_TIMEOUT, $this->_timeout);
        curl_setopt($s, CURLOPT_MAXREDIRS, $this->_maxRedirects);
        curl_setopt($s, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($s, CURLOPT_FOLLOWLOCATION, $this->_followlocation);
        curl_setopt($s, CURLOPT_COOKIEJAR, $this->_cookieFileLocation);
        curl_setopt($s, CURLOPT_COOKIEFILE, $this->_cookieFileLocation);

        if ($this->authentication == 1) {
            curl_setopt($s, CURLOPT_USERPWD, $this->auth_name . ':' . $this->auth_pass);
        }

        if ($this->_post) {
            curl_setopt($s, CURLOPT_POST, true);
            curl_setopt($s, CURLOPT_POSTFIELDS, $this->_postFields);
        }

        if ($this->_includeHeader) {
            curl_setopt($s, CURLOPT_HEADER, true);
        }

        if ($this->_noBody) {
            curl_setopt($s, CURLOPT_NOBODY, true);
        }

        curl_setopt($s, CURLOPT_USERAGENT, $this->_useragent);
        curl_setopt($s, CURLOPT_REFERER, $this->_referer);

        $this->_webpage = curl_exec($s);
        if ($this->_webpage === false) {
            // Keep the reason so callers can tell a failed transfer from an empty body.
            $this->_error = curl_error($s);
        }
        $this->_status = curl_getinfo($s, CURLINFO_HTTP_CODE);

        curl_close($s);
    }

    /**
     * HTTP status code of the last executed request.
     *
     * @return int|null Null when no request has been executed yet.
     */
    public function getHttpStatus()
    {
        return $this->_status;
    }

    /**
     * cURL error message of the last executed request.
     *
     * @return string Empty string when the last request succeeded or none was made.
     */
    public function getError()
    {
        return $this->_error;
    }

    /**
     * Response body of the last executed request.
     *
     * @return string Empty string when no request was made or the request failed.
     */
    public function __toString()
    {
        // __toString() must return a string; the stored value is null before
        // the first request and false after a failed one.
        return (string) $this->_webpage;
    }
}
浙公网安备 33010602011771号