爬虫新纪录
// Encoding: declare the response as UTF-8 HTML.
header("Content-Type:text/html;charset=utf-8");

// Default time zone for date/time functions such as date().
date_default_timezone_set("PRC");

// Keep the script running after the browser is closed. The argument is
// required: called without one, ignore_user_abort() only reports the current
// setting and changes nothing.
ignore_user_abort(true);

// Remove the execution time limit so the script can run indefinitely.
set_time_limit(0);

// Push pending output to the client. ob_flush() raises a notice when no
// output buffer is active, so it is only called when one exists.
if (ob_get_level() > 0) {
    ob_flush();
}
flush();
写入日志
/**
 * Append a value to the daily log file.
 *
 * The file lives in the current working directory and is named after the
 * current month and day (pattern "_m_d_" plus ".log"). Each call appends the
 * var_export() representation of $data followed by a CRLF line ending.
 *
 * @param mixed $data Value to record.
 */
function my_log($data)
{
    $logPath = date('_m_d_', time()) . '.log';
    $entry = var_export($data, true) . "\r\n";

    file_put_contents($logPath, $entry, FILE_APPEND);
}
爬虫curl
/**
 * Fetch a URL with cURL and return the response body as a string.
 *
 * @param string $curl      URL to request.
 * @param mixed  $postInfo  POST payload, handed unchanged to CURLOPT_POSTFIELDS;
 *                          when truthy the request is sent as POST.
 * @param string $cookie    Cookie header value; skipped when empty.
 * @param string $referer   Referer header value; skipped when empty.
 * @param string $userAgent User-Agent header value; skipped when empty.
 *
 * @return string The response body on success. On failure, the text
 *                "网络请求出错: " followed by an error description (callers
 *                that need to detect failure must check for that prefix).
 */
function _grab($curl, $postInfo = '', $cookie = '', $referer = '', $userAgent = '')
{
    $ch = curl_init();
    if ($ch === false) {
        // Report initialisation failure through the same error-string contract.
        return "网络请求出错: " . 'curl_init() failed';
    }

    curl_setopt($ch, CURLOPT_URL, $curl);
    // Do not include response headers in the output.
    curl_setopt($ch, CURLOPT_HEADER, 0);
    // Return the response as a string instead of printing it.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);

    // For https URLs, skip peer certificate verification. Only the scheme is
    // inspected, so an http URL that merely contains "https" somewhere in its
    // path or query string is not affected.
    if (preg_match('/^\s*https:/i', $curl)) {
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    }

    // POST
    if ($postInfo) {
        curl_setopt($ch, CURLOPT_POST, 1);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $postInfo);
    }

    // Attach cookies.
    if ($cookie) {
        curl_setopt($ch, CURLOPT_COOKIE, $cookie);
    }

    // Spoof the referring page.
    if ($referer) {
        curl_setopt($ch, CURLOPT_REFERER, $referer);
    }

    // Spoof the client environment.
    if ($userAgent) {
        curl_setopt($ch, CURLOPT_USERAGENT, $userAgent);
    }

    // Execute the request.
    $content = curl_exec($ch);

    // Error handling: the message is returned to the caller, not thrown.
    if ($content === false) {
        return "网络请求出错: " . curl_error($ch);
    }

    return $content;
}
/**
 * Read "danci.txt" line by line and return the first 6 bytes of each line.
 *
 * Lines are not trimmed: when a line is shorter than 6 bytes, its line ending
 * is part of the returned entry.
 * NOTE(review): substr() is byte-wise; confirm 6 bytes is the intended cut
 * if the file can contain multi-byte text.
 *
 * @return array One entry per line read; an empty array when the file cannot
 *               be opened or is empty.
 */
function readKeyword()
{
    $data = [];

    // Bail out instead of looping on an invalid handle when the file is missing.
    if (!is_readable("danci.txt")) {
        return $data;
    }

    $f = fopen("danci.txt", "r");
    if ($f === false) {
        return $data;
    }

    // Drive the loop by fgets() itself: testing feof() before reading appends
    // a bogus empty entry after the last newline-terminated line.
    while (($line = fgets($f)) !== false) {
        $data[] = substr($line, 0, 6);
    }

    fclose($f);

    return $data;
}

浙公网安备 33010602011771号