爬虫新纪录

编码

// Declare the response as UTF-8 encoded HTML.
header("Content-Type:text/html;charset=utf-8");

// Time zone used by every date()/time formatting call below.
date_default_timezone_set("PRC");

// Keep the script running after the client disconnects. The explicit `true`
// is required: called without an argument, ignore_user_abort() only reports
// the current setting and changes nothing.
ignore_user_abort(true);
// A limit of 0 removes the maximum execution time for this script.
set_time_limit(0);

// Push any pending output to the client. ob_flush() raises a notice when no
// output buffer is active, so only call it when one exists.
if (ob_get_level() > 0) {
    ob_flush();
}
flush();
 

 

 

写入日志

/**
 * Append a dump of $data to the daily log file.
 *
 * The file name is built from the current month and day ("_m_d_.log") and is
 * relative to the current working directory. Each entry is the var_export()
 * representation of $data followed by a CRLF line ending.
 *
 * @param mixed $data Value to record.
 */
function my_log($data){
    // One entry per call: exported value plus a Windows-style line break.
    $entry = var_export($data, true) . "\r\n";

    // date() defaults to the current time, so no explicit timestamp is needed.
    $target = date('_m_d_') . '.log';

    file_put_contents($target, $entry, FILE_APPEND);
}

 

爬虫curl

/**
 * Fetch a URL with cURL and return the response body as a string.
 *
 * @param string       $curl      URL to request.
 * @param string|array $postInfo  POST payload; when truthy the request is sent
 *                                as POST with this value as CURLOPT_POSTFIELDS.
 * @param string       $cookie    Value for the Cookie header; sent only when non-empty.
 * @param string       $referer   Value for the Referer header; sent only when non-empty.
 * @param string       $userAgent Value for the User-Agent header; sent only when non-empty.
 *
 * @return string The response body on success, or the text "网络请求出错: "
 *                followed by the cURL error message when the transfer fails.
 */
function _grab($curl,$postInfo='',$cookie='',$referer='',$userAgent=''){
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $curl);
    // Do not include response headers in the returned content.
    curl_setopt($ch, CURLOPT_HEADER, 0);
    // Return the response as a string instead of printing it.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    // For https URLs, skip peer certificate verification. Only the scheme is
    // inspected, so "https" appearing elsewhere in the URL has no effect.
    // NOTE(review): disabling verification allows man-in-the-middle attacks;
    // kept because existing callers may rely on it.
    if (preg_match('/^\s*https:/i', $curl)) {
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    }
    // POST
    if ($postInfo) {
        curl_setopt($ch, CURLOPT_POST, 1);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $postInfo);
    }
    // Cookie
    if ($cookie) {
        curl_setopt($ch, CURLOPT_COOKIE, $cookie);
    }
    // Referer
    if ($referer) {
        curl_setopt($ch, CURLOPT_REFERER, $referer);
    }
    // User agent
    if ($userAgent) {
        curl_setopt($ch, CURLOPT_USERAGENT, $userAgent);
    }

    $content = curl_exec($ch);
    // Read the error message before the handle is released.
    $result = ($content === false)
        ? "网络请求出错: " . curl_error($ch)
        : $content;

    // Release the handle on both the success and the failure path. From
    // PHP 8.0 the handle is an object freed automatically and curl_close()
    // does nothing, so it is only called on older versions.
    if (PHP_VERSION_ID < 80000) {
        curl_close($ch);
    }

    return $result;
}

 

 

/**
 * Read "danci.txt" line by line and return the first 6 bytes of each line.
 *
 * The file is looked up relative to the current working directory. Lines are
 * truncated with substr(), which counts bytes, not characters. A line shorter
 * than 6 bytes keeps its trailing line break.
 * NOTE(review): confirm whether short lines should have the line break
 * stripped and whether a byte-based cut is intended for multi-byte text.
 *
 * @return array List of truncated lines; empty when the file cannot be opened.
 */
function readKeyword(){

    $data = [];
    $f = fopen("danci.txt", "r");
    // Without this guard a failed fopen() makes the read loop spin forever on
    // PHP 7 and throw a TypeError on PHP 8.
    if ($f === false) {
        return $data;
    }
    // Testing the fgets() result directly (rather than feof() beforehand)
    // avoids appending a bogus empty entry after the final line break.
    while (($line = fgets($f)) !== false)
    {
      $data[] = substr($line, 0, 6);
    }
    fclose($f);
    return $data;
}

 

posted @ 2018-08-30 17:23  cl94  阅读(124)  评论(0)    收藏  举报