QueryList 数据采集 + 详情页采集
采集数据+七牛云存储图片
/**
 * Crawler: scrapes listing pages 1-3 of the ke.com encyclopedia section,
 * mirrors every cover image to Qiniu cloud storage and bulk-inserts the
 * scraped rows through Article::insert().
 *
 * Each scraped row has the keys `img`, `title`, `desn` and `rul` (`rul` is the
 * link of the first anchor inside `.text`, consumed later by gather()).
 * `img` holds the public URL of the mirrored image, or false when the image
 * could not be downloaded, stored locally or uploaded.
 *
 * NOTE(review): `UploadManager` and `Auth` are presumably the Qiniu SDK
 * classes imported at the top of this file — confirm the `use` statements
 * exist, since the previous version never actually reached that code.
 *
 * @return void
 */
public function reptile()
{
    require_once base_path() . "/public/static/QueryList/phpQuery.php";
    require_once base_path() . "/public/static/QueryList/QueryList.php";

    // Build the Qiniu client and upload token once; they do not change per image.
    // NOTE(review): credentials are placeholders here — load them from config.
    $accessKey = "***";
    $secretKey = "***";
    $uploadMgr = new UploadManager();
    $auth = new Auth($accessKey, $secretKey);
    $token = $auth->uploadToken('jmin');

    $imageDir = base_path() . '/public/uploads/img/';

    // Downloads one remote image, keeps a local copy and uploads it to Qiniu.
    // Returns the public URL on success, false on any failure.
    $mirrorImage = function ($item) use ($uploadMgr, $token, $imageDir) {
        if (empty($item)) {
            return false;
        }

        // uniqid() instead of rand(1, 999): the tiny random range produced
        // colliding file names that silently overwrote earlier images.
        $extension = pathinfo($item, PATHINFO_EXTENSION);
        $fileName = uniqid('hy_') . ($extension !== '' ? '.' . $extension : '');
        $localPath = $imageDir . $fileName;

        if (!is_dir($imageDir) && !@mkdir($imageDir, 0755, true) && !is_dir($imageDir)) {
            return false;
        }

        $contents = @file_get_contents($item);
        if ($contents === false || file_put_contents($localPath, $contents) === false) {
            return false;
        }

        // putFile() expects (token, object key, local file path); the object
        // key is the file name, not the byte count from file_put_contents().
        list($ret, $error) = $uploadMgr->putFile($token, $fileName, $localPath);
        if ($error !== null) {
            return false;
        }

        // Public URL of the uploaded object, usable directly in templates.
        return 'http://min.clouddn.com/' . $ret['key'];
    };

    foreach (range(1, 3) as $val) {
        $url = 'https://news.ke.com/bj/baike/0033/' . 'pg' . $val . '/';

        // '@' is kept on purpose: it preserves the original best-effort
        // behaviour when a page fails to load or parse.
        $data = @QueryList::Query($url, [
            'img'   => ['.item>a>img', 'data-original', '', $mirrorImage],
            'title' => ['.tit', 'html'],
            'desn'  => ['.text>.summary', 'text'],
            'rul'   => ['.text>a', 'href'],
        ])->data;

        // Store the scraped rows; skip pages that produced nothing.
        if (!empty($data)) {
            Article::insert($data);
        }
    }
}
采集详情页
/**
 * Detail scraper: for every stored article, fetches the page behind its
 * `rul` link and saves the scraped author name and article HTML back onto
 * the same row (`name` and `body` columns).
 *
 * Articles without a link, and pages where nothing could be scraped, are
 * skipped instead of aborting the whole run.
 *
 * @return void
 */
public function gather()
{
    require_once base_path() . "/public/static/QueryList/phpQuery.php";
    require_once base_path() . "/public/static/QueryList/QueryList.php";

    $articles = Article::get(['id', 'rul'])->toArray();

    foreach ($articles as $val) {
        if (empty($val['rul'])) {
            continue;
        }

        // Separate variable: the original reassigned the array being iterated.
        // '@' is kept so a failing request stays best-effort.
        $detail = @QueryList::Query($val['rul'], [
            'body' => ['.m-article', 'html'],
            'name' => ['.author', 'text'],
        ])->data;

        // No first result means the fetch failed or the selectors matched
        // nothing; reading $detail[0] would raise an undefined-offset error.
        if (!isset($detail[0]['name'], $detail[0]['body'])) {
            continue;
        }

        // Publisher name and article body of the first (only) match.
        $name = $detail[0]['name'];
        $body = $detail[0]['body'];

        Article::where('id', $val['id'])->update(['name' => $name, 'body' => $body]);
    }
}

浙公网安备 33010602011771号