QueryList 数据采集 + 详情采集

采集数据+七牛云存储图片

//爬虫
    /**
     * Crawl list pages 1-3 of the ke.com encyclopedia section and store the
     * scraped rows through Article::insert().
     *
     * For every list item the cover image (the `data-original` attribute) is
     * downloaded to public/uploads/img, uploaded to the Qiniu bucket "jmin",
     * and the resulting public URL is stored in the `img` column. When the
     * image cannot be downloaded or uploaded, `img` is set to false.
     *
     * Each inserted row carries: img, title, desn (summary) and rul (the
     * detail-page link read later by gather()).
     *
     * @return void
     */
    public function reptile()
    {
        require_once base_path(). "/public/static/QueryList/phpQuery.php";
        require_once base_path(). "/public/static/QueryList/QueryList.php";

        // Qiniu credentials (redacted placeholders in this listing).
        // Built once and shared by every image upload instead of being
        // re-created inside the per-image callback.
        $accessKey = "***";
        $secretKey = "***";
        $uploadMgr = new UploadManager();
        $auth = new Auth($accessKey, $secretKey);
        $token = $auth->uploadToken('jmin');

        foreach (range(1, 3) as $page) {
            $url = 'https://news.ke.com/bj/baike/0033/' . 'pg' . $page . '/';
            $data = @QueryList::Query($url, [
                'img' => ['.item>a>img', 'data-original', '', function ($item) use ($uploadMgr, $token) {
                    // Nothing to fetch when the attribute is missing or empty.
                    if (empty($item)) {
                        return false;
                    }

                    // Name the file after a hash of its source URL: unlike
                    // rand(1,999) this cannot make two different images
                    // collide on the same file name / object key.
                    $ext = pathinfo($item, PATHINFO_EXTENSION);
                    $img = 'hy_' . md5($item) . '.' . $ext;
                    $lots = base_path() . '/public/uploads/img/' . $img;

                    // Download the remote image to local disk first.
                    $contents = file_get_contents($item);
                    if ($contents === false || file_put_contents($lots, $contents) === false) {
                        return false;
                    }

                    // Upload to Qiniu: putFile(token, object key, local path).
                    list($ret, $error) = $uploadMgr->putFile($token, $img, $lots);
                    if ($error !== null) {
                        return false;
                    }

                    // Public URL of the uploaded object, usable directly in views.
                    return 'http://min.clouddn.com/' . $ret['key'];
                }],
                'title' => ['.tit', 'html'],
                'desn' => ['.text>.summary', 'text'],
                'rul' => ['.text>a', 'href'],
            ])->data;

            // Persist the scraped rows; skip pages that produced nothing.
            if (!empty($data)) {
                Article::insert($data);
            }
        }
    }

采集详情页

//根据详情链接采集详情页
    /**
     * Visit the detail page of every stored article and fill in its
     * `name` (text of `.author`) and `body` (HTML of `.m-article`) columns.
     *
     * Articles without a detail link, and pages from which nothing could be
     * scraped, are skipped and left unchanged.
     *
     * @return void
     */
    public function gather()
    {
        require_once base_path(). "/public/static/QueryList/phpQuery.php";
        require_once base_path(). "/public/static/QueryList/QueryList.php";

        $articles = Article::get(['id', 'rul'])->toArray();

        foreach ($articles as $article) {
            // No detail link stored for this row: nothing to fetch.
            if (empty($article['rul'])) {
                continue;
            }

            // Kept in its own variable so the list being iterated is not
            // overwritten by the scrape result.
            $result = @QueryList::Query($article['rul'], [
                'body' => ['.m-article', 'html'],
                'name' => ['.author', 'text']
            ])->data;

            // Request failed or the selectors matched nothing.
            if (empty($result[0])) {
                continue;
            }

            $first = $result[0];
            // Publisher name
            $name = isset($first['name']) ? $first['name'] : null;
            // Article body
            $body = isset($first['body']) ? $first['body'] : null;

            Article::where('id', $article['id'])->update(['name' => $name, 'body' => $body]);
        }
    }

 

posted @ 2021-06-19 11:56  jerry_min  阅读(115)  评论(0)    收藏  举报