php curl采集静态资源

<?php
include 'curl2.php';
include 'phpQuery.php';
include 'QueryList.php';
set_time_limit(0);

use QL\QueryList;
/**
 * Download a URL straight into a local file with cURL and report, via echo,
 * whether the number of bytes on disk matches what cURL says it transferred.
 *
 * @param string $path Destination file path; the file is created or truncated.
 * @param string $url  URL to download.
 * @return bool True when the transfer succeeded and the sizes match, false otherwise.
 */
function getFile($path,$url)
{
    // Open the destination first: without a valid handle there is nothing
    // to hand to CURLOPT_FILE, so bail out before touching cURL at all.
    $fp = fopen($path, 'w');
    if ($fp === false) {
        echo $url.$path."文件无法打开,下载失败<br/>";
        return false;
    }

    $ch = curl_init();
    if ($ch === false) {
        fclose($fp);
        echo $url.$path."cURL初始化失败<br/>";
        return false;
    }

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 60);
    // Stream the response body directly into the file instead of memory.
    curl_setopt($ch, CURLOPT_FILE, $fp);

    $ok = curl_exec($ch);
    $info = curl_getinfo($ch);
    $error = curl_error($ch);

    // Release both resources before inspecting the result so every exit
    // path below leaves nothing open; fclose() also flushes pending writes.
    curl_close($ch);
    fclose($fp);

    if ($ok === false) {
        echo $url.$path."下载失败: ".$error."<br/>";
        return false;
    }

    // filesize() results are cached per path; drop the cache so we read the
    // size of the file as it is now, not as it was on an earlier call.
    clearstatcache();
    $size = filesize($path);
    if ($size === false || $size != $info['size_download']) {
        echo $url.$path."下载的数据不完整,请重新下载<br/>";
        return false;
    }

    echo $url.$path."下载数据完整<br/>";
    return true;
}

// Mirror one demo page and the static assets it references (stylesheets,
// images, scripts) into a local directory.
$domain = 'http://demo.sucaihuo.com/';
$resourceUrl = $domain.'129/';
$saveRoot = '../Ninthpalacedraw/';
$c = new cURL();
$content = $c->get($resourceUrl);

// Keep a raw copy of the fetched page on disk and read it back, so what is
// parsed below is exactly what was cached in fetch.txt.
file_put_contents('fetch.txt', $content);
$content = file_get_contents('fetch.txt');
if ($content === false || $content === '') {
    exit($resourceUrl.'页面抓取失败<br />');
}

// Extraction rules handed to QueryList: result key => array(selector, attribute).
$rules = array(
    'link' => array('link','href'),
    'img'=>array('img ','src'),
    'script'=>array('script ','src')
);

$data = QueryList::Query($content,$rules)->getData();

// Every asset and the final HTML are written below $saveRoot, so make sure it exists.
if (!is_dir($saveRoot) && !mkdir($saveRoot, 0777, true) && !is_dir($saveRoot)) {
    exit($saveRoot.'目录创建失败<br />');
}

// Scheme used to complete protocol-relative references ("//host/path").
$scheme = parse_url($domain, PHP_URL_SCHEME);
if (!$scheme) {
    $scheme = 'http';
}

// URLs already handled, so an asset referenced several times is fetched once.
$seen = array();

foreach ($data as $row)
{
    foreach ($row as $v)
    {
        if (!$v) continue;
        $v = trim($v);
        if ($v === '') continue;

        // Work out the download URL and the path used for the local copy.
        if (preg_match('#^https?://#i', $v)) {
            // Absolute URL: test the scheme prefix only, not "http" anywhere in the string.
            $url = $v;
            $local = parse_url($url, PHP_URL_PATH);
        } elseif (strpos($v, '//') === 0) {
            // Protocol-relative URL.
            $url = $scheme.':'.$v;
            $local = parse_url($url, PHP_URL_PATH);
        } elseif (preg_match('#^[a-z][a-z0-9+.\-]*:#i', $v)) {
            // data:, javascript:, mailto: ... nothing to download.
            continue;
        } elseif (strpos($v, '/') === 0) {
            // Root-relative reference resolves against the site root, not the page directory.
            $url = rtrim($domain, '/').$v;
            $local = preg_replace('/[?#].*$/s', '', $v);
        } else {
            // Plain relative reference resolves against the page directory.
            $url = $resourceUrl.$v;
            $local = preg_replace('/[?#].*$/s', '', $v);
        }

        // No usable path, or a path ending in "/" (no file name): skip it.
        if (!is_string($local) || $local === '' || substr($local, -1) === '/') {
            continue;
        }

        if (isset($seen[$url])) continue;
        $seen[$url] = true;

        // Split into clean segments. Empty, "." and ".." segments are dropped
        // so that remote markup can never make us write outside $saveRoot.
        $segments = array();
        foreach (explode('/', $local) as $segment) {
            if ($segment === '' || $segment === '.' || $segment === '..') continue;
            $segments[] = $segment;
        }
        if (!$segments) continue;

        $filename = array_pop($segments);
        $path = $saveRoot;
        if ($segments) {
            $path .= implode('/', $segments).'/';
        }

        // Create the whole directory chain in one recursive call; the second
        // is_dir() covers the case where it appeared in the meantime.
        if (!is_dir($path) && !mkdir($path, 0777, true) && !is_dir($path)) {
            echo $path.'目录创建失败<br />';
            continue;
        }

        $resource = $c->get($url);
        if ($resource === false || $resource === null) {
            // Do not leave an empty placeholder file behind for a failed download.
            echo $url.'下载失败<br />';
            continue;
        }

        if (file_put_contents($path.$filename, $resource) === false) {
            echo $path.$filename.'写入失败<br />';
        }
    }
}

if (file_put_contents($saveRoot.'lottery.html', $content) === false) {
    echo $saveRoot.'lottery.html'.'写入失败<br />';
}
//print_r($data);

 

posted on 2018-02-23 09:57  小乔流水人家  阅读(436)  评论(0)    收藏  举报

导航