curl代理IP采集

<?php
include 'curl2.php';
include 'phpQuery.php';
include 'QueryList.php';
// Remove the execution-time limit for this long-running scrape.
set_time_limit(0);
// Set the maximum execution time explicitly: 2400 s = 40 minutes.
// NOTE(review): this is applied after set_time_limit(0), so it looks like it
// supersedes the "no limit" setting above; confirm which one is intended.
ini_set('max_execution_time', 2400);
// Raise the memory ceiling for this run: 1048576000 bytes (1000 MiB, roughly 1 GB).
ini_set('memory_limit', 1048576000);
use QL\QueryList;

// Page to scrape for proxies. Alternative sources are kept here, disabled:
//$url = 'https://www.kuaidaili.com/free/inha/';
//$url = 'http://www.gatherproxy.com';
$url = 'https://www.socks-proxy.net/';

// HTTP client (presumably declared in curl2.php); only the disabled live-fetch
// code further down uses it.
$c = new cURL();

// Extraction rules handed to QueryList: each output field maps to a
// [selector, 'text'] pair. The selectors target the first and second cell of
// every row in the ".table" body.
// NOTE(review): standard CSS spells the pseudo-class ":nth-child" with a single
// colon; confirm the double-colon form is accepted by the selector engine.
$rules = [
    'ip'   => ['.table tbody tr td::nth-child(1)', 'text'],
    'port' => ['.table tbody tr td::nth-child(2)', 'text'],
];

/*$abc = [
    'a'
];
$key = key($abc);
unset($abc[$key]);
$key = key($abc);
var_dump($key);
var_dump($abc[$key]);*/
//exit;



// To refresh the cached copy of the page, re-enable the live fetch:
//$content = $c->get($url);
//file_put_contents('fetch.txt', $content);

// Work from the cached copy of the page.
$content = file_get_contents('fetch.txt');
if ($content === false) {
    // file_get_contents() returns false when the file cannot be read; stop
    // here rather than handing a non-string to the parser.
    echo 'read fail: fetch.txt';
    exit(1);
}

// Apply the extraction rules and dump the resulting rows for inspection.
$data = QueryList::Query($content, $rules)->getData();
print_r($data);

// Probe every scraped proxy with a plain TCP connect and collect the ones that
// accept the connection as "ip:port" lines.
$proxyStr = '';
foreach ($data as $proxy) {
    // A scraped row may be incomplete; skip it instead of raising
    // undefined-index notices and probing garbage.
    if (!isset($proxy['ip'], $proxy['port'])) {
        continue;
    }

    // Scraped cell text may carry surrounding whitespace, and socket_connect()
    // expects an integer port.
    $ip = trim($proxy['ip']);
    $port = (int) trim($proxy['port']);
    if ($ip === '' || $port < 1 || $port > 65535) {
        continue;
    }

    // 1. Create the socket.
    $socket = socket_create(AF_INET, SOCK_STREAM, SOL_TCP);
    if ($socket === false) {
        echo 'create fail: ' . socket_strerror(socket_last_error());
        // No socket to connect or close for this entry; move on to the next.
        continue;
    }

    // 2. Connect. A successful connect is the only liveness criterion.
    // NOTE(review): no timeout is configured on the socket, so an unresponsive
    // host blocks this loop until the connect attempt gives up on its own;
    // consider setting one if runs take too long.
    if (socket_connect($socket, $ip, $port)) {
        $proxyStr .= $ip . ':' . $port . "\n";
    }

    socket_close($socket);
}

// Persist the reachable proxies and dump the byte count (or false on failure).
var_dump(file_put_contents('proxy.txt', $proxyStr));
//var_dump($content);
//var_dump(file_get_contents($url));
/*$url = 'https://www.kuaidaili.com/free/inha/';
$c = new cURL();
*/

 

posted on 2018-01-19 15:59  小乔流水人家  阅读(208)  评论(0)    收藏  举报

导航