PHP XPath 解析
<?php
require './vendor/autoload.php';
// Demo script: download a blog index page and print every attribute of the
// <a> elements whose class contains "postTitle".
$client = new GuzzleHttp\Client();
$url = "https://www.cnblogs.com/brady-wang/";
$res = $client->request('GET', $url);
echo $res->getStatusCode();

$html = $res->getBody()->getContents();

// Real-world HTML is rarely well-formed. Let libxml collect its parse
// warnings internally instead of hiding every error behind the @ operator,
// and put the previous error mode back once parsing is done.
$previousErrorMode = libxml_use_internal_errors(true);
$dom = new DOMDocument();
// loadHTML() rejects an empty source, so only parse when a body was received;
// otherwise the document stays empty and the query below matches nothing.
if ($html !== '') {
    $dom->loadHTML($html);
}
libxml_clear_errors();
libxml_use_internal_errors($previousErrorMode);

// Select every attribute node of each matching anchor.
$xPath = new DOMXPath($dom);
$elements = $xPath->query('//a[contains(@class,"postTitle")]/@*');

// query() returns false when the expression cannot be evaluated.
if ($elements !== false) {
    // Iterate the node list directly: one "name : value" line per attribute.
    foreach ($elements as $e) {
        echo $e->nodeName . " : " . $e->nodeValue . PHP_EOL;
    }

    // Walk the same node list by index; for attribute nodes nodeValue and
    // textContent are dumped side by side for comparison.
    $count = $elements->length;
    for ($i = 0; $i < $count; $i++) {
        $item = $elements->item($i);
        var_dump($item->nodeValue);
        var_dump($item->textContent);
    }
}
<?php
require './vendor/autoload.php';
/**
 * Download a page with Guzzle and hand back its body.
 *
 * Prints the HTTP status code of the response as a side effect.
 *
 * @param string $url Address to request with GET.
 * @return string Contents read from the response body stream.
 */
function getContent($url)
{
    $httpClient = new GuzzleHttp\Client();
    $response = $httpClient->request('GET', $url);

    $status = $response->getStatusCode();
    echo $status;

    $body = $response->getBody();
    return $body->getContents();
}
/**
 * Extract the href of every <a> element whose class attribute contains
 * "postTitle".
 *
 * @param string $html Raw HTML markup to scan.
 * @return array List of href attribute values in document order; empty when
 *               the markup is empty, cannot be parsed, or has no match.
 */
function parse($html)
{
    $html = (string) $html;
    // DOMDocument::loadHTML() rejects an empty source (ValueError on PHP 8),
    // and an empty page has no links anyway.
    if ($html === '') {
        return [];
    }

    // Let libxml collect warnings about malformed markup internally instead
    // of silencing every error with @; restore the caller's mode afterwards.
    $previousErrorMode = libxml_use_internal_errors(true);
    $dom = new DOMDocument();
    $loaded = $dom->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previousErrorMode);

    if ($loaded === false) {
        return [];
    }

    // Ask XPath for the href attributes directly rather than fetching every
    // attribute and filtering by name afterwards.
    $xPath = new DOMXPath($dom);
    $hrefNodes = $xPath->query('//a[contains(@class,"postTitle")]/@href');
    if ($hrefNodes === false) {
        return [];
    }

    $urls = [];
    foreach ($hrefNodes as $hrefNode) {
        $urls[] = $hrefNode->nodeValue;
    }

    return $urls;
}
// Driver: fetch the blog index page, pull out the post links, and dump them.
$url = "https://www.cnblogs.com/brady-wang/";

// parse() receives the body returned by getContent() for that address.
$urls = parse(getContent($url));

var_dump($urls);
https://ask.csdn.net/questions/833515

浙公网安备 33010602011771号