第四关分页参数分析及翻页爬取

点击查看代码
import requests
from lxml import etree

# Desktop Chrome user-agent string, assembled from adjacent literals so each
# product token sits on its own line; the resulting value is one single string.
_USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/129.0.0.0 Safari/537.36"
)

# HTTP request headers used by this script's page requests.
headers = {"user-agent": _USER_AGENT}
def _cell_text(row, index):
    """Return the first text node of one cell in a table row.

    Args:
        row: An lxml element for a ``<tr>``.
        index: 1-based position of the ``<td>`` within the row.

    Returns:
        The first text node of that cell, or ``None`` when the cell has no
        text (an empty XPath result), so an empty cell never raises
        ``IndexError``.
    """
    texts = row.xpath(f'./td[{index}]/text()')
    return texts[0] if texts else None


# Fetch pages 1 through 5; the page is selected by the ``pageno`` query
# parameter. Each table row is printed as eight space-separated fields.
for i in range(1, 6):
    url = f'https://www.spiderbuf.cn/playground/s04?pageno={i}'
    # Without a timeout a stalled connection would block the script forever.
    response = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on an HTTP error instead of parsing an error page as data.
    response.raise_for_status()
    root = etree.HTML(response.text)
    # Drop the first <tr> — presumably the table's header row.
    trs = root.xpath('//table//tr')[1:]
    for tr in trs:
        # Columns 1-8 in document order. Names avoid shadowing the builtin
        # ``type`` and the stdlib module name ``os``.
        num, ip, mac, name, dev_type, os_name, port, online = (
            _cell_text(tr, col) for col in range(1, 9)
        )
        print(num, ip, mac, name, dev_type, os_name, port, online)
    print(f'第{i}页完毕')


posted @ 2024-10-07 23:13  神仙不在  阅读(28)  评论(0)    收藏  举报