第六关带iframe的页面源码分析及数据爬取

点击查看代码
from urllib.parse import urljoin

import requests
from lxml import etree

# Scrape the table that the s06 playground page embeds through an <iframe>:
# fetch the outer page, follow the iframe's src, then print one line per
# table row of the inner document.
url = 'https://www.spiderbuf.cn/playground/s06'
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36"
}
# Seconds to wait for each HTTP request; without a timeout requests can
# block forever on an unresponsive server.
REQUEST_TIMEOUT = 10


def _first_text(node, path):
    """Return the first result of XPath *path* on *node*, or None if empty.

    An empty cell yields no text nodes, so indexing ``[0]`` directly would
    raise IndexError and abort the whole scrape.
    """
    found = node.xpath(path)
    return found[0] if found else None


r = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
# Fail loudly on 4xx/5xx instead of parsing an error page.
r.raise_for_status()
html = etree.HTML(r.text)
iframe_srcs = html.xpath('//iframe/@src')
if not iframe_srcs:
    raise RuntimeError('no <iframe> found on ' + url)
# Resolve the src the way a browser would: urljoin handles root-relative,
# document-relative and absolute values, whereas plain concatenation with
# the site root produced a double slash or a wrong URL.
src = urljoin(url, iframe_srcs[0])
print(src)
response = requests.get(src, headers=headers, timeout=REQUEST_TIMEOUT)
response.raise_for_status()
print(response.text)
root = etree.HTML(response.text)
# Drop the first <tr> (presumably the header row) and keep the data rows.
trs = root.xpath('//table//tr')[1:]
for tr in trs:
    num = _first_text(tr, './td[1]/text()')
    # The second cell is read with //text(), i.e. including nested elements.
    ip = _first_text(tr, './td[2]//text()')
    mac = _first_text(tr, './td[3]/text()')
    name = _first_text(tr, './td[4]/text()')
    # Named device_type / os_name to avoid shadowing the builtin `type`
    # and the stdlib module name `os`.
    device_type = _first_text(tr, './td[5]/text()')
    os_name = _first_text(tr, './td[6]/text()')
    port = _first_text(tr, './td[7]/text()')
    # The eighth cell's text is read from a nested <font> element.
    online = _first_text(tr, './td[8]/font/text()')
    print(num, ip, mac, name, device_type, os_name, port, online)


posted @ 2024-10-08 14:52  神仙不在  阅读(40)  评论(0)    收藏  举报