层次遍历网页的树结构

from bs4 import BeautifulSoup
from queue import Queue
import requests

# Breadth-first (level-order) traversal of a web page's node tree:
# every node on one level is visited before any node one level deeper.
url = "https://www.pythonscraping.com/pages/page3.html"
# A timeout keeps the script from hanging forever on an unresponsive server.
html = requests.get(url, timeout=10)
html.raise_for_status()  # fail loudly instead of parsing an HTTP error page
soup = BeautifulSoup(html.text, 'lxml')  # parse the page
q = Queue()  # FIFO queue of nodes waiting to be visited
root = soup.html  # the root <html> element
if root is not None:  # nothing to traverse if no <html> element was found
    q.put(root)
while not q.empty():
    temp = q.get()  # next node in level order
    print(temp)
    # Beautiful Soup has no `firstChild` attribute: on a Tag an unknown
    # attribute name is treated as a tag-name search and yields None, so the
    # children were never enqueued. `.contents` is the list of direct
    # children; text nodes do not have it, hence the empty default.
    for child in getattr(temp, "contents", ()):
        q.put(child)

posted @ 2022-05-06 14:27 芬芬的笔迹*本 阅读(91) 评论(0) 收藏 举报

刷新页面返回顶部

ffxqc

层次遍历网页的树结构

公告