beautifulsoup 提取兄弟节点
beautifulsoup 提取兄弟节点
对于同一个父节点下具有相同 div 属性的兄弟节点,无法直接定位,可以通过一层一层地嵌套查找把它们剥离出来!
import requests
from bs4 import BeautifulSoup
import openpyxl
# Scrape the site directory page and dump one tab-separated line per site.
# Each entry of list_all is [favicon_url, name, link, description].
list_all = []
url = 'https://box.misiai.com/index.html'

# Fetch the page. The context manager releases the connection even when the
# status check or parsing raises; the timeout keeps the script from hanging
# indefinitely on an unresponsive server.
with requests.get(url=url, timeout=10) as resp:
    resp.raise_for_status()
    resp.encoding = "UTF-8"
    page = BeautifulSoup(resp.text, "html.parser")

# Per the accompanying note, the wanted nodes sit in sibling <div>s that share
# the same attributes and cannot be selected directly, so drill down one level
# at a time: "sites-list" container -> second "row" div -> the tags inside it.
container = page.find('div', {'class': 'sites-list'})
if container is None:
    raise RuntimeError("no <div class='sites-list'> found in " + url)
rows = container.find_all('div', {'class': 'row'})
if len(rows) < 2:
    raise RuntimeError("expected at least two <div class='row'> blocks in " + url)
row = rows[1]
alist = row.find_all('a')
alist_1 = row.find_all('strong')
alist_2 = row.find_all('p')

# NOTE(review): zip pairs the n-th <a>, <strong> and <p>; this presumes every
# link carries exactly one of each -- confirm against the page markup.
for anchor, strong, para in zip(alist, alist_1, alist_2):
    name = strong.get_text()
    # Use a separate name so the page URL in `url` is not overwritten.
    link = anchor.get('href')
    # Strip commas from the description. The original ran this identical
    # replace twice; the second call was a no-op and has been dropped.
    # NOTE(review): the duplicate may have been meant for a different
    # character (e.g. a full-width comma) -- confirm with the author.
    jianjie = para.get_text().replace(',', '')
    img = 'https://favicon.cccyun.cc/' + link
    list_all.append([img, name, link, jianjie])

# The output is plain tab-separated text despite the .xls extension; every
# field (including the last) is followed by a tab, then the line is ended.
with open('result.xls', 'w', encoding='UTF-8') as result:
    for record in list_all:
        for field in record:
            result.write(str(field))
            result.write('\t')
        result.write('\n')
浙公网安备 33010602011771号