print(driver.page_source)
# Locate the node whose id is "info".
elem = driver.find_element(By.ID, "info")
htm_dat = elem.get_property("outerHTML")
print('获取节点的html源码:', htm_dat)
htm_name = elem.get_property("nodeName")
print('节点名称:', htm_name)
htm_type = elem.get_property("nodeType")
print('节点类型:', htm_type)
htm_ght = elem.get_property("clientHeight")
print('节点实际高度:', htm_ght)
htm_dth = elem.get_property("clientWidth")
print('节点实际宽度:', htm_dth)
# parentNode / nextSibling are not guaranteed to come back as a WebElement:
# the DOM property is null when there is no such node (e.g. the element is
# the last child), and a non-element node such as a whitespace text node is
# presumably not wrapped as a WebElement by the driver. Only chain
# .get_property() when the returned value actually supports it; otherwise
# report None instead of crashing with AttributeError.
parent_node = elem.get_property("parentNode")
htm_node_name = (
    parent_node.get_property("nodeName")
    if hasattr(parent_node, "get_property")
    else None
)
print('该节点的父节点.名称:', htm_node_name)
next_node = elem.get_property("nextSibling")
htm_next_htm = (
    next_node.get_property("outerHTML")
    if hasattr(next_node, "get_property")
    else None
)
print('该节点的相邻的下一个节点.源码:', htm_next_htm)
from selenium.webdriver import Firefox, FirefoxOptions
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
class ContentPageParser():
    """Collect the ``src`` URLs of the ``img[class="showimg"]`` elements on a page.

    Usage: construct with the page URL, call
    ``visit_content_page_with_firefox()``, then read the result through
    ``get_img_src()``.
    """

    def __init__(self, content_page_url) -> None:
        """Remember the page to scrape and start with an empty result list.

        Args:
            content_page_url: URL of the page whose image URLs are collected.
        """
        self.content_url = content_page_url  # page whose image URLs are scraped
        self.img_src = []  # image URLs collected so far

    def visit_content_page_with_firefox(self) -> None:
        """Open the page in headless Firefox and collect the image URLs.

        Found URLs are appended to ``self.img_src``. Any exception raised
        while loading, waiting for, or reading the page is printed and
        swallowed (best-effort scrape); the browser session is always shut
        down afterwards. A failure to start Firefox itself propagates.
        """
        options = FirefoxOptions()
        # Run without a visible browser window. The command-line switch is
        # used instead of the deprecated ``headless`` attribute, which newer
        # Selenium releases no longer honour.
        options.add_argument("-headless")
        self.driver = Firefox(options=options)
        try:
            # Open the page to be scraped.
            self.driver.get(self.content_url)
            # Explicit wait: at most 10 s, polling once per second.
            wait = WebDriverWait(self.driver, timeout=10, poll_frequency=1)
            # Block until an element matching the selector is visible.
            wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, 'img[class="showimg"]')))
            # Find every matching image and record its URL.
            imgs = self.driver.find_elements(By.CSS_SELECTOR, 'img[class="showimg"]')
            self.img_src.extend(img.get_attribute('src') for img in imgs)
        except Exception as e:
            # Best-effort: report the failure and keep whatever was collected.
            print(repr(e))
        finally:
            # quit() ends the whole session and the driver process; close()
            # would only close the current window and leave them running.
            self.driver.quit()

    def get_img_src(self) -> list:
        """Return the list of image URLs collected so far (the live list, not a copy)."""
        return self.img_src
if __name__ == '__main__':
    # Script entry point: scrape one content page and print the image URLs found.
    parser = ContentPageParser('https://xxx/content_48495.html')
    parser.visit_content_page_with_firefox()
    print(parser.get_img_src())