lxml的实际使用

点击查看代码
# Parse the response body into an element tree and select the listing
# containers with an absolute XPath.
html = etree.HTML(resp.text)
divs = html.xpath("/html/body/div[6]/div/div/div[2]/div[5]/div[1]/div")

# XPath expressions evaluated relative to each listing container.
NAME_XPATH = "./div/div/a[1]/div[1]/p/text()"
PRICE_XPATH = "./div/div/a[2]/div[2]/div[1]/span[1]/text()"
SOFTWARE_XPATH = "./div/div/a[2]/div[2]/div[2]/p/text()"

for div in divs:
    # The second item of the name query result is used, with surrounding
    # whitespace removed.
    name_texts = div.xpath(NAME_XPATH)
    companyName = name_texts[1].strip()

    # The first item of the price query result, with any leading/trailing
    # "¥" characters removed.
    price_texts = div.xpath(PRICE_XPATH)
    price = price_texts[0].strip("¥")

    # All matched text items are concatenated using the literal string
    # "saas" as the separator.
    software_texts = div.xpath(SOFTWARE_XPATH)
    software = "saas".join(software_texts)

避免SSL警告（仅导入 urllib3 不会生效，导入后还需调用 urllib3.disable_warnings()）
from requests.packages import urllib3

posted @ 2023-09-26 21:30  W3w  阅读(22)  评论(0)    收藏  举报