import requests
from lxml import etree # 导入xpath
# Browser-like User-Agent header for HTTP requests. It is defined here but is
# not used anywhere in the visible part of this script.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/89.0.4389.128 Safari/537.36'
    ),
}

# Load the local sample page into an lxml element tree; the commented-out
# XPath examples below all query this tree.
# NOTE(review): no parser argument is passed, so lxml's default parser is
# used -- confirm test.html is well-formed enough for it.
tree = etree.parse('./相关/test.html')
# Tag selection
# print(tree.xpath('//div')) # select every <div> tag in the document
# Attribute selection: filter tags by an attribute value
# print(tree.xpath('//div[@class="tang"]'))
# Index (positional) selection: XPath indices start at 1, not 0
# print(tree.xpath('//div[1]'))
# Hierarchical selection: / steps down exactly one level, // spans any number of levels
# print(tree.xpath('//div[@class="tang"]/ul/li[4]/a'))
# print(tree.xpath('//div[@class="tang"]//li[4]/a'))
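# Text extraction: /text() selects the element's direct text nodes, //text() selects text nodes of all descendants as well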
# print(tree.xpath('//div[@class="tang"]/ul/li[4]/a/text()')[0])
# Attribute value extraction: /@attrName
# print(tree.xpath('//div[@class="tang"]/ul/li[4]/a/@href')[0])