# XPath 解析
from lxml import etree
# Parse a local HTML file into an element tree.
# NOTE(review): etree.parse() uses lxml's XML parser unless another parser is
# passed, so 'bendi.html' presumably has to be well-formed markup -- confirm.
tree = etree.parse('bendi.html')
print(tree)

# In XPath, '/' selects direct children and '//' selects descendants at any depth.
li = tree.xpath('//body/ul/li')
print(li)
print(len(li))

# Text of every <li> that carries an id attribute.
liid = tree.xpath('//body/ul/li[@id]/text()')
for i in liid:
    # The loop body must be indented; the flattened original was a syntax error.
    print(i)

# Text of the <li> whose id is exactly "bj".
libj = tree.xpath('//body/ul/li[@id="bj"]/text()')
print(libj)
# 获取属性:
# 属性查询使用 @ 语法,例如 //@class 会选取文档中所有的 class 属性值。
# 获取"百度一下":
# Download a live page and read an attribute from it with XPath.
# urllib.request is imported here because the snippet uses it but the file's
# visible imports only bring in lxml.etree.
import urllib.request

url = 'http://www.baidu.com'

# The context manager closes the HTTP response once the body has been read.
with urllib.request.urlopen(url) as response:
    content = response.read().decode('utf-8')

# etree.HTML() parses an HTML string (as opposed to etree.parse() for files).
tree1 = etree.HTML(content)

# Collect the value attribute of the <input> element whose id is "su".
val = tree1.xpath('//input[@id="su"]//@value')
# Raises IndexError if the page contains no such element.
print(val[0])
浙公网安备 33010602011771号