python之xpath

 1 #!/usr/bin/env python3
 2 # -*- coding: utf-8 -*-
 3 # author：Momo time:2018/6/29
 4 
 5 import urllib.request
 6 import urllib
 7 
 8 from lxml import etree
 9 
10 
def get_html(url):
    """Fetch *url* and return the response body decoded as UTF-8 text.

    Args:
        url: Address to open; passed unchanged to ``urllib.request.urlopen``.

    Returns:
        The complete response body as a ``str``.

    Raises:
        urllib.error.URLError: If the URL cannot be opened.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # The context manager guarantees the response (and its underlying
    # connection) is closed even when reading or decoding raises.
    with urllib.request.urlopen(url) as html_page:
        return html_page.read().decode('utf-8')
15 
# XPath quick reference:
#   //        start from the document root and match at any depth
#   /         step down one level
#   /text()   extract the text content of a node
#   /@XXXX    extract the value of attribute XXXX
PAGE_URL = "http://www.runoob.com/python3/python3-reg-expressions.html"
TABLE_CLASS_XPATH = '//*[@id="content"]/table/@class'

# Download the page and parse it into an element tree that supports XPath.
html = get_html(PAGE_URL)
selector = etree.HTML(html)

# # Example: extract text
# content = selector.xpath('//*[@id="content"]/p/text()')  # /text()
# for each in content:
#     print(each)

# # Example: extract attributes
# link = selector.xpath('/html/body/link/@href')
# for each in link:
#     print(each)

# Print the class attribute of every <table> directly under #content.
table = selector.xpath(TABLE_CLASS_XPATH)
for table_class in table:
    print(table_class)

posted @ 2018-09-05 21:03 肖邦、维也纳 | 阅读(111) | 评论(0) | 收藏 | 举报

刷新页面 | 返回顶部

肖邦、维也纳

python之xpath

公告