requests--etree--xpath

# -*- coding: cp936 -*-
import requests
from lxml import etree

# Page whose links are listed by this script.
url = 'https://weibo.cn/pub/'

# Fetch the raw response bytes (.content): the original author notes that
# etree.HTML() raises an error when it is not fed the .content bytes.
# The timeout keeps the script from hanging forever on an unresponsive
# server.
html = requests.get(url, timeout=10).content

# Parse the downloaded markup into an element tree that supports XPath.
selector = etree.HTML(html)

# Other queries that are useful while exploring the page:
#   selector.xpath('//a/text()')  -> text content of every <a> element
#   selector.xpath('//a/@href')   -> href attribute (the link) of every <a>

# Collect the href of each <a> that is a direct child of a <div>; the
# leading `*` starts the search from the children of the root element.
titles = selector.xpath('*//div/a/@href')
for link in titles:
    # Single-argument print() behaves the same on Python 2 and Python 3,
    # unlike the Python 2 only `print x` statement.
    print(link)

 

posted @ 2018-05-25 21:05  了解2号  阅读(429)  评论(0)  编辑  收藏  举报