xpath解析基础

import requests
from lxml import etree

if __name__ == "__main__":
    # Demonstrates basic XPath queries against a local HTML file with lxml.
    #
    # Load the file into an element tree. An explicit HTMLParser is used
    # because etree.parse() defaults to a strict XML parser, which raises
    # XMLSyntaxError on typical HTML (unclosed <meta>/<img>/<br> tags,
    # HTML-only entities, and so on).
    html_parser = etree.HTMLParser()
    tree = etree.parse('test.html', html_parser)

    # Reference examples of XPath expressions (uncomment to try them):
    #
    # A leading '/' starts the path at the root node.
    # r = tree.xpath('/html/body/div')
    # '//' in the middle of a path spans any number of levels.
    # r = tree.xpath('/html//div')
    # A leading '//' matches the element anywhere in the document.
    # r = tree.xpath('//div')
    # Attribute predicate: select by class value.
    # r = tree.xpath('//div[@class="song"]')
    # Positional predicate: XPath indexes start at 1, not 0.
    # r = tree.xpath('//div[@class="song"]/p[3]')
    # '/text()' returns the direct text children of the element.
    # r = tree.xpath('//div[@class="tang"]//li[5]/a/text()')[0]
    # '//text()' returns all descendant text, not only direct children.
    # r = tree.xpath('//div[@class="tang"]//li[7]//text()')[0]

    # '/@src' extracts the value of the src attribute; xpath() returns a
    # list with one entry per matching <img>.
    r = tree.xpath('//div[@class="song"]/img/@src')

    print(r)

 

posted @ 2020-04-13 16:59  风hua  阅读(131)  评论(0)  编辑  收藏  举报