# -*- coding: utf-8 -*-
"""Usage notes for pyquery, a jQuery-style HTML parsing library.

pyquery is a powerful and flexible web-page parsing library.  If regular
expressions feel too tedious to write, if the BeautifulSoup syntax is too
hard to remember, and if you already know jQuery syntax, then pyquery is
the best choice.

Install pyquery with::

    pip3 install pyquery
"""
# The description above has to be the first statement of the module to be
# picked up as its docstring, so the author tag comes after it.
__author__ = 'Linhaifeng'
# Sample markup used by every example below: a navigation <ul> that holds a
# bare text node ("哈哈哈") and one <li>, which in turn wraps a nested
# sub-menu <ul> with two more <li> entries.
html = """
</div><div class="account-signin">
<ul class="navigation menu" aria-label="Social Media Navigation">
哈哈哈
<li class="tier-1 last" aria-haspopup="true">
<a href="/accounts/login/" title="Sign Up or Sign In to Python.org">Sign In</a>
<ul class="subnav menu">
<li class="tier-2 element-1" role="treeitem"><a href="/accounts/signup/">Sign Up / Register</a></li>
<li class="tier-2 element-2" role="treeitem"><a href="/accounts/login/">Sign In</a></li>
</ul>
</li>
</ul>
</div>
"""
#用法:
#1===========>初始化
#===>字符串初始化
# from pyquery import PyQuery as pq
# doc=pq(html)
# print(doc('.tier-2')) #默认就是css选择器
#===>url初始化
# from pyquery import PyQuery as pq
# doc=pq(url='http://www.baidu.com')
# print(doc('head'))
#===>文件初始化
# from pyquery import PyQuery as pq
# doc=pq(filename='demo.html')
# print(doc('li'))
#2===========>基本css选择器
from pyquery import PyQuery as pq
# Parse the sample markup once; the resulting object is queried with selector
# strings in the examples that follow.
doc = pq(html)
# print(doc('.tier-2')) #默认就是css选择器
#查找元素
#子元素
# print(doc('li').find('li')) #这里的find是查找所有,但是不一定是直接子元素
# print('==>',doc('li').children('li')) #查找直接子元素
#父元素
# print(doc('.tier-2').parent())
#祖先元素:爹,爹的爹
# print(doc('.tier-2').parents())
# print(doc('.tier-2').parents('.account-signin')) #从祖先里筛选
# Supplement first: descendant selectors and compound (multi-class) selectors
# print(doc('.tier-1 .tier-2'))
# print(doc('.tier-1 .tier-2.element-1'))
#兄弟元素
# print(doc('.tier-2.element-1').siblings())
# print(doc('.tier-2.element-1').siblings('li a'))
#3===========>遍历
# lis=doc('li').items()
# print(lis)
#
# for i,j in enumerate(lis):
#     print(i,j)
#4===========>获取属性
# print(doc('li').attr('class'))
# print(doc('a').attr.href)
# 5===========>获取文本
# print(doc('a').text())
#6===========>获取html
# print(doc('.subnav.menu'))
# print(doc('.subnav.menu').html())
#7===========>DOM
#addclass,removeclass
# tag=doc('.subnav.menu')
# print(tag)
#
# tag.addClass('active')
# print(tag)
#
# tag.removeClass('active')
# print(tag)
# tag=doc('.tier-2.element-1 a')
# tag.attr('name','link')
# tag.css('font-size','14px')
# print(tag)
# Getting only an element's own text.
# tag.text() returns the text of the tag together with all of its descendants,
# so to obtain just the "哈哈哈" that sits directly under the <ul>, the nested
# <li> elements have to be removed first.
# remove() edits the parsed tree in place, and selections taken from `doc`
# share that tree.  The removal is therefore done on a separately parsed copy,
# so that `doc` still contains its <li> elements for the later examples.
tag = pq(html)('.navigation.menu')
# print(tag.text())  # text of the tag and everything beneath it
tag.find('li').remove()
print(tag.text())  # only the text that belongs to the <ul> itself
#8===========>pyquery官网
# https://pyquery.readthedocs.io/en/latest/api.html
#9===========>伪类选择器
# Pseudo-class selector examples: each selector is applied to `doc` and the
# resulting selection is printed, one selection per print call.
pseudo_selectors = (
    'li:first-child',       # <li> that is the first child of its parent
    'li:last-child',        # <li> that is the last child of its parent
    'li:nth-child(2)',      # <li> that is the second child of its parent
    'li:gt(2)',             # matched <li> whose zero-based index is above 2
    'li:nth-child(2n)',     # <li> at even child positions
    'li:nth-child(2n+1)',   # <li> at odd child positions
    'li:contains(second)',  # <li> whose text contains "second"
)
for selector in pseudo_selectors:
    print(doc(selector))
#更多css选择器可以查看
# http://www.w3school.com.cn/css/index.asp
#官网:http://pyquery.readthedocs.io/