PyQuery库的使用(下篇)
3.4 遍历
3.41 单个元素输出
# Sample page fragment reused to show how a whole selection is printed at once.
html = '''
<body link="#1024ee" class="abv-115-control">
<div id="header" >
<span id="suggest-align" >
<a href="javascript:;" title="清空">清空</a>
<input type="text" id="keyword" name="q" value="百度">百度
<input type="hidden" name="src" class="src" value="srp">
<input type="hidden" name="fr" value="se7_newtab_new">
<input type="hidden" name="psid" value="">
</span><input type="submit" value="搜索" class="s_btn">
'''
from pyquery import PyQuery

# Parse the fragment, then pick every <input> element with a tag selector.
doc = PyQuery(html)
inputs = doc('input')
# Printing the selection writes out all matched elements, in document order.
print(inputs)
<input type="text" id="keyword" name="q" value="百度"/>百度
<input type="hidden" name="src" class="src" value="srp"/>
<input type="hidden" name="fr" value="se7_newtab_new"/>
<input type="hidden" name="psid" value=""/>
<input type="submit" value="搜索" class="s_btn"/>
3.42 遍历元素输出
# Sample page fragment reused to show element-by-element iteration.
html = '''
<body link="#1024ee" class="abv-115-control">
<div id="header" >
<span id="suggest-align" >
<a href="javascript:;" title="清空">清空</a>
<input type="text" id="keyword" name="q" value="百度">百度
<input type="hidden" name="src" class="src" value="srp">
<input type="hidden" name="fr" value="se7_newtab_new">
<input type="hidden" name="psid" value="">
</span><input type="submit" value="搜索" class="s_btn">
'''
from pyquery import PyQuery
doc = PyQuery(html)
items = doc('input').items()  # items() returns a generator over the matched elements
print(type(items))
for li in items:  # visit each matched <input> in turn
    print(li)  # print the current element on its own
<class 'generator'>
<input type="text" id="keyword" name="q" value="百度"/>百度
<input type="hidden" name="src" class="src" value="srp"/>
<input type="hidden" name="fr" value="se7_newtab_new"/>
<input type="hidden" name="psid" value=""/>
<input type="submit" value="搜索" class="s_btn"/>
3.5 获取信息
3.51 获取属性
# Sample page fragment reused to show the two ways of reading an attribute.
html = '''
<body link="#1024ee" class="abv-115-control">
<div id="header" >
<span id="suggest-align" >
<a href="javascript:;" title="清空">清空</a>
<input type="text" id="keyword" name="q" value="百度">百度
<input type="hidden" name="src" class="src" value="srp">
<input type="hidden" name="fr" value="se7_newtab_new">
<input type="hidden" name="psid" value="">
</span><input type="submit" value="搜索" class="s_btn">
'''
from pyquery import PyQuery

doc = PyQuery(html)
# Select the element carrying class "src".
src_input = doc('.src')
# Attribute-style access: read the value of the "type" attribute.
print(src_input.attr.type)
# Call-style access: same attribute, same result.
print(src_input.attr('type'))
hidden
hidden
3.52 获取文本
# Sample page fragment reused to show how to read an element's text.
html = '''
<body link="#1024ee" class="abv-115-control">
<div id="header" >
<span id="suggest-align" >
<a href="javascript:;" title="清空">清空</a>
<input type="text" id="keyword" name="q" value="百度">百度
<input type="hidden" name="src" class="src" value="srp">
<input type="hidden" name="fr" value="se7_newtab_new">
<input type="hidden" name="psid" value="">
</span><input type="submit" value="搜索" class="s_btn">
'''
from pyquery import PyQuery
doc = PyQuery(html)
a = doc('a')  # select the <a> element
# Read the text from the selection made just above; the original called
# text() on `items`, a name this snippet never defines.
text = a.text()
print(text)
清空
3.53 获取HTML
# Sample page fragment reused to show how to read an element's inner HTML.
html = '''
<body link="#1024ee" class="abv-115-control">
<div id="header" >
<span id="suggest-align" >
<a href="javascript:;" title="清空">清空</a>
<input type="text" id="keyword" name="q" value="百度">百度
<input type="hidden" name="src" class="src" value="srp">
<input type="hidden" name="fr" value="se7_newtab_new">
<input type="hidden" name="psid" value="">
</span><input type="submit" value="搜索" class="s_btn">
'''
from pyquery import PyQuery

doc = PyQuery(html)
# Select the <a> element.
anchor = doc('a')
# html() returns the markup inside the element; note that this rebinds the
# name `html`, replacing the source fragment defined above.
html = anchor.html()
print(html)
清空
3.6 DOM操作
3.61 add class and remove class
# Sample page fragment reused to show adding and removing a class.
html = '''
<body link="#1024ee" class="abv-115-control">
<div id="header" >
<span id="suggest-align" >
<a href="javascript:;" title="清空">清空</a>
<input type="text" id="keyword" name="q" value="百度">百度
<input type="hidden" name="src" class="src" value="srp">
<input type="hidden" name="fr" value="se7_newtab_new">
<input type="hidden" name="psid" value="">
</span><input type="submit" value="搜索" class="s_btn">
'''
from pyquery import PyQuery

doc = PyQuery(html)
link = doc('a')
# State before any change.
print(link)
# Add a class to the element (the class name is kept exactly as the
# tutorial wrote it; presumably 'active' was intended).
link.addClass('ative')
print(link)
# Remove the same class again; the printed element keeps an empty class attribute.
link.removeClass('ative')
print(link)
<a href="javascript:;" title="清空">清空</a>
<a href="javascript:;" title="清空" class="ative">清空</a>
<a href="javascript:;" title="清空" class="">清空</a>
3.62 attr \ css
# Sample page fragment reused to show setting attributes and inline styles.
html = '''
<body link="#1024ee" class="abv-115-control">
<div id="header" >
<span id="suggest-align" >
<a href="javascript:;" title="清空">清空</a>
<input type="text" id="keyword" name="q" value="百度">百度
<input type="hidden" name="src" class="src" value="srp">
<input type="hidden" name="fr" value="se7_newtab_new">
<input type="hidden" name="psid" value="">
</span><input type="submit" value="搜索" class="s_btn">
'''
from pyquery import PyQuery

doc = PyQuery(html)
src_input = doc('.src')
# State before any change.
print(src_input)
# Two-argument attr() sets the "name" attribute (it is added if missing).
src_input.attr('name', 'link')
print(src_input)
# Two-argument css() sets a property in the inline style attribute
# (the style attribute is added if missing).
src_input.css('size', '14px')
print(src_input)
<input type="hidden" name="src" class="src" value="srp"/>
<input type="hidden" name="link" class="src" value="srp"/>
<input type="hidden" name="link" class="src" value="srp" style="size: 14px"/>
3.63 remove
# Sample page fragment reused to show removing a child element.
html = '''
<body link="#1024ee" class="abv-115-control">
<div id="header" >
<span id="suggest-align" >
<a href="javascript:;" title="清空">清空</a>
<input type="text" id="keyword" name="q" value="百度">百度
<input type="hidden" name="src" class="src" value="srp">
<input type="hidden" name="fr" value="se7_newtab_new">
<input type="hidden" name="psid" value="">
</span><input type="submit" value="搜索" class="s_btn">
'''
from pyquery import PyQuery

doc = PyQuery(html)
span = doc('span')
# Show the <span> markup and its text before the removal.
print(span)
print(span.text())
# Locate the <a> element inside the span and remove the element itself.
anchor = span.find('a')
anchor.remove()
# The span's text no longer contains the text of the removed <a>.
print(span.text())
<span id="suggest-align">
<a href="javascript:;" title="清空">清空</a>
<input type="text" id="keyword" name="q" value="百度"/>百度
<input type="hidden" name="src" class="src" value="srp"/>
<input type="hidden" name="fr" value="se7_newtab_new"/>
<input type="hidden" name="psid" value=""/>
</span>
清空 百度
百度
4 伪类选择器
# Sample page fragment reused to demonstrate pseudo-class selectors.
html = '''
<body link="#1024ee" class="abv-115-control">
<div id="header" >
<span id="suggest-align" >
<a href="javascript:;" title="清空">清空</a>
<input type="text" id="keyword" name="q" value="百度">百度
<input type="hidden" name="src" class="src" value="srp">
<input type="hidden" name="fr" value="se7_newtab_new">
<input type="hidden" name="psid" value="">
</span><input type="submit" value="搜索" class="s_btn">
'''
from pyquery import PyQuery
doc = PyQuery(html)
inputs = doc('input:first-child') # <input> elements that are the first child of their parent (none in this fragment)
print(inputs)
inputs = doc('input:last-child') # <input> elements that are the last child of their parent
print(inputs)
inputs = doc('input:nth-child(2)') # <input> elements that are the second child of their parent
print(inputs)
inputs = doc('input:nth-child(2n)') # <input> elements at even child positions (2nd, 4th, ...) of their parent
print(inputs)
inputs = doc('input:gt(2)') # <input> elements whose index among the matches is greater than 2 (counting from 0)
print(inputs)
inputs = doc('input:contains(second)') # <input> elements containing the text "second" (none in this fragment)
print(inputs)
<input type="hidden" name="psid" value=""/>
<input type="submit" value="搜索" class="s_btn"/>
<input type="text" id="keyword" name="q" value="百度"/>百度
<input type="submit" value="搜索" class="s_btn"/>
<input type="text" id="keyword" name="q" value="百度"/>百度
<input type="hidden" name="fr" value="se7_newtab_new"/>
<input type="submit" value="搜索" class="s_btn"/>
<input type="hidden" name="psid" value=""/>
<input type="submit" value="搜索" class="s_btn"/>

浙公网安备 33010602011771号