PyQuery库的使用(下篇)

3.4 遍历

3.41 单个元素输出

        html = '''
<body link="#1024ee" class="abv-115-control">
<div id="header" >     
    <span id="suggest-align" >
        <a href="javascript:;"  title="清空">清空</a>
        <input type="text" id="keyword" name="q"  value="百度">百度
        <input type="hidden" name="src" class="src" value="srp">
        <input type="hidden" name="fr" value="se7_newtab_new">
        <input type="hidden" name="psid" value="">
    </span><input type="submit"  value="搜索" class="s_btn">
'''
from pyquery import PyQuery
# Parse the HTML string into a PyQuery document.
doc = PyQuery(html)
# Printing a selection serializes every matched tag in one go.
print(doc('input'))   # all <input> tags
<input type="text" id="keyword" name="q" value="&#x767E;&#x5EA6;"/>百度
        <input type="hidden" name="src" class="src" value="srp"/>
        <input type="hidden" name="fr" value="se7_newtab_new"/>
        <input type="hidden" name="psid" value=""/>
    <input type="submit" value="&#x641C;&#x7D22;" class="s_btn"/>
      

3.42 遍历元素输出

        html = '''
<body link="#1024ee" class="abv-115-control">
<div id="header" >     
    <span id="suggest-align" >
        <a href="javascript:;"  title="清空">清空</a>
        <input type="text" id="keyword" name="q"  value="百度">百度
        <input type="hidden" name="src" class="src" value="srp">
        <input type="hidden" name="fr" value="se7_newtab_new">
        <input type="hidden" name="psid" value="">
    </span><input type="submit"  value="搜索" class="s_btn">
'''
from pyquery import PyQuery
# Parse the HTML string into a PyQuery document.
doc = PyQuery(html)
items = doc('input').items()  # .items() yields the matches lazily; the type printed next is <class 'generator'>
print(type(items))
for li in items:   # walk the matched <input> tags one at a time
    print(li)      # print each tag on its own
<class 'generator'>
<input type="text" id="keyword" name="q" value="&#x767E;&#x5EA6;"/>百度

<input type="hidden" name="src" class="src" value="srp"/>

<input type="hidden" name="fr" value="se7_newtab_new"/>

<input type="hidden" name="psid" value=""/>

<input type="submit" value="&#x641C;&#x7D22;" class="s_btn"/>
      

3.5 获取信息

3.51 获取属性

        html = '''
<body link="#1024ee" class="abv-115-control">
<div id="header" >     
    <span id="suggest-align" >
        <a href="javascript:;"  title="清空">清空</a>
        <input type="text" id="keyword" name="q"  value="百度">百度
        <input type="hidden" name="src" class="src" value="srp">
        <input type="hidden" name="fr" value="se7_newtab_new">
        <input type="hidden" name="psid" value="">
    </span><input type="submit"  value="搜索" class="s_btn">
'''
from pyquery import PyQuery
# Parse the HTML string into a PyQuery document.
doc = PyQuery(html)
items = doc('.src')   # select by class "src"
print(items.attr.type)   # value of the "type" attribute, attribute-style access
print(items.attr('type'))   # same lookup, call-style access
hidden
hidden
      

3.52 获取文本

        html = '''
<body link="#1024ee" class="abv-115-control">
<div id="header" >     
    <span id="suggest-align" >
        <a href="javascript:;"  title="清空">清空</a>
        <input type="text" id="keyword" name="q"  value="百度">百度
        <input type="hidden" name="src" class="src" value="srp">
        <input type="hidden" name="fr" value="se7_newtab_new">
        <input type="hidden" name="psid" value="">
    </span><input type="submit"  value="搜索" class="s_btn">
'''
from pyquery import PyQuery
# Parse the HTML string into a PyQuery document.
doc = PyQuery(html)
a = doc('a')   # select the <a> tag
# Read the text from `a`, the selection made on the line above. `items` is not
# assigned anywhere in this example (it only exists if an earlier example was
# run, where it refers to a different element), so it must not be used here.
text = a.text()  # text content of the tag
print(text)
清空
      

3.53 获取HTML

        html = '''
<body link="#1024ee" class="abv-115-control">
<div id="header" >     
    <span id="suggest-align" >
        <a href="javascript:;"  title="清空">清空</a>
        <input type="text" id="keyword" name="q"  value="百度">百度
        <input type="hidden" name="src" class="src" value="srp">
        <input type="hidden" name="fr" value="se7_newtab_new">
        <input type="hidden" name="psid" value="">
    </span><input type="submit"  value="搜索" class="s_btn">
'''
from pyquery import PyQuery
# Parse the HTML string into a PyQuery document.
doc = PyQuery(html)
items = doc('a')   # select the <a> tag
html=items.html()  # inner HTML of the selection; NOTE: this rebinds `html`, replacing the source string
print(html)
清空
      

3.6 DOM操作

3.61 add class and remove class

        html = '''
<body link="#1024ee" class="abv-115-control">
<div id="header" >     
    <span id="suggest-align" >
        <a href="javascript:;"  title="清空">清空</a>
        <input type="text" id="keyword" name="q"  value="百度">百度
        <input type="hidden" name="src" class="src" value="srp">
        <input type="hidden" name="fr" value="se7_newtab_new">
        <input type="hidden" name="psid" value="">
    </span><input type="submit"  value="搜索" class="s_btn">
'''
from pyquery import PyQuery
# Parse the HTML string into a PyQuery document.
doc = PyQuery(html)
a = doc('a')
print(a)
# NOTE(review): 'ative' is presumably a typo for 'active'; kept as-is because
# the recorded output of this example uses the same spelling.
a.addClass('ative')    # add a class to the tag
print(a)
a.removeClass('ative')   # remove that class again; the recorded output shows an empty class="" is left behind
print(a)
<a href="javascript:;" title="&#x6E05;&#x7A7A;">清空</a>

<a href="javascript:;" title="&#x6E05;&#x7A7A;" class="ative">清空</a>

<a href="javascript:;" title="&#x6E05;&#x7A7A;" class="">清空</a>
      

3.62 attr 和 css

        html = '''
<body link="#1024ee" class="abv-115-control">
<div id="header" >     
    <span id="suggest-align" >
        <a href="javascript:;"  title="清空">清空</a>
        <input type="text" id="keyword" name="q"  value="百度">百度
        <input type="hidden" name="src" class="src" value="srp">
        <input type="hidden" name="fr" value="se7_newtab_new">
        <input type="hidden" name="psid" value="">
    </span><input type="submit"  value="搜索" class="s_btn">
'''
from pyquery import PyQuery
# Parse the HTML string into a PyQuery document.
doc = PyQuery(html)
items = doc('.src')   
print(items)
items.attr('name','link')  # set the "name" attribute (per the original note, it is created when missing)
print(items)
items.css('size','14px')   # set a style property; the recorded output shows style="size: 14px" being added
print(items)
<input type="hidden" name="src" class="src" value="srp"/>

<input type="hidden" name="link" class="src" value="srp"/>

<input type="hidden" name="link" class="src" value="srp" style="size: 14px"/>
      

3.63 remove

        html = '''
<body link="#1024ee" class="abv-115-control">
<div id="header" >     
    <span id="suggest-align" >
        <a href="javascript:;"  title="清空">清空</a>
        <input type="text" id="keyword" name="q"  value="百度">百度
        <input type="hidden" name="src" class="src" value="srp">
        <input type="hidden" name="fr" value="se7_newtab_new">
        <input type="hidden" name="psid" value="">
    </span><input type="submit"  value="搜索" class="s_btn">
'''
from pyquery import PyQuery
# Parse the HTML string into a PyQuery document.
doc = PyQuery(html)
span = doc('span')   
print(span)
print(span.text())
span.find('a').remove()   # remove the <a> tag itself from the span, so its text disappears as well
print(span.text())
<span id="suggest-align">
        <a href="javascript:;" title="清空">清空</a>
        <input type="text" id="keyword" name="q" value="百度"/>百度
        <input type="hidden" name="src" class="src" value="srp"/>
        <input type="hidden" name="fr" value="se7_newtab_new"/>
        <input type="hidden" name="psid" value=""/>
    </span>
清空 百度
百度
      

4 伪类选择器

        html = '''
<body link="#1024ee" class="abv-115-control">
<div id="header" >     
    <span id="suggest-align" >
        <a href="javascript:;"  title="清空">清空</a>
        <input type="text" id="keyword" name="q"  value="百度">百度
        <input type="hidden" name="src" class="src" value="srp">
        <input type="hidden" name="fr" value="se7_newtab_new">
        <input type="hidden" name="psid" value="">
    </span><input type="submit"  value="搜索" class="s_btn">
'''
from pyquery import PyQuery
# Parse the HTML string into a PyQuery document.
doc = PyQuery(html)
# NOTE: the *-child pseudo-classes test an <input>'s position among its
# parent's children, not its position among the <input> tags.
inputs = doc('input:first-child')   # <input> tags that are the first child of their parent (none in this document)
print(inputs)
inputs = doc('input:last-child')   # <input> tags that are the last child of their parent
print(inputs)
inputs = doc('input:nth-child(2)')   # <input> tags that are the 2nd child of their parent
print(inputs)
inputs = doc('input:nth-child(2n)')   # <input> tags at even child positions (2nd, 4th, ...)
print(inputs)
inputs = doc('input:gt(2)')    # matched <input> tags whose index is greater than 2 (0-based)
print(inputs)
inputs = doc('input:contains(second)')   # <input> tags containing the text "second" (none in this document)
print(inputs)
<input type="hidden" name="psid" value=""/>
    <input type="submit" value="&#x641C;&#x7D22;" class="s_btn"/>

<input type="text" id="keyword" name="q" value="&#x767E;&#x5EA6;"/>百度
        <input type="submit" value="&#x641C;&#x7D22;" class="s_btn"/>

<input type="text" id="keyword" name="q" value="&#x767E;&#x5EA6;"/>百度
        <input type="hidden" name="fr" value="se7_newtab_new"/>
        <input type="submit" value="&#x641C;&#x7D22;" class="s_btn"/>

<input type="hidden" name="psid" value=""/>
    <input type="submit" value="&#x641C;&#x7D22;" class="s_btn"/>
      

posted @ 2018-07-23 15:01  jixn  阅读(178)  评论(0)    收藏  举报