BeautifulSoup爬虫

一个范例

from bs4 import BeautifulSoup
import requests
# Download the raw HTML of the page.
url = r'http://guba.eastmoney.com/'
# requests has no default timeout; without one an unresponsive server blocks forever.
r = requests.get(url, timeout=10)
# Fail loudly on a 4xx/5xx response instead of silently parsing an error page.
r.raise_for_status()
html = r.text

# First pass: parse the whole document.
soup = BeautifulSoup(html, "html.parser")
# Every <div> whose id is "hotArticle" (find_all returns a list-like ResultSet).
div = soup.find_all('div', id="hotArticle")

# Second pass: re-parse only the matched markup so later lookups are scoped to it.
# Each tag is serialised individually; str() of the whole ResultSet would wrap the
# markup in list brackets/commas, which end up as stray text nodes in soup2.
soup2 = BeautifulSoup("".join(str(tag) for tag in div), "html.parser")



# Print and concatenate the text of every second <a> in soup2, i.e. the anchors at
# odd 0-based positions (presumably the anchors alternate between a forum link and
# an article link -- confirm against the live page markup).
anchors = soup2.find_all('a')
picked = []
for link in anchors[1::2]:
    # .string is None when a tag has no children or more than one child; fall back
    # to get_text() so concatenation never hits `str + None` (TypeError).
    text = link.string if link.string is not None else link.get_text()
    print(text)
    picked.append(text)

# li: all selected link texts joined together; n: total number of anchors seen.
li = ''.join(picked)
n = len(anchors)

# Shape of an anchor element: <a href="" title="dd"> ... </a>
# `a` is the tag name; href and title are its attributes, exposed through .attrs.
# Example element: <a class="balink" href="list,cjpl.html">财经评论吧</a>
soup2.find('a').attrs
# -> {'class': ['balink'], 'href': 'list,cjpl.html'}
soup2.find('a').string
# -> 财经评论吧

posted on 2020-03-19 17:19 yukizzc 阅读(139) 评论(0) 收藏举报