python BeautifulSoup 基础知识点

笔记来源@大大的小番茄

1. 解析库
soup = BeautifulSoup(html,'html.parser')#Python标准库
BeautifulSoup(html,'lxml')#lxml HTML解析器
BeautifulSoup(html,'xml')#lxml xml解析库(也可写作'lxml-xml')
BeautifulSoup(html,'html5lib')#html5lib
2. 导包
from bs4 import BeautifulSoup
3. 获取所有div标签
divs = soup.find_all('div')
4. 获取指定div标签
divs = soup.find_all('div')[1]
5. 获取指定属性的标签
divs = soup.find_all('div',id='even')#方法一
divs = soup.find_all('div',attrs={'id':'even'})#方法二
6. 获取多个指定属性标签
span = soup.find_all('span',class_='position',width='350')#class属性为Python关键字,后加下划线区别
soup.find_all('span',attrs = {'class':'position','width':'350'})
7. 获取标签的属性值
alist = soup.find_all('a')
#通过下标方式提取
for a in alist:
	href = a['href']
#方法二
for a in alist:
	href = a.attrs['href']
8. 获取标签内容
a = div.find_all('a')[0]
position = a.string#注意:标签内含多个子节点时.string返回None,此时可用a.get_text()
9. 消除无用信息
infos = list(div.stripped_strings)
posted @ 2020-09-26 11:18  YuDi雨恋  阅读(142)  评论(0)    收藏  举报