from BeautifulSoup import BeautifulSoup
import re
# Walk-through of basic BeautifulSoup usage: parse a small document, move
# around the tree, search for tags, then modify the tree.
# print is written with parentheses around a single argument, which the
# Python 2 print statement treats exactly like the bare form.

# Sample markup, kept as a list of fragments and joined before parsing.
doc = [
    '<html><head><title>Page title</title></head>',
    '<body><p id="firstpara" align="center">This is paragraph <b>one</b>.',
    '<p id="secondpara" align="blah">This is paragraph <b>two</b>.',
    '</html>',
]
markup = ''.join(doc)
soup = BeautifulSoup(markup)
print(soup.prettify())

# --- Navigating the tree ---
# Name of the first-level tag.
root_tag = soup.contents[0]
print(root_tag.name)
# Name of the tag one level further down.
head = root_tag.contents[0]
print(head.name)
# Whatever head's `next` attribute points at.
print(head.next)
# Name of this node's parent.
print(head.parent.name)
# The sibling that comes after head, then the sibling after that
# sibling's first child.
following = head.nextSibling
print(following.name)
print(following.contents[0].nextSibling)

# --- Searching for tags ---
# The complete title tag, reached through attribute-style access.
titleTag = soup.html.head.title
print(titleTag)
# How many p tags there are (the lookup is reused further down; nothing
# modifies the soup in between).
paragraphs = soup('p')
print(len(paragraphs))
# p tags whose align attribute is "center"; feels a bit like XPath, and
# the result is a list.
centered = soup.findAll('p', align="center")
print(centered)
# Same query as above, but only a single tag comes back.
print(soup.find('p', align="center"))
# Locate that tag and read its id attribute.
print(soup('p', align="center")[0]['id'])
# Same kind of lookup, matching the attribute with a regular expression.
align_pattern = re.compile('^b.*')
print(soup.find('p', align=align_pattern)['id'])
# Content of the b tag inside a p tag.
print(soup.find('p').b.string)
print(paragraphs[0].b.string)
print(paragraphs[1].b.string)

# --- Modifying the soup ---
titleTag['id'] = 'theTitle'
old_title_text = titleTag.contents[0]
old_title_text.replaceWith("New title")
print(soup.html.head)
# See also the Chinese-language documentation: http://www.crummy.com/software/BeautifulSoup/documentation.zh.html#Quick%20Start
# http://hi.baidu.com/javalang/blog/item/84bac4bf731fb80f18d81fe1.html