from bs4 import BeautifulSoup
import urllib2
html = urllib2.urlopen('http://tieba.baidu.com/p/5058456989')
bsobj = BeautifulSoup(html.read(), "html.parser") # 不加"html.parser"会有警告。。。。
print bsobj.title
underline = '-'*100
def get_title(url):
    """Download *url* and return the page parsed with BeautifulSoup.

    Despite the name, the whole parsed document is returned (not only its
    <title> tag); the caller runs its own searches on the result.

    Args:
        url: Address of the page to fetch.

    Returns:
        The BeautifulSoup object for the page, or None when the request
        fails with an HTTP error or parsing raises AttributeError.
    """
    try:
        html = urllib2.urlopen(url)
    except urllib2.HTTPError:
        # Only `urllib2` is imported, so the exception class must be
        # qualified; a bare `HTTPError` would itself raise NameError here.
        return None
    try:
        return BeautifulSoup(html.read(), "html.parser")
    except AttributeError:
        return None
    finally:
        # Release the connection whether or not parsing succeeded.
        html.close()
url = 'http://tieba.baidu.com/p/4420237089?see_lz=1'
title = get_title(url)
if title is None:
print 'title is none'
else:
print underline
# print title
tmp = title.findAll("div", {"class": "d_post_content j_d_post_content "})
vmp = title.findAll("span", {"class": "tail-info"})
# for v in vmp.tr.next_siblings:
# print v
for val, f in zip(tmp, vmp[1:-1:3]):
print val.get_text()
print f.get_text(), underline