1 #coding=utf-8
2 from urllib2 import urlopen
3 from bs4 import BeautifulSoup
4 import urllib2
# Address of the page whose heading this script fetches and prints.
url = "http://pythonscraping.com/pages/page1.html"
def getTitle(url):
    """
    Fetch a page and return the first ``<h1>`` element inside its ``<body>``.

    Failure handling, in order:

    1. Opening the URL. ``urllib2.URLError`` is caught; ``urllib2.HTTPError``
       is a subclass of it, so both an unreachable/unknown server and an HTTP
       error status (e.g. 404) are printed and reported as ``None``.
    2. Reading and parsing. An ``AttributeError`` raised while reading the
       response or while walking ``body.h1`` (for example when the parsed
       document has no ``<body>``) is reported as ``None``.

    :param url: address of the page to download.
    :return: the value of ``bsobj.body.h1``, or ``None`` when the page could
        not be fetched or the heading could not be reached.
    """
    try:
        html = urlopen(url)
    except urllib2.URLError as e:
        # Covers HTTPError (page missing / server-side error) as well as the
        # plain URLError raised when the server cannot be reached at all.
        print(e)
        return None

    try:
        try:
            markup = html.read()
        finally:
            # Release the underlying connection whether or not read() worked.
            html.close()
        bsobj = BeautifulSoup(markup, "html.parser")
        title = bsobj.body.h1
    except AttributeError:
        # A missing <body> makes ``bsobj.body`` None, so ``.h1`` fails here.
        return None
    return title
# Look the heading up once and report either the tag or a fallback message.
title = getTitle(url)
print("Title could not be found" if title is None else title)