# -*- coding: UTF-8 -*-
# Web scraper: parse report.html with BeautifulSoup and print matching elements
import urllib2
#import bs4
import re
import sys
from bs4 import BeautifulSoup
# import time
reload(sys)
sys.setdefaultencoding("utf-8")
#html=urllib2.urlopen("http://121.196.21.238/report.html")
#html=urllib2.urlopen("http://image.baidu.com/")
with open("report.html") as f:
    content = f.read()
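# Hedged alternative (a sketch, assuming the host from the commented-out
# URLs above is still reachable): fetch the page over HTTP with urllib2
# instead of reading the local copy.
# response = urllib2.urlopen("http://121.196.21.238/report.html")
# content = response.read()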
bsObj = BeautifulSoup(content,features='html.parser')
print bsObj.prettify()
# Print the page title
# print bsObj.title
# #<a href="http://baijiahao.baidu.com/s?id=1665731690282269956" target="_blank" mon="p=1&a=1&pn=1">G15沈海高速轿车起火 现场火光冲天</a>
# titlist = bsObj.findAll("a", {"target":"_blank"})
# # The following two calls do the same thing
# bsObj.findAll(id="text")
# bsObj.findAll("", {"id":"text"})
# for x in titlist:
#     print x.get_text()
# #http://t8.baidu.com/it/u=3571592872,3353494284&fm=79&app=86&size=h300&n=0&g=4n&f=jpeg?sec=1589296136&t=e713d1fe058c0dcb1714f9bc0fd4ee92
# titlist = bsObj.find("table",{"id":"result_table"}).tr.next_siblings
# for i in titlist:
#     print i
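# Hedged sketch (assumes report.html contains the table with
# id="result_table" used in the commented example above): walk the rows
# that follow the header row and print the text of each cell.
# rows = bsObj.find("table", {"id": "result_table"}).tr.next_siblings
# for row in rows:
#     if getattr(row, "find_all", None):  # skip bare NavigableString nodes
#         print [td.get_text() for td in row.find_all("td")]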
# Find every element whose CSS class is "testcase".
# Note: "class" is a reserved word in Python, so BeautifulSoup uses the
# class_ keyword argument instead.
testcases = bsObj.findAll(class_="testcase")
for i in testcases:
    print i
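# Hedged follow-up (assumption: each "testcase" element wraps readable
# text): print just the visible text of each match instead of the raw tag.
# for case in testcases:
#     print case.get_text().strip()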