#coding:utf-8
from HTMLParser import HTMLParser
import sys
# Every text chunk delivered to MyHTMLParser.handle_data, in arrival order.
# Kept at module level and only ever mutated in place, so the tally is shared
# by all parser instances.
listCount = []


class MyHTMLParser(HTMLParser):
    """HTML parser that echoes each parse event and tallies text chunks.

    Every text chunk is appended to the module-level ``listCount`` list. When
    the list reaches exactly ``SUMMARY_AT_COUNT`` entries, the number of
    entries equal to ``'Fail'`` and to ``'Pass'`` is printed.

    All output goes through single-argument ``print(...)`` calls so the text
    is the same whether ``print`` is a statement or a function.
    """

    # Number of collected text chunks at which the Pass/Fail summary is printed.
    SUMMARY_AT_COUNT = 54

    def handle_starttag(self, tag, attrs):
        """
        Report an opening tag, like <div>.

        :param tag: name of the tag
        :param attrs: attributes of the tag (not used here)
        :return: None
        """
        print("Encountered a start tag: %s" % (tag,))

    def handle_endtag(self, tag):
        """
        Report a closing tag, like </div>.

        :param tag: name of the tag
        :return: None
        """
        print("Encountered an end tag : %s" % (tag,))

    def handle_data(self, data):
        """
        Report a text chunk found between tags and add it to the tally.

        :param data: the text chunk
        :return: None
        """
        print("Encountered some data : %s" % (data,))
        print('-----------------------------------')
        listCount.append(data)
        collected = len(listCount)
        print(listCount)
        print(collected)
        if collected == self.SUMMARY_AT_COUNT:
            # Only exact matches are counted; surrounding whitespace is not
            # stripped from the chunks.
            fail_count = listCount.count('Fail')
            pass_count = listCount.count('Pass')
            print('统计失败的次数为:' + str(fail_count))
            print('统计成功的次数为:' + str(pass_count))
        else:
            print('continue')

    def handle_startendtag(self, tag, attrs):
        """
        Report a self-closing tag, like <img />.

        :param tag: name of the tag
        :param attrs: attributes of the tag (not used here)
        :return: None
        """
        print("Encountered startendtag : %s" % (tag,))

    def handle_comment(self, data):
        """
        Report an HTML comment.

        :param data: text of the comment
        :return: None
        """
        print("Encountered comment : %s" % (data,))
# Open the HTML file and stream it, line by line, through a single parser.
# One parser instance is reused for the whole file so that markup spanning a
# line break is still recognised; `with` closes the file even if parsing fails.
with open('123.html', 'r') as html_file:
    parser = MyHTMLParser()
    for line in html_file:
        print(line)
        parser.feed(line)
    # Force processing of any input the parser is still buffering.
    parser.close()