应用 HTMLParser 解析并操作 HTML 内容
至于她为什么要那么命名，为什么要那么做，就不必深究了；总之，用 HTMLParser 做 HTML 解析是挺不错的选择。
根据“笑起来像你”发过来的原装代码展示如下:
1
import java.util.Vector;2

3
import org.htmlparser.Attribute;4
import org.htmlparser.Node;5
import org.htmlparser.Parser;6
import org.htmlparser.Tag;7
import org.htmlparser.tags.ScriptTag;8

9
import org.htmlparser.util.NodeList;10
import org.htmlparser.util.ParserException;11

12

public class dfdasfads
{13

14

public static void main(String[] args) throws ParserException
{15
StringBuffer sb = new StringBuffer();16
sb.append("<p id=\"p1\"><span id=\"s2\" name=\"s22\">vvv</span></p><script>var char='aaaaa';</script>\r\n<input test
.>\r\n<h1 dfasdf
..>");17
System.out.println(new dfdasfads().getFilterBody(sb.toString()));18
}19

20

public String getFilterBody(String strBody)
{21
// 一、 htmlparser 处理提交22
Parser parser = Parser.createParser(strBody, "utf-8");23
NodeList list;24
String reValue = strBody;25

try
{26
list = parser.parse(null);27
visitNodeList(list);28
reValue = list.toHtml();29

} catch (ParserException e1)
{30
e1.printStackTrace();31
}32
return reValue;33
}34

35

/** *//** 36
* 移除所有标签的Id属性37
* @param list 标签集合38
*/39

private void visitNodeList(NodeList list)
{40
System.out.println(">>>visitNodeList(list)");41

for (int i = 0; i < list.size(); i++)
{42
Node node = list.elementAt(i);43
44

if (node instanceof ScriptTag)
{45
list.remove(i);46
continue;47

} else if (node instanceof Tag)
{48
Tag _tag = (Tag) node;49

50
_tag.removeAttribute("id");51
}52

53
NodeList children = node.getChildren();54

if (children != null && children.size() > 0)
{55
visitNodeList(children);56
}57

58
}59
System.out.println("<<<visitNodeList(list)");60
}61
}62

1

2

3
import java.util.Vector;4

5

6
import org.htmlparser.Attribute;7
import org.htmlparser.Node;8
import org.htmlparser.Parser;9
import org.htmlparser.Tag;10

11
import org.htmlparser.tags.ScriptTag;12
import org.htmlparser.util.NodeList;13
import org.htmlparser.util.ParserException;14

15

16

17

18

public class FilterBody
{19

20

21

public static void main(String[] args) throws ParserException
{22
String sttt = "<embed allowFullScreen=\"true\" src=\"http://vhead.blog.sina.com.cn/player/outer_player.swf\" quality=\"high\" bgcolor=\"#ffffff\" width=\"424\" height=\"404\" name=\"vsplayer\" align=\"middle\" type=\"application/x-shockwave-flash\" pluginspage=\"http://www.macromedia.com/go/getflashplayer\" />";23
System.out.println(new FilterBody().getFilterBody(sttt));24

25
}26

27

public String getFilterBody(String strBody)
{28

29
// 一、 htmlparser 处理提交30
Parser parser = Parser.createParser(strBody, "utf-8");31
NodeList list;32
String reValue = strBody;33

try
{34
list = parser.parse(null);35
visitNodeList(list);36
reValue = list.toHtml();37

} catch (ParserException e1)
{38
39
}40

41
;42

43
return reValue;44
}45

46
47

48
// 递归49

private void visitNodeList(NodeList list)
{50

for (int i = 0; i < list.size(); i++)
{51
Node node = list.elementAt(i);52

53

if (node instanceof Tag)
{54

if (node instanceof ScriptTag)
{55
list.remove(i);56
continue;57
}58
Tag _tag = (Tag) node;59
_tag.removeAttribute("id");60
_tag.removeAttribute("onload");61
_tag.removeAttribute("alt");62

63
String tagName = _tag.getTagName();64
if (tagName == null)65
tagName = "";66
else67
tagName = tagName.trim().toUpperCase();68

69
70
visitTag((Tag) node);71
}72

73
NodeList children = node.getChildren();74
if (children != null && children.size() > 0)75
visitNodeList(children);76

77
}78
}79

80
// 获取tag81

private void visitTag(Tag tag)
{82
String tagName = tag.getTagName();83

if (tagName != null && tagName.equalsIgnoreCase("embed"))
{84
tag.setEmptyXmlTag(false);85
tag.setAttribute("AllowNetworking", "\"none\"");86
tag.setAttribute("AllowScriptAccess", "never", '"');87
tag.setEmptyXmlTag(true);88
}89
90
Vector attrs = tag.getAttributesEx();91

92

for (int i = 0; i < attrs.size(); i++)
{93
Object obj = attrs.elementAt(i);94

if (obj != null && obj instanceof Attribute)
{95
visitAttribute((Attribute) obj, tag);96
}97
}98
}99

100
// 获取tag属性101

private void visitAttribute(Attribute attribute, Tag tag)
{102
String attName = attribute.getName();103
if (attName == null)104
attName = "";105
else106
attName = attName.trim().toLowerCase();107
String tagName = tag.getTagName();108
if (tagName == null)109
tagName = "";110
else111
tagName = tagName.trim().toLowerCase();112
String tagValue = tag.getText();113
if (tagValue == null)114
tagValue = "";115
else116
tagValue = tagValue.trim().toLowerCase();117
String attribValue = attribute.getValue();118
if (attribValue == null)119
attribValue = "";120
else121
attribValue = attribValue.trim().toLowerCase();122

123
}124

125
}126


浙公网安备 33010602011771号