删除脚本 删除HTML标签的方法

 1      public static String RMHTML(String Htmlstring)
 2         {
 3 
 4             // 删除脚本
 5 
 6             Htmlstring = RegexPattern("<\\s*?script[^>]*>[\\s\\S]*?<\\s*?/\\s*?script\\s*?>", "", Htmlstring);
 7 
 8             // 删除HTML
 9 
10             Htmlstring = RegexPattern("<([^>]*)>", "", Htmlstring);
11 
12             Htmlstring = RegexPattern("([\r\n])[\\s]+", "", Htmlstring);
13 
14             Htmlstring = RegexPattern("-->", "", Htmlstring);
15 
16             Htmlstring = RegexPattern("<!--.*", "", Htmlstring);
17 
18             Htmlstring = RegexPattern("&(quot|#34);", "\"", Htmlstring);
19 
20             Htmlstring = RegexPattern("&(amp|#38);", "&", Htmlstring);
21 
22             Htmlstring = RegexPattern("&(lt|#60);", "<", Htmlstring);
23 
24             Htmlstring = RegexPattern("&(gt|#62);", ">", Htmlstring);
25 
26             Htmlstring = RegexPattern("&(nbsp|#160);", " ", Htmlstring);
27 
28             Htmlstring = RegexPattern("&(iexcl|#161);", "\\xa1", Htmlstring);
29 
30             Htmlstring = RegexPattern("&(cent|#162);", "\\xa2", Htmlstring);
31 
32             Htmlstring = RegexPattern("&(pound|#163);", "\\xa3", Htmlstring);
33 
34             Htmlstring = RegexPattern("&(copy|#169);", "\\xa9", Htmlstring);
35 
36             Htmlstring = RegexPattern("&#(\\d+);", "", Htmlstring);
37 
38             Htmlstring = RegexPattern("<", "", Htmlstring);
39 
40             Htmlstring = RegexPattern(">", "", Htmlstring);
41 
42             //Htmlstring.replace("\r\n", "",Htmlstring);
43 
44 
45 
46             return Htmlstring;
47 
48         }
49         public static String RegexPattern(String pattern, String str, String content)
50         {
51 
52             if (pattern != null && !pattern.Equals(""))
53             {
54                 //RegexOptions p = System.Text.RegularExpressions.RegexOptions.Compiled(pattern, 2); //参数2表示大小写不区分
55                 //Math m = (content);
56                 //content = m.replaceAll(str);
57                 content = Regex.Replace(content, pattern, str, RegexOptions.IgnoreCase);
58             }
59             return content;
60         } 

 

posted @ 2013-05-31 11:13  mushishi  阅读(278)  评论(0编辑  收藏  举报