// Matches an HTML comment. Comments are removed first so that a '>' inside a
// comment cannot be mistaken for the end of a tag by the generic tag pattern.
private static readonly System.Text.RegularExpressions.Regex CheckStrCommentPattern =
    new System.Text.RegularExpressions.Regex(
        @"<!--[\s\S]*?-->",
        System.Text.RegularExpressions.RegexOptions.CultureInvariant);

// Matches one complete script/style/iframe/frameset element together with its
// content. The lazy quantifier stops at the nearest closing tag, so text that
// sits between two such elements is preserved. The back-reference makes the
// closing tag name agree with the opening one.
private static readonly System.Text.RegularExpressions.Regex CheckStrBlockPattern =
    new System.Text.RegularExpressions.Regex(
        @"<\s*(script|style|iframe|frameset)\b[^>]*>[\s\S]*?<\s*/\s*\1\s*>",
        System.Text.RegularExpressions.RegexOptions.IgnoreCase
            | System.Text.RegularExpressions.RegexOptions.CultureInvariant);

// Matches any remaining tag: a '<', anything that is not '>', then '>'.
private static readonly System.Text.RegularExpressions.Regex CheckStrTagPattern =
    new System.Text.RegularExpressions.Regex(
        @"<[^>]*>",
        System.Text.RegularExpressions.RegexOptions.CultureInvariant);

/// <summary>
/// Reduces an HTML fragment to compact plain text. HTML comments, whole
/// script, style, iframe and frameset elements (including their content) and
/// every remaining tag are removed; then spaces, carriage returns, line feeds,
/// ampersands and a few known entity/attribute leftovers are deleted.
/// </summary>
/// <remarks>
/// A '&lt;' that is never followed by '&gt;' is left in the result, so the
/// returned text should still be HTML-encoded before it is written into a page.
/// </remarks>
/// <param name="html">The HTML fragment to clean. May be null or empty.</param>
/// <returns>
/// The cleaned text, or an empty string when <paramref name="html"/> is null or empty.
/// </returns>
public string checkStr(string html)
{
    if (string.IsNullOrEmpty(html))
    {
        return string.Empty;
    }

    // Order matters: comments and whole elements must go before the generic
    // tag pass, otherwise only their tags are stripped and their content
    // (CSS, script code, comment text) leaks into the result.
    html = CheckStrCommentPattern.Replace(html, "");
    html = CheckStrBlockPattern.Replace(html, "");
    html = CheckStrTagPattern.Replace(html, "");

    // Must run before spaces are removed, because the literal itself contains a space.
    html = html.Replace("style=\"LINE-HEIGHT: 22px;", "");

    // Whitespace and entity leftovers. "&" is removed before "nbsp;" so that
    // "&nbsp;" disappears completely.
    html = html.Replace(" ", "");
    html = html.Replace("\r", "");
    html = html.Replace("\n", "");
    html = html.Replace("&", "");
    html = html.Replace("/Div;", "");
    html = html.Replace("Div;", "");
    html = html.Replace("/div;", "");
    html = html.Replace("div;", "");
    html = html.Replace("nbsp;", "");
    html = html.Replace("class=p1", "");

    return html;
}