[通用类库]去除HTML的类
2007-08-23 10:03 水随风 阅读(298) 评论(0) 收藏 举报
using System;
2
using System.Data;
3
using System.Configuration;
4
using System.Web;
5
using System.Text.RegularExpressions;
6
using System.Web.Security;
7
using System.Web.UI;
8
using System.Web.UI.WebControls;
9
using System.Web.UI.WebControls.WebParts;
10
using System.Web.UI.HtmlControls;
11
12
/// <summary>
/// Regex-based helpers that reduce an HTML fragment to plain text.
/// </summary>
/// <remarks>
/// <see cref="ScriptHtml"/> applies an ordered list of substitutions (script
/// blocks, tags, a handful of character entities, comments), while
/// <see cref="ParseTags"/> only deletes anything shaped like a tag.
/// Both work purely on text patterns; they are not an HTML parser and should
/// not be relied on as a security sanitizer.
/// </remarks>
public class CutHtml
{
    // Script blocks are removed first. Singleline lets ".*?" cross line breaks,
    // otherwise the body of a multi-line <script> element would be left behind.
    private static readonly Regex ScriptBlock = new Regex(
        @"<script[^>]*?>.*?</script>",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Patterns applied, in this order, after the script blocks are gone.
    // Patterns[i] is replaced by Replacements[i]; the two arrays must stay aligned.
    private static readonly string[] Patterns = {
        // opening/closing tags, with optional namespace prefix and attributes
        @"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>",
        // a line break followed by further whitespace
        @"([\r\n])[\s]+",
        @"&(quot|#34);",
        @"&(amp|#38);",
        @"&(lt|#60);",
        @"&(gt|#62);",
        @"&(nbsp|#160);",
        @"&(iexcl|#161);",
        @"&(cent|#162);",
        @"&(pound|#163);",
        @"&(copy|#169);",
        // any other numeric entity is dropped
        @"&#(\d+);",
        // comment terminator becomes a line break so the next pattern can
        // consume the comment from "<!--" up to that line break
        @"-->",
        @"<!--.*\n"
    };

    private static readonly string[] Replacements = {
        "",
        "",
        "\"",
        "&",
        "<",
        ">",
        " ",
        "\xa1", // inverted exclamation mark (161)
        "\xa2", // cent sign (162)
        "\xa3", // pound sign (163)
        "\xa9", // copyright sign (169)
        "",
        "\r\n",
        ""
    };

    // Built once instead of on every call / every loop iteration.
    private static readonly Regex[] Rules = BuildRules();

    /// <summary>
    /// Creates a new <see cref="CutHtml"/> instance. The class keeps no per-instance state.
    /// </summary>
    public CutHtml()
    {
    }

    /// <summary>
    /// Strips script blocks, tags, HTML comments and a few common character
    /// entities from <paramref name="HtmlStr"/>.
    /// </summary>
    /// <param name="HtmlStr">The HTML text to clean. May be null or empty.</param>
    /// <returns>
    /// The cleaned text, or an empty string when <paramref name="HtmlStr"/> is
    /// null or empty. Every remaining '&lt;', '&gt;' and CR LF pair is removed
    /// at the end, including angle brackets that came from decoding
    /// <c>&amp;lt;</c> / <c>&amp;gt;</c>.
    /// </returns>
    public string ScriptHtml(string HtmlStr)
    {
        if (string.IsNullOrEmpty(HtmlStr))
        {
            return string.Empty;
        }

        string strOutput = ScriptBlock.Replace(HtmlStr, "");

        for (int i = 0; i < Rules.Length; i++)
        {
            strOutput = Rules[i].Replace(strOutput, Replacements[i]);
        }

        // string.Replace returns a new string; the result has to be assigned,
        // otherwise these three clean-up steps do nothing.
        strOutput = strOutput.Replace("<", "");
        strOutput = strOutput.Replace(">", "");
        strOutput = strOutput.Replace("\r\n", "");

        return strOutput;
    }

    /// <summary>
    /// Removes every substring of the form <c>&lt;...&gt;</c> from
    /// <paramref name="HTMLStr"/>.
    /// </summary>
    /// <param name="HTMLStr">The HTML text to clean. May be null or empty.</param>
    /// <returns>
    /// The text with tag-shaped substrings removed, or an empty string when
    /// <paramref name="HTMLStr"/> is null or empty.
    /// </returns>
    public static string ParseTags(string HTMLStr)
    {
        if (string.IsNullOrEmpty(HTMLStr))
        {
            return string.Empty;
        }

        return Regex.Replace(HTMLStr, "<[^>]*>", "");
    }

    // Compiles Patterns into case-insensitive Regex objects, preserving order.
    private static Regex[] BuildRules()
    {
        Regex[] rules = new Regex[Patterns.Length];
        for (int i = 0; i < Patterns.Length; i++)
        {
            rules[i] = new Regex(Patterns[i], RegexOptions.IgnoreCase);
        }
        return rules;
    }
}
111
using System;2
using System.Data;3
using System.Configuration;4
using System.Web;5
using System.Text.RegularExpressions;6
using System.Web.Security;7
using System.Web.UI;8
using System.Web.UI.WebControls;9
using System.Web.UI.WebControls.WebParts;10
using System.Web.UI.HtmlControls;11

12
/// <summary>
/// Regex-based helpers that reduce an HTML fragment to plain text.
/// </summary>
/// <remarks>
/// <see cref="ScriptHtml"/> applies an ordered list of substitutions (script
/// blocks, tags, a handful of character entities, comments), while
/// <see cref="ParseTags"/> only deletes anything shaped like a tag.
/// Both work purely on text patterns; they are not an HTML parser and should
/// not be relied on as a security sanitizer.
/// </remarks>
public class CutHtml
{
    // Script blocks are removed first. Singleline lets ".*?" cross line breaks,
    // otherwise the body of a multi-line <script> element would be left behind.
    private static readonly Regex ScriptBlock = new Regex(
        @"<script[^>]*?>.*?</script>",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Patterns applied, in this order, after the script blocks are gone.
    // Patterns[i] is replaced by Replacements[i]; the two arrays must stay aligned.
    private static readonly string[] Patterns = {
        // opening/closing tags, with optional namespace prefix and attributes
        @"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>",
        // a line break followed by further whitespace
        @"([\r\n])[\s]+",
        @"&(quot|#34);",
        @"&(amp|#38);",
        @"&(lt|#60);",
        @"&(gt|#62);",
        @"&(nbsp|#160);",
        @"&(iexcl|#161);",
        @"&(cent|#162);",
        @"&(pound|#163);",
        @"&(copy|#169);",
        // any other numeric entity is dropped
        @"&#(\d+);",
        // comment terminator becomes a line break so the next pattern can
        // consume the comment from "<!--" up to that line break
        @"-->",
        @"<!--.*\n"
    };

    private static readonly string[] Replacements = {
        "",
        "",
        "\"",
        "&",
        "<",
        ">",
        " ",
        "\xa1", // inverted exclamation mark (161)
        "\xa2", // cent sign (162)
        "\xa3", // pound sign (163)
        "\xa9", // copyright sign (169)
        "",
        "\r\n",
        ""
    };

    // Built once instead of on every call / every loop iteration.
    private static readonly Regex[] Rules = BuildRules();

    /// <summary>
    /// Creates a new <see cref="CutHtml"/> instance. The class keeps no per-instance state.
    /// </summary>
    public CutHtml()
    {
    }

    /// <summary>
    /// Strips script blocks, tags, HTML comments and a few common character
    /// entities from <paramref name="HtmlStr"/>.
    /// </summary>
    /// <param name="HtmlStr">The HTML text to clean. May be null or empty.</param>
    /// <returns>
    /// The cleaned text, or an empty string when <paramref name="HtmlStr"/> is
    /// null or empty. Every remaining '&lt;', '&gt;' and CR LF pair is removed
    /// at the end, including angle brackets that came from decoding
    /// <c>&amp;lt;</c> / <c>&amp;gt;</c>.
    /// </returns>
    public string ScriptHtml(string HtmlStr)
    {
        if (string.IsNullOrEmpty(HtmlStr))
        {
            return string.Empty;
        }

        string strOutput = ScriptBlock.Replace(HtmlStr, "");

        for (int i = 0; i < Rules.Length; i++)
        {
            strOutput = Rules[i].Replace(strOutput, Replacements[i]);
        }

        // string.Replace returns a new string; the result has to be assigned,
        // otherwise these three clean-up steps do nothing.
        strOutput = strOutput.Replace("<", "");
        strOutput = strOutput.Replace(">", "");
        strOutput = strOutput.Replace("\r\n", "");

        return strOutput;
    }

    /// <summary>
    /// Removes every substring of the form <c>&lt;...&gt;</c> from
    /// <paramref name="HTMLStr"/>.
    /// </summary>
    /// <param name="HTMLStr">The HTML text to clean. May be null or empty.</param>
    /// <returns>
    /// The text with tag-shaped substrings removed, or an empty string when
    /// <paramref name="HTMLStr"/> is null or empty.
    /// </returns>
    public static string ParseTags(string HTMLStr)
    {
        if (string.IsNullOrEmpty(HTMLStr))
        {
            return string.Empty;
        }

        return Regex.Replace(HTMLStr, "<[^>]*>", "");
    }

    // Compiles Patterns into case-insensitive Regex objects, preserving order.
    private static Regex[] BuildRules()
    {
        Regex[] rules = new Regex[Patterns.Length];
        for (int i = 0; i < Patterns.Length; i++)
        {
            rules[i] = new Regex(Patterns[i], RegexOptions.IgnoreCase);
        }
        return rules;
    }
}



浙公网安备 33010602011771号