using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;

namespace Utility
{
/// <summary>
/// Helpers for reducing HTML markup to plain text.
/// </summary>
public class HtmlUtil
{
    // Patterns are applied in array order; Replacements[i] is substituted for
    // every match of Patterns[i].
    // NOTE(review): the first four patterns and the last one were lost from the
    // original text (anything shaped like an HTML tag had been stripped out of
    // the listing). They are reconstructed here from the original comment
    // ("html, javascript, style elements and others") and from the length of
    // the replacement table -- confirm against the intended behaviour.
    private static readonly string[] Patterns =
    {
        @"<%=[\w\W]*?%>",             // ASP-style inline expressions
        @"<script[\w\W]*?</script>",  // script elements, including their content
        @"<style[\w\W]*?</style>",    // style elements, including their content
        @"<[/]?[\w\W]*?>",            // any remaining opening or closing tag
        @"([\r\n])[\s]+",             // a line break followed by further whitespace
        @"&(nbsp|#160);",
        @"&(iexcl|#161);",
        @"&(cent|#162);",
        @"&(pound|#163);",
        @"&(copy|#169);",
        @"&#(\d+);",                  // any other numeric character reference
        @"-->",
        @"<!--.*\n"
    };

    private static readonly string[] Replacements =
    {
        "", "", "", "", "", " ", "\xa1", "\xa2", "\xa3", "\xa9", "", "\r\n", ""
    };

    // Built once and reused: constructing a Regex parses the pattern, and the
    // pattern set never changes between calls. Must be declared after Patterns,
    // because static field initializers run in textual order.
    private static readonly Regex[] CompiledPatterns = BuildRegexes();

    // Creates one case-insensitive Regex per entry in Patterns.
    private static Regex[] BuildRegexes()
    {
        Regex[] compiled = new Regex[Patterns.Length];
        for (int i = 0; i < Patterns.Length; i++)
        {
            compiled[i] = new Regex(Patterns[i], RegexOptions.IgnoreCase);
        }
        return compiled;
    }

    /// <summary>
    /// Removes markup from <paramref name="strHtml"/> by applying a fixed,
    /// ordered list of regular-expression replacements, then deleting every
    /// remaining "\r\n" sequence.
    /// </summary>
    /// <param name="strHtml">The HTML text to strip. May be null or empty.</param>
    /// <returns>
    /// The stripped text, or an empty string when <paramref name="strHtml"/>
    /// is null or empty.
    /// </returns>
    public static string StripHTML(string strHtml)
    {
        if (string.IsNullOrEmpty(strHtml))
        {
            return string.Empty;
        }

        string strStripped = strHtml;
        for (int i = 0; i < CompiledPatterns.Length; i++)
        {
            strStripped = CompiledPatterns[i].Replace(strStripped, Replacements[i]);
        }

        // string.Replace returns a new string; the result has to be assigned,
        // otherwise the "\r\n" inserted by the "-->" rule is left in the output.
        strStripped = strStripped.Replace("\r\n", "");
        return strStripped;
    }
}
}
// Source: http://workgroup.cn/CS/blogs/aspnet/archive/2006/06/06/_636B19526888BE8F0F5FCE4E_HTML_2D4ED063D65387652C678551B95B_.aspx
// Posted on 2007-03-08 00:07 by mbskys. Views: 1799. Comments: 0.