项目中解决小问题的各种方法
一、获取HTML标签中的文字内容
/// <summary>
/// Strips HTML markup from a string and returns the remaining text, HTML-encoded.
/// </summary>
/// <remarks>
/// The final encoding step goes through HttpContext.Current.Server, so this
/// method is meant to be called while an HTTP context is available.
/// </remarks>
/// <param name="Htmlstring">Source text that may contain HTML markup.</param>
/// <returns>
/// The text with scripts, comments, tags and angle brackets removed, a fixed set
/// of entities decoded, and the result HTML-encoded and trimmed. An empty string
/// is returned for null or empty input.
/// </returns>
public static string NoHTML(string Htmlstring)
{
    if (string.IsNullOrEmpty(Htmlstring))
    {
        return string.Empty;
    }

    // Remove script blocks. Singleline lets '.' cross line breaks, so scripts
    // that span several lines are removed together with their content.
    Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Remove complete comments before stripping tags; otherwise a '>' inside a
    // comment ends the "tag" early and the rest of the comment leaks into the text.
    Htmlstring = Regex.Replace(Htmlstring, @"<!--.*?-->", "", RegexOptions.Singleline);

    // Remove remaining tags, then line breaks followed by whitespace.
    Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);

    // Clean up leftovers of unterminated or broken comments.
    Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

    // Decode the supported named / numeric entities.
    Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\xa1", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\xa2", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\xa3", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\xa9", RegexOptions.IgnoreCase);

    // Drop any other numeric entity.
    Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", RegexOptions.IgnoreCase);

    // Strings are immutable: Replace returns a new string, so the result has to
    // be assigned back for the removal to take effect.
    Htmlstring = Htmlstring.Replace("<", "");
    Htmlstring = Htmlstring.Replace(">", "");
    Htmlstring = Htmlstring.Replace("\r\n", "");

    Htmlstring = HttpContext.Current.Server.HtmlEncode(Htmlstring).Trim();
    return Htmlstring;
}
二、截取字符串
/// <summary>
/// Truncates a string to a display width measured in full-width units (one
/// full-width character counts as two half-width characters) and appends
/// "..." when the returned text differs from the input.
/// </summary>
/// <param name="strSource">Text to truncate. Null is treated as empty.</param>
/// <param name="length">Maximum width in full-width units; must not be negative.</param>
/// <returns>
/// The source text when nothing was cut; otherwise the truncated prefix followed by "...".
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="length"/> is negative.
/// </exception>
public static string InterceptString(string strSource, int length)
{
    if (length < 0)
    {
        throw new ArgumentOutOfRangeException("length", "The maximum width must not be negative.");
    }
    if (string.IsNullOrEmpty(strSource))
    {
        return string.Empty;
    }

    string strResult = InterceptString(strSource, length, 0);
    return strResult == strSource ? strResult : strResult + "...";
}

/// <summary>
/// Returns the shortest prefix of <paramref name="str"/>, at least
/// <paramref name="length1"/> characters long, whose width reaches
/// <paramref name="length"/> full-width units; if adding a character overshoots
/// the limit, that last character is dropped again.
/// </summary>
/// <remarks>
/// Width rule: characters with code 33..126 count as half a unit, every other
/// character as one unit, and leading/trailing whitespace of the prefix is not
/// counted at all. The width is tracked incrementally in a single pass, so no
/// recursion is needed and the cost is linear in the number of characters examined.
/// </remarks>
/// <param name="str">Text to truncate.</param>
/// <param name="length">Maximum width in full-width units.</param>
/// <param name="length1">Prefix length to start from; 0 means "start at <paramref name="length"/>".</param>
/// <returns>The truncated prefix, or <paramref name="str"/> itself when it needs no truncation.</returns>
private static string InterceptString(string str, int length, int length1)
{
    if (str == null || str.Length <= length)
    {
        return str;
    }

    if (length1 == 0)
    {
        length1 = length;
    }
    if (length1 < 0)
    {
        throw new ArgumentOutOfRangeException("length1", "The starting prefix length must not be negative.");
    }
    if (str.Length < length1)
    {
        return str;
    }

    // Widths are kept in half-width units (twice the real value) to stay in
    // integer arithmetic instead of comparing floats.
    long limit = 2L * length;
    long width = 0;
    int pendingWhitespace = 0;  // whitespace seen after text; counts only once more text follows
    bool seenText = false;      // leading whitespace is never counted
    int taken = 0;              // number of characters already measured

    for (;;)
    {
        for (; taken < length1; taken++)
        {
            char ch = str[taken];
            if (char.IsWhiteSpace(ch))
            {
                if (seenText)
                {
                    pendingWhitespace++;
                }
                continue;
            }

            // Whitespace enclosed by text counts as one full-width unit each.
            width += 2L * pendingWhitespace;
            pendingWhitespace = 0;
            seenText = true;
            width += (ch >= 33 && ch <= 126) ? 1 : 2;
        }

        if (width > limit)
        {
            // The last character pushed the prefix over the limit: drop it.
            return str.Substring(0, taken - 1);
        }
        if (taken == str.Length)
        {
            return str;
        }
        if (width == limit)
        {
            return str.Substring(0, taken);
        }

        // Still below the limit and characters remain: take one more.
        length1 = taken + 1;
    }
}
三、获取图片URL
/// <summary>
/// Extracts the src value of every img tag found in a piece of HTML text.
/// </summary>
/// <param name="sHtmlText">HTML text to scan.</param>
/// <returns>
/// One entry per matched img tag, in document order; an empty array when no
/// img tag matches.
/// </returns>
public static string[] GetHtmlImageUrlList(string sHtmlText)
{
    // Matches an img tag (case-insensitively) and captures the src value, up to
    // the first whitespace, quote or angle bracket, in the "imgUrl" group.
    const string imgTagPattern = @"<img\b[^<>]*?\bsrc[\s\t\r\n]*=[\s\t\r\n]*[""']?[\s\t\r\n]*(?<imgUrl>[^\s\t\r\n""'<>]*)[^<>]*?/?[\s\t\r\n]*>";

    MatchCollection imgTags = Regex.Matches(sHtmlText, imgTagPattern, RegexOptions.IgnoreCase);

    string[] urls = new string[imgTags.Count];
    for (int index = 0; index < urls.Length; index++)
    {
        urls[index] = imgTags[index].Groups["imgUrl"].Value;
    }
    return urls;
}

浙公网安备 33010602011771号