C#中利用正则表达式提取网文链接
/// <summary>
/// Matches an HREF or SRC attribute (case-insensitively) and captures the
/// attribute value in the named group <c>LINK</c>.
/// </summary>
/// <remarks>
/// The value may be preceded by an optional single or double quote and ends at
/// the first single quote, double quote, whitespace character or '&gt;'.
/// The pattern is written as one verbatim literal: splitting the quote
/// characters across concatenated literals previously left stray characters
/// (letters, '+', '@', '%') inside both character classes, which cut captured
/// links short at the first such character.
/// </remarks>
public static Regex regexAllLink = new Regex(
    // In a verbatim string "" is one literal double quote, so ['""] is the class [' "].
    @"((HREF)|(SRC))\s*=\s*['""]?(?<LINK>[\w\W]+?)['""\s>]",
    RegexOptions.IgnoreCase
    | RegexOptions.CultureInvariant
    | RegexOptions.IgnorePatternWhitespace
    | RegexOptions.Compiled
);
/// <summary>
/// Returns the LINK capture of every match that <c>regexAllLink</c> finds in
/// <paramref name="content"/>, in the order the matches occur.
/// </summary>
/// <param name="content">Text to scan; passed directly to <c>regexAllLink.Match</c>.</param>
/// <returns>
/// The captured link strings with any leading "./" segments removed;
/// an empty array when nothing matches.
/// </returns>
private string[] ExctractAllLink(string content)
{
    ArrayList arrLinks = new ArrayList();

    for (Match m1 = regexAllLink.Match(content); m1.Success; m1 = m1.NextMatch())
    {
        Group linkGroup = m1.Groups["LINK"];

        // Group.Value is never null (it is "" for a group that did not
        // participate), so Success is the meaningful check here.
        if (!linkGroup.Success)
        {
            continue;
        }

        string link = linkGroup.Value;

        // Strip only leading "./" segments. Replacing every "./" in the string
        // would also corrupt parent-relative paths: "../a.png" -> ".a.png".
        while (link.Length >= 2 && link[0] == '.' && link[1] == '/')
        {
            link = link.Substring(2);
        }

        arrLinks.Add(link);
    }

    return (string[])arrLinks.ToArray(typeof(string));
}

浙公网安备 33010602011771号