C#中利用正则表达式提取网文链接

/// <summary>
/// Matches an HREF or SRC attribute and captures its value in the named
/// group LINK. The value may be wrapped in single quotes, double quotes,
/// or left unquoted. Matching is case-insensitive.
/// </summary>
/// <remarks>
/// The capture is lazy and ends at the first quote, whitespace character or
/// closing angle bracket, so a quoted value that contains whitespace is cut
/// at that whitespace.
/// </remarks>
public static Regex regexAllLink = new Regex(
   // Attribute name, '=', then an optional opening quote followed by the lazily captured value.
   @"((HREF)|(SRC))\s*=\s*['""]?(?<LINK>[\w\W]+?)"
   // Terminator: only a quote, whitespace or '>' may end the value. Stray
   // literal characters here would truncate links at ordinary letters.
   + @"['""\s>]",
   RegexOptions.IgnoreCase
   | RegexOptions.CultureInvariant
   | RegexOptions.IgnorePatternWhitespace
   | RegexOptions.Compiled
   );

/// <summary>
/// Collects the LINK capture of every match of <c>regexAllLink</c> found in
/// the given text, in order of appearance.
/// </summary>
/// <param name="content">Text to scan; may be null or empty.</param>
/// <returns>
/// The captured link values with any leading "./" segments removed. An empty
/// array is returned when <paramref name="content"/> is null or empty or
/// contains no match.
/// </returns>
private string[] ExctractAllLink(string content)
{
    // Regex.Match throws on null input; there is nothing to extract anyway.
    if (string.IsNullOrEmpty(content))
    {
        return new string[0];
    }

    System.Collections.Generic.List<string> links = new System.Collections.Generic.List<string>();
    for (Match match = regexAllLink.Match(content); match.Success; match = match.NextMatch())
    {
        // Group.Value is never null, so no null check is needed here.
        string link = match.Groups["LINK"].Value;

        // Strip only leading "./" segments. Removing every "./" occurrence
        // would corrupt parent-relative paths such as "../img/a.png".
        while (link.StartsWith("./", System.StringComparison.Ordinal))
        {
            link = link.Substring(2);
        }

        links.Add(link);
    }

    return links.ToArray();
}

posted @ 2009-07-21 10:18 dayang Views(185) Comments(0) 收藏举报

刷新页面返回顶部

dayang

C#中利用正则表达式提取网文链接

公告