C#中利用正则表达式提取网文链接

/// <summary>
/// Matches an HTML <c>href</c> or <c>src</c> attribute (case-insensitive) and
/// captures the attribute value in the named group <c>LINK</c>.
/// </summary>
/// <remarks>
/// The value may be wrapped in single quotes, double quotes, or left unquoted.
/// <c>LINK</c> is the longest run of characters that contains no quote,
/// whitespace, or closing angle bracket, so it ends at the closing quote, at
/// the first whitespace, or at the end of the tag, whichever comes first.
/// An empty attribute value (for example <c>href=""</c>) produces no match.
/// The pattern is written with <see cref="RegexOptions.IgnorePatternWhitespace"/>,
/// so the spaces between tokens are layout only; whitespace inside the
/// bracketed character classes is still significant, which is why those
/// classes contain none.
/// </remarks>
public static readonly Regex regexAllLink = new Regex(
   // Groups 1-3 are kept as capturing groups so existing numbered-group
   // lookups keep their indices; LINK remains the only named group.
   @"((HREF)|(SRC)) \s* = \s* ['""]? (?<LINK>[^'""\s>]+)",
   RegexOptions.IgnoreCase
   | RegexOptions.CultureInvariant
   | RegexOptions.IgnorePatternWhitespace
   | RegexOptions.Compiled
   );

  private string[] ExctractAllLink(string content)
  {
   #region
   ArrayList arrLinks = new  ArrayList ();
   for(Match m1 = regexAllLink.Match (content ); m1.Success; m1 = m1.NextMatch ())
   {
    if(m1.Groups["LINK"].Value != null)
    {
     arrLinks.Add (m1.Groups["LINK"].Value.Replace ("./",""));
    }
   }
   return (string[]) arrLinks.ToArray (typeof(string));
   #endregion
  }

 

 

posted @ 2009-07-21 10:18  dayang  Views(185)  Comments(0)    收藏  举报