20131127-正则表达式

[1]从网页上下载图片

namespace 下载图片
{
    /// <summary>
    /// Console sample: downloads a demo web page, extracts the image links it
    /// contains with a regular expression, and saves every image to a local folder.
    /// </summary>
    class Program
    {
        // Server folder that holds both the page and the images it references.
        private const string BaseAddress = "http://localhost:8080/美女图片/";

        // Captures the src attribute of each image tag. The capture only accepts
        // non-quote characters, so several tags on one line yield one match each
        // instead of a single greedy match that runs through to the last tag.
        private const string ImagePattern = "<img alt=\"\" src=\"([^\"]+)\" />";

        /// <summary>
        /// Entry point: fetches the page, finds every image link and downloads
        /// each image into E:\test\, then waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            string webAddress = BaseAddress + "美女们.htm";
            string path = "E:\\test\\";

            // DownloadFile does not create missing folders, so make sure the target exists.
            Directory.CreateDirectory(path);

            // WebClient is IDisposable; the using block releases it deterministically.
            using (WebClient wc = new WebClient())
            {
                wc.Encoding = Encoding.Default;

                // Regex.Matches only yields successful matches, so no Success check is needed.
                foreach (Match item in GetWebInfo(wc, webAddress, ImagePattern))
                {
                    string imageSrc = item.Groups[1].Value;

                    // Build the real address of the image on the server.
                    string realImgAddress = BaseAddress + imageSrc;

                    // Keep only the file name so the image lands directly in the target folder.
                    string localpath = Path.Combine(path, Path.GetFileName(imageSrc));

                    wc.DownloadFile(realImgAddress, localpath);
                }
            }

            Console.WriteLine("OK");
            Console.ReadKey();
        }

        /// <summary>
        /// Downloads the resource at <paramref name="webAddress"/> as a string and
        /// returns every match of <paramref name="strRegex"/> found in it.
        /// </summary>
        /// <param name="wc">Client used to download the page; its Encoding is set by the caller.</param>
        /// <param name="webAddress">Address of the page to download.</param>
        /// <param name="strRegex">Regular-expression pattern to apply to the downloaded text.</param>
        /// <returns>The collection of matches found in the downloaded text.</returns>
        public static MatchCollection GetWebInfo(WebClient wc, string webAddress, string strRegex)
        {
            // Fetch the HTML of the page as one string.
            string html = wc.DownloadString(webAddress);

            // Apply the pattern to pull the wanted values out of the markup.
            Regex regex = new Regex(strRegex);
            return regex.Matches(html);
        }
    }
}

 

[2]从网页提取邮箱

namespace 网页提取邮箱2
{
    /// <summary>
    /// Console sample: downloads a demo web page and lists every e-mail address
    /// found in it, split into user name and domain.
    /// </summary>
    class Program
    {
        // Group 1 captures the user name, group 2 the domain
        // (a host label followed by one or two dot-separated alphabetic labels).
        private const string EmailPattern = @"([0-9a-zA-Z_.-]+)@([0-9a-zA-Z-]+(\.[a-zA-Z]+){1,2})";

        /// <summary>
        /// Entry point: fetches the page, prints each address with its user name
        /// and domain, prints the total count, then waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            string webAddress = "http://localhost:8080/提取Email.htm";
            MatchCollection mat;

            // WebClient is IDisposable; the using block releases it deterministically.
            using (WebClient wc = new WebClient())
            {
                wc.Encoding = Encoding.Default;
                mat = GetWebInfo(wc, webAddress, EmailPattern);
            }

            // Regex.Matches only yields successful matches, so no Success check is needed.
            foreach (Match item in mat)
            {
                Console.WriteLine(item.Value + "===用户名为:" + item.Groups[1].Value + "===域名为:" + item.Groups[2].Value);
            }

            Console.WriteLine("一共有{0}", mat.Count);
            Console.ReadKey();
        }

        /// <summary>
        /// Downloads the resource at <paramref name="webAddress"/> as a string and
        /// returns every match of <paramref name="strRegex"/> found in it.
        /// </summary>
        /// <param name="wc">Client used to download the page; its Encoding is set by the caller.</param>
        /// <param name="webAddress">Address of the page to download.</param>
        /// <param name="strRegex">Regular-expression pattern to apply to the downloaded text.</param>
        /// <returns>The collection of matches found in the downloaded text.</returns>
        public static MatchCollection GetWebInfo(WebClient wc, string webAddress, string strRegex)
        {
            // Fetch the HTML of the page as one string.
            string html = wc.DownloadString(webAddress);

            Regex regex = new Regex(strRegex);
            return regex.Matches(html);
        }
    }
}

posted @ 2014-01-20 21:27  几维  阅读(226)  评论(0)  编辑  收藏  举报