网上的照片爬虫

  

/// <summary>
/// Entry point: crawls one wallpaper page and saves every image found on it.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    string url = "http://desk.zol.com.cn/bizhi/5803_72104_2.html";

    // This is a file-name prefix, not a directory: the helper writes "<prefix><n>.jpg".
    // (The former single-argument Path.Combine call returned the string unchanged.)
    string path = @"C:\Users\topo\Desktop\新建文件夹 (2)\s";

    HttpCrawelHelper.CreatFile();

    // CreatFile only prepares its own fixed folder, so make sure the folder that
    // actually receives the images exists; otherwise every download fails.
    string targetDirectory = Path.GetDirectoryName(path);
    if (!string.IsNullOrEmpty(targetDirectory))
    {
        Directory.CreateDirectory(targetDirectory);
    }

    HttpCrawelHelper.HttpGetHandle(url, path, 1);

    // Keep the console window open until a key is pressed.
    Console.ReadKey();
}
}
}
class HttpCrawelHelper
{

/// <summary>
/// Fetches the page at <paramref name="url"/>, extracts the <c>src</c> of every
/// <c>&lt;img&gt;</c> tag and saves each absolute http(s), non-SVG image as
/// <c>path + number + ".jpg"</c>, printing progress and a summary to the console.
/// </summary>
/// <param name="url">Address of the HTML page to scan.</param>
/// <param name="path">
/// File-name prefix (directory plus leading part of the file name). It is
/// concatenated with the running number, not combined as a directory.
/// </param>
/// <param name="name">Number used for the first saved file; incremented after each successful download.</param>
/// <exception cref="WebException">The page itself could not be fetched.</exception>
public static void HttpGetHandle(string url, string path, int name)
{
    Stopwatch stopwatch = new Stopwatch();
    stopwatch.Start();

    HttpWebRequest webRequest = WebRequest.CreateHttp(url);
    webRequest.Method = "GET";
    webRequest.UserAgent = " Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:75.0) Gecko/20100101 Firefox/75.0";

    // Dispose the response and reader deterministically so the connection is
    // released even when reading the body throws.
    string str;
    using (WebResponse webResponse = webRequest.GetResponse())
    using (StreamReader streamReader = new StreamReader(webResponse.GetResponseStream(), Encoding.UTF8))
    {
        str = streamReader.ReadToEnd();
    }

    if (string.IsNullOrEmpty(str))
    {
        Console.WriteLine("————————-错误—————————");
        Console.ReadKey();
        // Nothing to parse: stop here instead of going on to report a successful crawl.
        return;
    }

    // Captures the value of the src attribute of each <img> tag in the named group "Group".
    Regex regex = new Regex(@"<img\b[^<>]*?\bsrc[\s\t\r\n]*=[\s\t\r\n]*[""']?[\s\t\r\n]*(?<Group>[^\s\t\r\n""'<>]*)[^<>]*?/?[\s\t\r\n]*>");
    MatchCollection match = regex.Matches(str);

    int temp = 0;
    using (WebClient client = new WebClient())
    {
        foreach (Match match1 in match)
        {
            string src = match1.Groups["Group"].Value;

            // Only absolute http/https addresses are downloadable; anything else
            // (relative paths, data URIs, text that merely contains "http") is skipped.
            Uri imageUri;
            bool isWebAddress = Uri.TryCreate(src, UriKind.Absolute, out imageUri)
                && (imageUri.Scheme == Uri.UriSchemeHttp || imageUri.Scheme == Uri.UriSchemeHttps);
            if (!isWebAddress || src.Contains(".svg"))
            {
                continue;
            }

            try
            {
                client.DownloadFile(imageUri, path + name + ".jpg");
            }
            catch (Exception ex)
            {
                // Best effort: report the failed image and carry on with the
                // remaining ones instead of aborting the whole crawl.
                Console.WriteLine("-------------" + ex);
                continue;
            }

            // Count and advance the file number only after the file was really saved.
            temp++;
            name++;
            Console.WriteLine("\n正在爬取———————" + "|" + temp);
        }
    }

    stopwatch.Stop();
    Console.WriteLine("————-———爬取成功!—————");
    Console.WriteLine("\n_______总共爬取了" + temp + "张图片!_______________");
    // Integer division: elapsed time is reported in whole seconds.
    Console.WriteLine("\n一共耗时" + stopwatch.ElapsedMilliseconds / 1000 + "秒");
}

/// <summary>
/// Prints the start banner when the folder <c>D:\Picture\</c> already exists;
/// otherwise creates that folder.
/// </summary>
public static void CreatFile()
{
    const string pictureFolder = @"D:\Picture\";

    if (!Directory.Exists(pictureFolder))
    {
        // Folder is missing: create it. The banner is not printed on this path.
        new DirectoryInfo(pictureFolder).Create();
        return;
    }

    Console.WriteLine("\n————————开始——————————");
}

 

 

 

 

 

 

  

 

posted @ 2020-09-04 13:23  AetlySaber  阅读(108)  评论(0)    收藏  举报