.NET图片爬虫

一时心血来潮,想做一个网页图片爬虫。从网上找了好多资料,不过大多数都看不懂,知识面还是太浅。历经千辛万苦,终于简单地实现了!
 还是要感谢老前辈,也体会到了自学的艰辛!路还长,继续努力!!!
  1 using System;
  2 using System.Collections.Generic;
  3 using System.ComponentModel;
  4 using System.Data;
  5 using System.Drawing;
  6 using System.IO;
  7 using System.Linq;
  8 using System.Net;
  9 using System.Text;
 10 using System.Threading;
 11 using System.Threading.Tasks;
 12 using System.Windows.Forms;
 13 using Newtonsoft.Json;
 14 using Newtonsoft.Json.Linq;
 15 
 16 namespace 网页图片爬虫_窗体版
 17 {
 18     public partial class Form1 : ZHSkin.ZHForm
 19     {
 20         /// <summary>
 21         /// 关键词
 22         /// </summary>
 23         private string SavePath = string.Empty;
 24         private Thread thread = null;
 25         public Form1()
 26         {
 27             InitializeComponent();
 28         }
 29         /// <summary>
 30         /// 获取保存路径
 31         /// </summary>
 32         /// <param name="sender"></param>
 33         /// <param name="e"></param>
 34         private void btn_GetPath_Click(object sender, EventArgs e)
 35         {
 36             FolderBrowserDialog folderBrowser = new FolderBrowserDialog();
 37             if (folderBrowser.ShowDialog() == DialogResult.OK)
 38             {
 39                 txt_SavePath.Text = folderBrowser.SelectedPath;
 40             }
 41         }
 42         /// <summary>
 43         /// 开始获取
 44         /// </summary>
 45         /// <param name="sender"></param>
 46         /// <param name="e"></param>
 47         private void btn_Start_Click(object sender, EventArgs e)
 48         {
 49             string KeyWord = txt_KeyWords.Text.Trim();
 50             int pageCount = (int)NUD_PageCount.Value;
 51             SavePath = txt_SavePath.Text;
 52             if (string.IsNullOrEmpty(KeyWord))
 53             {
 54                 txt_ShowPath.AppendText("请输入要搜索的关键词!" + Environment.NewLine);
 55                 return;
 56             }
 57             if (string.IsNullOrEmpty(SavePath))
 58             {
 59                 txt_ShowPath.AppendText("请选择要保存的路径!" + Environment.NewLine);
 60                 return;
 61             }
 62             if (!Directory.Exists(SavePath))//如果不存在就创建file文件夹
 63             {
 64                 txt_ShowPath.AppendText("输入路径不正确,请核对!" + Environment.NewLine);
 65                 return;
 66             }
 67             if (!SavePath.EndsWith("\\"))
 68             {
 69                 SavePath = SavePath + "\\";
 70             }
 71             btn_Stop.Enabled = true;//启用停止按钮
 72             btn_Start.Enabled = false;//禁用开始按钮
 73             txt_ShowPath.Clear();//清空日志
 74             txt_ShowPath.AppendText("正在启动下载!" + Environment.NewLine);
 75             msg.Text = "已启动下载...";
 76             //启动进度条
 77             ProgressBar.Enabled = true;
 78             //使用多线程下载
 79             thread = new Thread(() =>
 80             {
 81                 ProcessDownload(KeyWord);
 82             });
 83             thread.Start();//启动下载
 84         }
 85         /// <summary>
 86         /// 处理下载
 87         /// </summary>
 88         /// <param name="KeyWord"></param>
 89         public void ProcessDownload(string KeyWord)
 90         {
 91             try
 92             {
 93                 int pageCount = (int)NUD_PageCount.Value;//下载页数
 94                                                          //循环获取路径
 95                 for (int i = 0; i < pageCount; i++)
 96                 {
 97                     msg.Text = "正在下载第" + (i + 1) + "页,共" + pageCount + "";
 98                     string URL = "https://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj&ct=201326592&is=&fp=result&queryWord=" + Uri.EscapeDataString(KeyWord) + "&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=-1&z=&ic=0&word=" + Uri.EscapeDataString(KeyWord) + "&s=&se=&tab=&width=&height=&face=0&istype=2&qc=&nc=1&fr=&cg=wallpaper&pn=" + (i + 1) * 60 + "&rn=60&gsm=3c&1525422519486=";
 99                     HttpWebRequest request = (HttpWebRequest)WebRequest.Create(URL);
100                     using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
101                     {
102                         if (response.StatusCode == HttpStatusCode.OK)
103                         {
104                             using (Stream stream = response.GetResponseStream())
105                             {
106                                 try
107                                 {
108                                     DownloadPage(stream, i);//下载页面
109                                 }
110                                 catch (Exception ex)
111                                 {
112                                     txt_ShowPath.BeginInvoke(new Action(() =>
113                                     {
114                                         txt_ShowPath.AppendText(ex.Message + Environment.NewLine);
115                                     }));
116                                 }
117                             }
118                         }
119                         else
120                         {
121                             MessageBox.Show("获取第" + i + "页失败:" + response.StatusCode);
122                         }
123                     }
124                 }
125                 msg.Text = "下载完成!下载" + pageCount + "页,共" + pageCount * 60 + "张图片";
126                 ProgressBar.Value = 0;//进度条归零
127                 btn_Stop.Enabled = false;//禁用停止按钮
128                 btn_Start.Enabled = true;//启用开始按钮
129                 btn_Stop.Text = "停止";
130             }
131             catch (Exception)
132             {
133                 txt_ShowPath.BeginInvoke(new Action(() =>
134                 {
135                     txt_ShowPath.AppendText("网络连接失败!" + Environment.NewLine);
136                 }));
137             }
138         }
139         /// <summary>
140         /// 下载页面
141         /// </summary>
142         /// <param name="stream"></param>
143         private void DownloadPage(Stream stream, int index)
144         {
145             using (StreamReader reader = new StreamReader(stream))
146             {
147                 string json = reader.ReadToEnd();
148                 //txt_ShowPath.AppendText(json);
149                 JObject objRoot = (JObject)JsonConvert.DeserializeObject(json);
150                 JArray imgs = (JArray)objRoot["data"];
151                 txt_ShowPath.BeginInvoke(new Action(() =>
152                 {
153                     txt_ShowPath.AppendText("正在下载第" + (index + 1) + "页!" + Environment.NewLine);
154                 }));
155                 for (int i = 0; i < imgs.Count; i++)
156                 {
157                     JObject img = (JObject)imgs[i];
158                     string objUrl = (string)img["middleURL"];
159                     try
160                     {
161 
162                         DownloadImage(objUrl);//下载   
163                         SetTextMessage(100 * i / imgs.Count);
164                     }
165                     catch (Exception ex)
166                     {
167                         txt_ShowPath.BeginInvoke(new Action(() =>
168                         {
169                             txt_ShowPath.AppendText(ex.Message + Environment.NewLine);
170                         }));
171                     }
172                 }
173                 txt_ShowPath.BeginInvoke(new Action(() =>
174                 {
175                     txt_ShowPath.AppendText("" + (index + 1) + "页下载完成!" + Environment.NewLine);
176                 }));
177             }
178         }
179         /// <summary>
180         /// 下载图片
181         /// </summary>
182         /// <param name="url"></param>
183         private void DownloadImage(string objUrl)
184         {
185             txt_ShowPath.AppendText("正在下载:" + Path.GetFileName(objUrl) + "" + Environment.NewLine);
186             //URLRefer://这个图片是从哪个页面启动下载的
187             string destFile = Path.Combine(SavePath, Path.GetFileName(objUrl));//获取保存路径
188             HttpWebRequest request = (HttpWebRequest)WebRequest.Create(objUrl);
189             request.Referer = "https://image.baidu.com/";//欺骗浏览器
190             using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
191             {
192                 if (response.StatusCode == HttpStatusCode.OK)
193                 {
194                     using (Stream stream = response.GetResponseStream())
195                     using (Stream FStream = new FileStream(destFile, FileMode.Create))
196                     {
197                         stream.CopyTo(FStream);
198                         txt_ShowPath.AppendText("下载成功!" + Environment.NewLine);
199                     }
200                 }
201                 else
202                 {
203                     throw new Exception("下载" + objUrl + "失败,错误码:" + response.StatusCode);
204                 }
205             }
206         }
207         /// <summary>
208         /// 进度条代理
209         /// </summary>
210         /// <param name="ipos"></param>
211         /// <param name="vinfo"></param>
212         private delegate void SetPos(int ipos);
213         /// <summary>
214         /// 进度条更新函数
215         /// </summary>
216         /// <param name="ipos"></param>
217         /// <param name="vinfo"></param>
218         private void SetTextMessage(int ipos)
219         {
220             if (this.InvokeRequired)
221             {
222                 SetPos pos = new SetPos(SetTextMessage);
223                 this.Invoke(pos, new object[] { ipos });
224             }
225             else
226             {
227                 this.ProgressBar.Value = Convert.ToInt32(ipos);
228             }
229         }
230         /// <summary>
231         /// 停止下载
232         /// </summary>
233         /// <param name="sender"></param>
234         /// <param name="e"></param>
235         private void btn_Stop_Click(object sender, EventArgs e)
236         {
237             if (thread != null)
238             {
239                 if (btn_Stop.Text == "暂 停")
240                 {
241 
242                     thread.Suspend();//挂起线程线程
243                     txt_ShowPath.AppendText("已暂停下载!" + Environment.NewLine);
244                     btn_Stop.Text = "继 续";
245                 }
246                 else if (btn_Stop.Text == "继 续")
247                 {
248                     thread.Resume();//挂起线程线程
249                     txt_ShowPath.AppendText("开始下载!" + Environment.NewLine);
250                     btn_Stop.Text = "暂 停";
251                 }
252             }
253         }
254         /// <summary>
255         /// 窗体关闭时关闭所有线程
256         /// </summary>
257         /// <param name="sender"></param>
258         /// <param name="e"></param>
259         private void Form1_FormClosed(object sender, FormClosedEventArgs e)
260         {
261             if (thread != null)
262             {
263                 thread.Abort();
264             }
265         }
266         /// <summary>
267         /// 设置滚动条
268         /// </summary>
269         /// <param name="sender"></param>
270         /// <param name="e"></param>
271         private void txt_ShowPath_TextChanged(object sender, EventArgs e)
272         {
273             txt_ShowPath.SelectionStart = txt_ShowPath.Text.Length;
274             txt_ShowPath.ScrollToCaret();
275         }
276     }
277 }

 

 

posted @ 2018-05-04 21:42  墨心羽  阅读(147)  评论(0)    收藏  举报