csdn爬网

  public partial class Program
    {
        /// <summary>
        /// Entry point: walks every article-list page of the blog and opens each
        /// article found on it through <c>SendRequest</c>.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // Start from a clean slate: no Internet Explorer instances left over.
            CloseIE();

            string blogUrl = "http://blog.csdn.net/dz45693";

            // The blog's front page is only used to find out how many list pages exist.
            int pageTotal = GetPageCount(GetRequest(blogUrl));

            int page = 1;
            while (page <= pageTotal)
            {
                string listingUrl = blogUrl + "/article/list/" + page.ToString();
                string listingHtml = GetRequest(listingUrl);

                foreach (string articleUrl in GetPageLink(listingHtml))
                {
                    SendRequest(articleUrl);
                }

                // Sweep up any browser instances still alive after this page.
                CloseIE();
                page++;
            }
        }

        /// <summary>
        /// Shuts down every running process named "iexplore", then pauses for one second.
        /// </summary>
        /// <remarks>
        /// Each process is first asked to close its main window; if it has no main
        /// window, or has not exited after a short grace period, it is killed.
        /// Failures for an individual process are traced and do not stop the sweep.
        /// </remarks>
        private static void CloseIE()
        {
            Process[] ps = Process.GetProcessesByName("iexplore");
            foreach (Process item in ps)
            {
                try
                {
                    // CloseMainWindow() returns false when there is no main window to
                    // message; in that case, or when the process ignores the request,
                    // fall back to a forced kill.
                    bool closeRequested = item.CloseMainWindow();
                    if (!closeRequested || !item.WaitForExit(1000))
                    {
                        // Process.Kill() works on the process this object is attached to.
                        // A raw kernel32 TerminateProcess call needs a process *handle*;
                        // a process id is not a valid substitute.
                        item.Kill();
                    }
                }
                catch (Exception ex)
                {
                    // Best effort: the process may already be gone or access may be denied.
                    Trace.WriteLine(ex.Message);
                }
                finally
                {
                    // Release the Process object only after we are done using it;
                    // its members (such as Id) throw once it has been closed.
                    item.Close();
                }
            }
            Thread.Sleep(1000);
        }

        /// <summary>
        /// Downloads <paramref name="url"/> with an HTTP GET and returns the response body as text.
        /// </summary>
        /// <param name="url">Absolute HTTP URL to fetch.</param>
        /// <returns>
        /// The response body read with <see cref="StreamReader"/>'s default encoding, or
        /// <see cref="string.Empty"/> when the request fails for any reason (the exception
        /// message is written to <see cref="Trace"/>).
        /// </returns>
        static string GetRequest(string url)
        {
            try
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

                // WebProxy.GetDefaultProxy() is obsolete; GetSystemWebProxy() is the
                // supported way to pick up the system proxy settings.
                IWebProxy proxy = WebRequest.GetSystemWebProxy();
                if (proxy != null)
                {
                    proxy.Credentials = CredentialCache.DefaultCredentials;
                    request.Proxy = proxy;
                }

                // The using blocks release the response and its stream even when
                // reading the body throws.
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                using (Stream body = response.GetResponseStream())
                using (StreamReader sr = new StreamReader(body))
                {
                    return sr.ReadToEnd();
                }
            }
            catch (Exception ex)
            {
                // Callers treat an empty string as "nothing downloaded".
                Trace.WriteLine(ex.Message);
                return string.Empty;
            }
        }

        /// <summary>
        /// Opens <paramref name="url"/> in a new Internet Explorer process, leaves it
        /// running for ten seconds, then shuts that process down.
        /// </summary>
        /// <param name="url">Address passed to iexplore.exe as its command-line argument.</param>
        /// <returns>
        /// <c>true</c> when the browser was started and the shutdown sequence completed
        /// without an exception; <c>false</c> otherwise (the exception message is traced).
        /// </returns>
        /// <remarks>
        /// An earlier variant of this method issued a plain <c>HttpWebRequest</c> for the
        /// URL instead of launching a browser.
        /// </remarks>
        static bool SendRequest(string url)
        {
            try
            {
                // using releases the Process object on every exit path.
                using (Process p = new Process())
                {
                    p.StartInfo.Arguments = url;
                    p.StartInfo.FileName = @"C:\Program Files\Internet Explorer\iexplore.exe";
                    p.Start();

                    // Leave the page open for ten seconds before closing the browser.
                    Thread.Sleep(1000 * 10);

                    // The started process may already have exited by now; calling
                    // CloseMainWindow()/Kill() on an exited process throws.
                    if (!p.HasExited)
                    {
                        p.CloseMainWindow();

                        // Process.Kill() acts on the process this object started. A raw
                        // kernel32 TerminateProcess call needs a process *handle*; a
                        // process id is not a valid substitute.
                        if (!p.WaitForExit(1000))
                        {
                            p.Kill();
                        }
                    }
                    return true;
                }
            }
            catch (Exception ex)
            {
                Trace.WriteLine(ex.Message);
                return false;
            }
        }

        /// <summary>
        /// Extracts the total number of pages from the pager text "共N页" in <paramref name="html"/>.
        /// </summary>
        /// <param name="html">Page markup to search; may be null or empty.</param>
        /// <returns>
        /// The number captured from the first "共N页" occurrence, or 0 when the input is
        /// null/empty, the text is not present, or the number does not fit in an <see cref="int"/>.
        /// </returns>
        static int GetPageCount(string html)
        {
            // A failed download yields an empty string; there is nothing to match in it.
            if (string.IsNullOrEmpty(html))
            {
                return 0;
            }

            Match m = Regex.Match(html, @"共(\d{1,})页");

            // TryParse instead of Parse: an over-long digit run must not throw.
            int count;
            if (m.Success && int.TryParse(m.Groups[1].Value, out count))
            {
                return count;
            }
            return 0;
        }

        /// <summary>
        /// Collects the distinct article URLs that appear between the "article_list"
        /// and "papelist" markers of <paramref name="html"/>.
        /// </summary>
        /// <param name="html">Markup of an article-list page; may be null or empty.</param>
        /// <returns>
        /// Absolute article URLs in order of first appearance, without duplicates.
        /// The list is empty when the input is null/empty or when either marker is
        /// missing (for example after a failed download).
        /// </returns>
        static List<string> GetPageLink(string html)
        {
            List<string> list = new List<string>();
            if (string.IsNullOrEmpty(html))
            {
                return list;
            }

            int startindex = html.IndexOf("article_list", StringComparison.Ordinal);
            if (startindex < 0)
            {
                return list;
            }

            // Look for the end marker only after the start marker so the slice length
            // can never be negative.
            // NOTE(review): "papelist" is kept verbatim; presumably it matches the
            // spelling used in the page markup - confirm before "correcting" it.
            int endindex = html.IndexOf("papelist", startindex, StringComparison.Ordinal);
            if (endindex < 0)
            {
                return list;
            }

            string section = html.Substring(startindex, endindex - startindex);
            foreach (Match m in Regex.Matches(section, @"/dz45693/article/details/(\d{1,})"))
            {
                string url = "http://blog.csdn.net" + m.Value;
                if (!list.Contains(url))
                {
                    list.Add(url);
                }
            }
            return list;
        }

        /// <summary>
        /// P/Invoke declaration bound to the <c>TerminateProcess</c> export of kernel32.
        /// </summary>
        /// <param name="handle">Value marshalled as the native function's first argument.</param>
        /// <param name="exitCode">Value marshalled as the native function's second argument.</param>
        /// <returns>The native return value as marshalled into a 64-bit integer.</returns>
        /// <remarks>
        /// NOTE(review): the documented Win32 prototype is
        /// <c>BOOL TerminateProcess(HANDLE hProcess, UINT uExitCode)</c>. The first argument
        /// must be an open process handle (pointer-sized), not a process id such as
        /// <c>Process.Id</c>, and the result is a 32-bit BOOL - so this declaration's
        /// <c>int</c> handle and <c>long</c> return type look mismatched. Confirm against the
        /// Win32 documentation; <c>Process.Kill()</c> is the managed alternative.
        /// </remarks>
        [SuppressUnmanagedCodeSecurity]
        [DllImport("kernel32")]
        public static extern long TerminateProcess(int handle, int exitCode);


    }
posted on 2011-08-31 16:50 dz45693 阅读(163) 评论(0) 收藏举报
刷新页面返回顶部
dz45693

csdn爬网

导航

公告