// CSDN blog crawler (original title: csdn爬网)
/// <summary>
/// Console utility that walks the article-list pages of one CSDN blog and opens
/// every article link it finds in Internet Explorer for a few seconds.
/// </summary>
public partial class Program
{
    /// <summary>
    /// Entry point: closes any running Internet Explorer instances, reads the number of
    /// list pages from the blog's front page, then visits every article on every page.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        CloseIE();
        string url = "http://blog.csdn.net/dz45693";
        string html = GetRequest(url);

        // GetPageCount returns 0 when the download failed or the page-count marker is
        // missing, in which case the loop below simply does not run.
        int count = GetPageCount(html);
        for (int i = 1; i <= count; i++)
        {
            string tempurl = url + "/article/list/" + i.ToString();
            html = GetRequest(tempurl);
            List<string> links = GetPageLink(html);
            foreach (string link in links)
            {
                SendRequest(link);
            }

            // Clean up any browser processes left over from this page.
            CloseIE();
        }
    }

    /// <summary>
    /// Closes every process named "iexplore", asking politely first and killing the
    /// process if it is still running, then sleeps for one second.
    /// Failures on individual processes are traced and do not stop the loop.
    /// </summary>
    private static void CloseIE()
    {
        Process[] ps = Process.GetProcessesByName("iexplore");
        foreach (Process item in ps)
        {
            try
            {
                CloseOrKill(item);
            }
            catch (Exception ex)
            {
                Trace.WriteLine(ex.Message);
            }
            finally
            {
                // Release the handle only after we are done querying/killing the process;
                // a Process object cannot be used once it has been closed.
                item.Dispose();
            }
        }
        Thread.Sleep(1000);
    }

    /// <summary>
    /// Requests a graceful close of the process' main window and force-kills the
    /// process if it has not exited after a short grace period.
    /// </summary>
    /// <param name="process">A process object that is still associated with a process.</param>
    private static void CloseOrKill(Process process)
    {
        const int graceMilliseconds = 500;

        if (process.HasExited)
        {
            return;
        }

        // CloseMainWindow returns false when there is no main window to send the
        // close message to; in that case waiting would be pointless.
        if (process.CloseMainWindow())
        {
            process.WaitForExit(graceMilliseconds);
        }

        if (!process.HasExited)
        {
            process.Kill();
        }
    }

    /// <summary>
    /// Downloads <paramref name="url"/> with an HTTP GET and returns the response body.
    /// </summary>
    /// <param name="url">Absolute URL to fetch.</param>
    /// <returns>The response text, or <see cref="string.Empty"/> if anything fails
    /// (the exception message is written to <see cref="Trace"/>).</returns>
    static string GetRequest(string url)
    {
        try
        {
            HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create(url);
            request.Proxy = WebProxy.GetDefaultProxy();
            request.Proxy.Credentials = CredentialCache.DefaultCredentials;

            // The using blocks guarantee the response and its stream are released even
            // when reading the body throws.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (StreamReader sr = new StreamReader(response.GetResponseStream()))
            {
                return sr.ReadToEnd();
            }
        }
        catch (Exception ex)
        {
            Trace.WriteLine(ex.Message);
            return string.Empty;
        }
    }

    /// <summary>
    /// Opens <paramref name="url"/> in Internet Explorer, leaves it open for ten seconds,
    /// then closes (or kills) the browser process that was started.
    /// </summary>
    /// <param name="url">Address passed to iexplore.exe as its command-line argument.</param>
    /// <returns><c>true</c> if the browser was started and shut down without an exception;
    /// otherwise <c>false</c> (the exception message is traced).</returns>
    static bool SendRequest(string url)
    {
        try
        {
            using (Process p = new Process())
            {
                p.StartInfo.Arguments = url;
                p.StartInfo.FileName = @"C:\Program Files\Internet Explorer\iexplore.exe";
                p.Start();
                Thread.Sleep(1000 * 10);
                CloseOrKill(p);
            }
            return true;
        }
        catch (Exception ex)
        {
            Trace.WriteLine(ex.Message);
            return false;
        }
    }

    /// <summary>
    /// Extracts the total page count from the "共N页" marker in the page HTML.
    /// </summary>
    /// <param name="html">Page markup; may be null or empty.</param>
    /// <returns>The parsed page count, or 0 when the marker is absent or the number
    /// cannot be represented as an <see cref="int"/>.</returns>
    static int GetPageCount(string html)
    {
        if (string.IsNullOrEmpty(html))
        {
            return 0;
        }

        Regex reg = new Regex(@"共(\d{1,})页");
        Match m = reg.Match(html);

        int count;
        // TryParse instead of Parse: an absurdly long digit run must not crash the crawl.
        if (m.Success && int.TryParse(m.Groups[1].Value, out count))
        {
            return count;
        }
        return 0;
    }

    /// <summary>
    /// Collects the distinct article URLs found between the "article_list" marker and
    /// the following "papelist" marker of a list page.
    /// </summary>
    /// <param name="html">Page markup; may be null or empty.</param>
    /// <returns>Absolute article URLs in order of first appearance. Empty when the
    /// "article_list" marker is missing; when only the end marker is missing the
    /// search runs to the end of the page.</returns>
    static List<string> GetPageLink(string html)
    {
        List<string> list = new List<string>();
        if (string.IsNullOrEmpty(html))
        {
            return list;
        }

        int startindex = html.IndexOf("article_list", StringComparison.Ordinal);
        if (startindex < 0)
        {
            // No article list on this page (e.g. the download failed).
            return list;
        }

        // Look for the end marker only after the start marker so the section length
        // can never be negative.
        int endindex = html.IndexOf("papelist", startindex, StringComparison.Ordinal);
        if (endindex < 0)
        {
            endindex = html.Length;
        }

        string section = html.Substring(startindex, endindex - startindex);
        Regex reg = new Regex(@"/dz45693/article/details/(\d{1,})");
        foreach (Match m in reg.Matches(section))
        {
            string url = "http://blog.csdn.net" + m.Value;
            if (!list.Contains(url))
            {
                list.Add(url);
            }
        }
        return list;
    }

    /// <summary>
    /// P/Invoke declaration for kernel32 TerminateProcess. Kept unchanged for
    /// compatibility; this part of the class no longer calls it and uses
    /// <see cref="Process.Kill()"/> instead.
    /// NOTE(review): the documented Win32 signature takes a process HANDLE (not a
    /// process ID) and a UINT exit code and returns BOOL, so this declaration and any
    /// caller passing <c>Process.Id</c> should be reviewed.
    /// </summary>
    [SuppressUnmanagedCodeSecurity]
    [DllImport("kernel32")]
    public static extern long TerminateProcess(int handle, int exitCode);
}
// windows技术爱好者 (page tagline: "Windows technology enthusiast")
                    
                
                
            
        
// 浙公网安备 33010602011771号 (web page footer: site registration number)