代码
/// <summary>
/// Downloads the HTML at <paramref name="url"/>, following the site's script-based
/// redirect when the first response is only a <c>location.href='...'</c> stub.
/// </summary>
/// <remarks>
/// All requests share one <see cref="CookieContainer"/>, so cookies issued while
/// visiting the redirect target are sent when <paramref name="url"/> is requested again.
/// </remarks>
/// <param name="url">Absolute URL of the page to download.</param>
/// <returns>
/// The body of the second request to <paramref name="url"/> when a redirect stub was
/// found and followed; otherwise the body of the first response.
/// </returns>
public static string getProHtml(string url)
{
    const string redirectMarker = "location.href='";
    const string siteRoot = "http://ccn.mofcom.gov.cn/";

    var container = new CookieContainer();

    // StreamReader's default encoding is UTF-8, so passing it explicitly keeps the
    // first read identical to the original `new StreamReader(stream)`.
    string result = FetchHtml(url, container, Encoding.UTF8);

    // Look for the exact marker that is sliced on below. The original tested for
    // "location.href=" (no quote) and then sliced on the quoted form, which produced
    // a garbage URL or an exception whenever the quote was absent.
    int markerIndex = result.IndexOf(redirectMarker, System.StringComparison.Ordinal);
    if (markerIndex < 0)
    {
        return result;
    }

    int targetStart = markerIndex + redirectMarker.Length;
    int targetEnd = result.IndexOf('\'', targetStart);
    if (targetEnd < 0)
    {
        // Unterminated redirect target: nothing reliable to follow.
        return result;
    }

    string actionUrl = result.Substring(targetStart, targetEnd - targetStart);

    // Resolve against the site root instead of concatenating strings, so a
    // root-relative ("/x") or absolute target still yields a well-formed address.
    var redirectUri = new System.Uri(new System.Uri(siteRoot), actionUrl);

    // NOTE(review): Encoding.Default is the OS ANSI code page on .NET Framework and
    // UTF-8 on .NET Core/5+; kept as in the original — confirm the site's charset.
    Encoding encode = Encoding.Default;

    // Visit the redirect target only for its cookies; its body is not used.
    FetchHtml(redirectUri.AbsoluteUri, container, encode);

    // Request the original page again, now with the cookies collected above.
    return FetchHtml(url, container, encode);
}

/// <summary>
/// Issues a GET request with the shared cookie container and returns the whole
/// response body decoded with <paramref name="encoding"/>.
/// </summary>
private static string FetchHtml(string address, CookieContainer cookies, Encoding encoding)
{
    var request = (HttpWebRequest)WebRequest.Create(address);
    request.CookieContainer = cookies;

    // `using` releases the response, stream and reader even when reading throws;
    // the original only called reader.Close() on the success path.
    using (WebResponse response = request.GetResponse())
    using (Stream body = response.GetResponseStream())
    using (var reader = new StreamReader(body, encoding))
    {
        return reader.ReadToEnd();
    }
}
// Headerless repeat of the getProHtml listing body. The number at the start of each
// line is the listing's own line number, not part of the code.
2 {
// One cookie container is assigned to every request below, so the requests share cookies.
3 var container = new CookieContainer();
4 var request = (HttpWebRequest)WebRequest.Create(url);
5 request.CookieContainer = container;
// First attempt: read the whole response body with StreamReader's default encoding.
6 var reader = new StreamReader(request.GetResponse().GetResponseStream());
7 var result = reader.ReadToEnd();
8 reader.Close();
9
// A body containing "location.href=" is treated as a script redirect rather than the page.
10 if (result.Contains("location.href="))
11 {
// Drop everything up to and including "location.href='" (15 characters), then cut
// at the next single quote to get the redirect target.
// NOTE(review): the check above does not require the single quote; if it is absent
// IndexOf returns -1 and this slicing is wrong or throws — confirm.
12 string actionUrl = result.Remove(0, result.IndexOf("location.href='") + 15);
13 actionUrl = actionUrl.Substring(0, actionUrl.IndexOf("'"));
14
// Request the target under the fixed site root with the same cookie container.
// The body read here is overwritten by the next request.
15 request = (HttpWebRequest)WebRequest.Create("http://ccn.mofcom.gov.cn/" + actionUrl);
16 request.CookieContainer = container;
17 Encoding encode = Encoding.Default;
18 reader = new StreamReader(request.GetResponse().GetResponseStream(), encode);
19 result = reader.ReadToEnd();
20 reader.Close();
21
// Request the original url again with the same cookie container; this body is returned.
22 request = (HttpWebRequest)WebRequest.Create(url);
23 request.CookieContainer = container;
24 reader = new StreamReader(request.GetResponse().GetResponseStream(), encode);
25 result = reader.ReadToEnd();
26 reader.Close();
27 }
28 return result;
29 }
当你用抓取代码直接抓取“继续浏览该企业信息”的链接时，返回的只是一段 location.href 跳转脚本，而不是页面内容，因为各次请求之间没有保存状态（Cookie）。让所有请求共用同一个 CookieContainer 来保存状态之后，就可以正常抓取了。

浙公网安备 33010602011771号