利用TcpClient通过HTTP协议获得网页源码

        private string GetHTMLTCP(string URL)
        {
            
string strHTML = "";//用来保存获得的HTML代码
            TcpClient clientSocket = new TcpClient();
            Uri URI 
= new Uri(URL);
            clientSocket.Connect(URI.Host, URI.Port);
            StringBuilder RequestHeaders 
= new StringBuilder();//用来保存HTML协议头部信息
            RequestHeaders.AppendFormat("{0} {1} HTTP/1.1\r\n","GET"/*此处可填写GET或POST*/,URI.PathAndQuery);
            RequestHeaders.AppendFormat(
"Connection:close\r\n");
            RequestHeaders.AppendFormat(
"Host:{0}\r\n", URI.Host);
            RequestHeaders.AppendFormat(
"Accept:*/*\r\n");
            RequestHeaders.AppendFormat(
"Accept-Language:zh-cn\r\n");
            RequestHeaders.AppendFormat(
"User-Agent:Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)\r\n\r\n");
            Encoding encoding 
= Encoding.Default;
            
byte[] request = encoding.GetBytes(RequestHeaders.ToString());
            clientSocket.Client.Send(request);
            
//获取要保存的网络流
            Stream readStream = clientSocket.GetStream();
            StreamReader sr 
= new StreamReader(readStream, Encoding.Default);
            strHTML 
= sr.ReadToEnd();


            readStream.Close();
            clientSocket.Close();
            
            
return strHTML;
        }

以上代码在VS2008环境下测试通过

posted on 2009-08-18 11:27  空空空  阅读(820)  评论(0)  编辑  收藏  举报

导航