HttpWebResponse获取DOM数据注意之ContentEncoding

/// <summary>
/// Downloads the lottery result page for the requested game and returns its decoded text,
/// followed by a line reporting the response's Content-Encoding header.
/// </summary>
/// <param name="type">
/// "dlt" requests <c>url_dlt</c>; "ssq" and every other value request <c>url_ssq</c>.
/// </param>
/// <returns>
/// The page text, a newline, then <c>responseContentEncoding:</c> and the raw
/// Content-Encoding header value. The page text is empty when the server used a
/// content coding this method cannot decode.
/// </returns>
/// <exception cref="System.Net.WebException">
/// Thrown by the underlying request on timeout or any other transport/protocol failure.
/// </exception>
public string GetKaiJ(string type = "ssq")
        {
            /* 1. Build the request. "ssq" and any unrecognized type share the same URL. */
            string url = type == "dlt" ? url_dlt : url_ssq;
            req = (HttpWebRequest)WebRequest.Create(url);
            req.ContentType = "text/html";
            req.Method = "GET";
            req.Timeout = 1000; // milliseconds

            /* 2. Read and decode the response. */
            string contentEncoding;

            // Dispose the response so the underlying connection is released even if
            // reading or decompressing the body throws.
            using (var response = (HttpWebResponse)req.GetResponse())
            {
                // Capture the header now: it is reported in the return value after the
                // response has been disposed.
                contentEncoding = response.ContentEncoding;
                string coding = (contentEncoding ?? string.Empty).Trim();

                // The server may answer the same request either uncompressed (empty
                // Content-Encoding) or gzip-compressed, alternating between calls. Each case
                // needs its own handling; otherwise the text is sometimes readable and
                // sometimes garbage. Content-coding names are case-insensitive in HTTP.
                if (string.Equals(coding, "gzip", StringComparison.OrdinalIgnoreCase))
                {
                    // NOTE(review): this branch decodes with Encoding.Default while the
                    // uncompressed branch uses myEncoding_default — confirm both match the
                    // page's actual charset.
                    using (var unzipped = new System.IO.Compression.GZipStream(response.GetResponseStream(), System.IO.Compression.CompressionMode.Decompress))
                    using (var reader = new StreamReader(unzipped, System.Text.Encoding.Default))
                    {
                        responseContent = reader.ReadToEnd();
                    }
                }
                else if (coding.Length == 0 || string.Equals(coding, "identity", StringComparison.OrdinalIgnoreCase))
                {
                    using (var body = response.GetResponseStream())
                    using (var reader = new StreamReader(body, myEncoding_default))
                    {
                        responseContent = reader.ReadToEnd();
                    }
                }
                else
                {
                    // Unsupported content coding: clear the field so the page from a
                    // previous call is not returned as if it belonged to this response.
                    responseContent = string.Empty;
                }
            }

            return $"{responseContent}\nresponseContentEncoding:{contentEncoding}";
        }

 

posted @ 2020-08-25 15:00  nick_JD  阅读(331)  评论(0编辑  收藏  举报