Python urllib2 调试

#!/usr/bin/env python
# coding=utf-8
__author__ = 'zhaoyingnan'
import urllib
import urllib2
import chardet


class HtmlLoader:
    """Download a page through urllib2 using browser-like request headers."""

    def urlLoad(self, sUrl, isPost=False, isDebug=False):
        """Fetch ``sUrl`` and return the response body, or None on failure.

        Args:
            sUrl: URL to request.
            isPost: when true, send the request with an empty urlencoded
                body (urllib2 then issues a POST); otherwise send a GET.
            isDebug: when true, build the opener with HTTP/HTTPS handlers
                created with ``debuglevel=1``.

        Returns:
            The body of a 200 response. When chardet reports an encoding
            whose name starts with "GB" the body is decoded with GBK and
            returned as unicode; otherwise the raw bytes are returned
            unchanged (including an empty body). None is returned when the
            status code is not 200 or when any step raises; in the latter
            case the exception is printed.
        """
        dictHeaders = {
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/51.0.2704.79 Chrome/51.0.2704.79 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Charset': 'utf-8',
            'Referer': 'https://www.baidu.com/?tn=92765401_hao_pg',
            'Accept-Language': 'zh-CN,zh;q=0.8',
        }

        try:
            if isPost:
                # Supplying a data payload (even an empty one) is what makes
                # urllib2 send POST instead of GET.
                sData = urllib.urlencode({})
                Request = urllib2.Request(url=sUrl, data=sData, headers=dictHeaders)
            else:
                Request = urllib2.Request(url=sUrl, headers=dictHeaders)

            # Proxy: an empty mapping means no proxy is configured here.
            listHandlers = [urllib2.ProxyHandler({})]
            if isDebug:
                # Debugging: only build the tracing handlers when requested.
                listHandlers.append(urllib2.HTTPHandler(debuglevel=1))
                listHandlers.append(urllib2.HTTPSHandler(debuglevel=1))
            Opener = urllib2.build_opener(*listHandlers)

            # Kept from the original behaviour: this also registers the
            # opener globally for later urllib2.urlopen() calls.
            urllib2.install_opener(Opener)

            Response = Opener.open(Request)
            try:
                if Response.getcode() != 200:
                    return None
                sContent = Response.read()
            finally:
                # Release the connection whether or not the read succeeded.
                Response.close()

            # chardet may report no encoding at all (e.g. for an empty body),
            # so guard against None before inspecting the name.
            sEncoding = chardet.detect(sContent)['encoding']
            if sEncoding and sEncoding.startswith("GB"):
                sContent = sContent.decode("GBK")
            return sContent
        except Exception as e:
            # Best-effort loader: report the problem and signal failure with
            # None instead of propagating the exception to the caller.
            print(e)
            return None


if __name__ == '__main__':
    # Demo run: fetch a sample page as a GET with debug handlers enabled
    # and print whatever urlLoad returns (None if the fetch failed).
    sUrl = 'http://www.cnblogs.com/yingnan/p/5659966.html'
    demo = HtmlLoader()
    sContent = demo.urlLoad(sUrl=sUrl, isPost=False, isDebug=True)
    # Call form of print, matching the print(e) used inside urlLoad; with a
    # single argument it prints the same text under Python 2.
    print(sContent)

posted @ 2016-09-19 10:48 zhaoyingnan911 阅读(371) 评论(0) 收藏举报

刷新页面返回顶部

wuye

富不学富不长穷不学穷不尽

Python urllib2 调试

公告

wuye

富不学富不长 穷不学穷不尽

Python urllib2 调试

公告

富不学富不长穷不学穷不尽