urllib urllib2

#-*-coding:utf-8-*-
import urllib
import urllib2
import cookielib
##urllib
# Default page fetched by the urllib/urllib2 demos in this file.
url = "http://www.qq.com"
# Request headers that present the client as a desktop Chrome browser.
header = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/57.0.2987.133 Safari/537.36"
    ),
}

def urllib_study(url, savepath='D:\\pic\\index.html'):
    """Demonstrate the basic ``urllib`` response API, then download *url*.

    Prints, in order: the decoded page body, the HTTP status code, the URL
    that was actually retrieved, and the response headers. Afterwards the
    resource is downloaded to *savepath*.

    :param url: URL to open and download.
    :param savepath: file path the download is written to. It must name a
        file, not a directory, because ``urlretrieve`` opens it for writing.
    """
    urldata = urllib.urlopen(url)
    try:
        # Use the charset announced in the Content-Type header when there is
        # one; fall back to gbk, which is what this demo assumed originally.
        charset = urldata.info().getparam('charset') or 'gbk'
        print(urldata.read().decode(charset))  ## page content
        print(urldata.getcode())  ## HTTP status code
        print(urldata.geturl())  ## URL actually retrieved
        print(urldata.info())  ## response headers
    finally:
        # Release the connection even if decoding or printing fails.
        urldata.close()
    # The earlier target was a bare directory path, which cannot be opened
    # as a file; the destination is now a file path supplied by the caller.
    urllib.urlretrieve(url, savepath)  ## download to a file
def urlretrieve_study(url, savepath):
    """Download *url* to *savepath*, printing the progress as a percentage.

    :param url: URL of the resource to download.
    :param savepath: file path the download is written to.
    """
    def callback(a, b, c):
        """Progress hook: *a* blocks so far, *b* block size, *c* total size."""
        if c <= 0:
            # The total size is -1 when the server does not report it (and 0
            # for an empty body); a percentage cannot be computed then, so
            # skip the report instead of dividing by zero or going negative.
            return
        # a * b can overshoot the real size on the final block; cap at 100.
        down_progess = min(100.0 * a * b / c, 100)
        print('%.2f%%' % down_progess)

    urllib.urlretrieve(url, savepath, callback)

#urlretrieve_study(url,'D:\\pics\\qq.html')
##urllib2
def urllib2_study(url):
    """Open *url* with ``urllib2.urlopen`` and keep the response locally.

    :param url: URL (or ``urllib2.Request``) to open.
    """
    # The response is bound to a local name only; nothing is returned.
    urldata = urllib2.urlopen(url)
 
#-*-coding:utf-8-*-
import urllib2
import random
# Target URL for get_content(); left empty here as a placeholder.
url = ""
# Pool of User-Agent strings that get_content() picks from at random.
useragent = [
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36",
]
def get_content(url,useragent,referer=None):
    """Fetch *url* and return the raw response body.

    :param url: URL to request.
    :param useragent: sequence of User-Agent strings; one is chosen at
        random for the request. If it is empty, no User-Agent header is
        added by this function.
    :param referer: optional value for the ``Referer`` request header.
    :return: the response body exactly as returned by ``read()``.
    """
    req = urllib2.Request(url)
    if useragent:
        # random.choice() raises IndexError on an empty sequence.
        req.add_header('User-Agent', random.choice(useragent))
    if referer:
        # The header name is spelled "Referer" in HTTP.
        req.add_header('Referer', referer)
    # No explicit Host or GET header is set: add_header() needs both a name
    # and a value, urllib2 derives Host from the URL, and a Request without
    # data is already sent as GET.
    response = urllib2.urlopen(req)
    try:
        return response.read()
    finally:
        # Always release the connection, even if read() fails.
        response.close()

  

# NOTE(review): `urldata` is only assigned inside urllib2_study(), so these
# statements look like the displaced remainder of that function's body
# (the indentation appears to have been lost) -- confirm before running them
# at module level. Also note that `url` is rebound to "" earlier in the file.
# List the attributes and methods available on the response object.
print(dir(urldata))##urldata.read.decode('gbk') urldata.getcode() urldata.info() urldata.geturl()
# Build an opener whose HTTPCookieProcessor keeps cookies in a CookieJar.
cookie=cookielib.CookieJar()
opener=urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))
data=opener.open(url)## urllib2.urlopen itself also ends up calling opener.open
# Print the response body decoded as GBK.
print(data.read().decode('gbk'))
def urllib2_post(url):
    """POST a fixed demo form to *url* and return the response body.

    The form fields are ``name=howhy`` and ``age=32``; the request carries
    the module-level ``header`` dict as its headers.

    :param url: URL to post the form to.
    :return: the response body exactly as returned by ``read()``.
    """
    values = {'name': 'howhy', 'age': 32}
    data = urllib.urlencode(values)
    # Passing a data argument makes urllib2 send the request as a POST.
    req = urllib2.Request(url, data, header)
    response = urllib2.urlopen(req)
    try:
        # Hand the page back to the caller instead of discarding it.
        return response.read()
    finally:
        # Always release the connection, even if read() fails.
        response.close()
def handler():## plain HTTP handler
    """Fetch the Baidu home page through an opener built from HTTPHandler.

    Prints the response body; returns nothing.
    """
    http_handler = urllib2.HTTPHandler()
    request = urllib2.Request("http://www.baidu.com/")
    body = urllib2.build_opener(http_handler).open(request).read()
    print(body)

def proxy():## proxy handler
    """Fetch the Baidu home page through a fixed HTTP proxy.

    Prints the response body; returns nothing.
    """
    # Scheme -> proxy address mapping consumed by ProxyHandler.
    proxy_map = {"http": "219.141.153.41:80"}
    proxied_opener = urllib2.build_opener(urllib2.ProxyHandler(proxy_map))
    request = urllib2.Request("http://www.baidu.com/")
    response = proxied_opener.open(request)
    print(response.read())
def authhandler():## basic-auth handler
    """Open a page that requires HTTP basic authentication and print it.

    The URL and credentials are hard-coded demo values; returns nothing.
    """
    htppwd = urllib2.HTTPPasswordMgrWithDefaultRealm()
    # A realm of None makes these credentials apply to any realm at the URI.
    htppwd.add_password(None, "auth web", "username", "password")
    htp = urllib2.HTTPBasicAuthHandler(htppwd)
    opener = urllib2.build_opener(htp)  ## several handlers may be passed here
    # addheaders must hold (name, value) pairs only; an empty tuple in the
    # list cannot be unpacked when the headers are applied to a request.
    opener.addheaders = [("User-Agent", "dsffsdfdsfd")]
    # NOTE(review): "http://auth web" looks like a placeholder -- replace it
    # with the real protected URL before use.
    req = urllib2.Request("http://auth web")
    print(opener.open(req).read())

 

 

posted @ 2017-06-16 15:14  howhy  阅读(176)  评论(0)  编辑  收藏  举报