Python crawler: constantly switching proxies

A little trick I saw on our internal network. Damn, it's really impressive.

Both functions can simply be dropped into your main script; a sketch of the shared setup they rely on is shown below.
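The two functions below reference a `headers` dict and a shared `proxies` list that the original post does not show. Here is a minimal setup sketch, assuming module-level definitions (the User-Agent string is just a placeholder):

import re
import random
import urllib.request

# Shared state used by get_proxy() and change_proxy()
headers = {"User-Agent": "Mozilla/5.0"}   # example UA string, replace as needed
proxies = []                              # filled in by get_proxy()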

def get_proxy():
    # Scrape a list of free HTTP proxies from xicidaili.com
    url = "http://www.xicidaili.com"
    req = urllib.request.Request(url, headers=headers)
    response = urllib.request.urlopen(req)
    html = response.read().decode("utf-8")
    # Each proxy row looks like: <td>1.2.3.4</td> <td>8080</td>
    IP = re.compile(r'<td>(\d+)\.(\d+)\.(\d+)\.(\d+)</td>\s*<td>(\d+)</td>')
    proxy_ip = IP.findall(html)
    for each in proxy_ip:
        # Join the four octets and the port into "ip:port"
        proxies.append(":".join([".".join(each[0:4]), each[4]]))
    return proxies


def change_proxy():
    # Pick a random proxy; None means "use the local connection directly"
    proxy = random.choice(proxies)
    if proxy is None:
        proxy_support = urllib.request.ProxyHandler({})
    else:
        proxy_support = urllib.request.ProxyHandler({"http": proxy})
    opener = urllib.request.build_opener(proxy_support)
    opener.addheaders = [("User-Agent", headers["User-Agent"])]
    # Install the opener globally so later urlopen() calls go through this proxy
    urllib.request.install_opener(opener)
    print("Switched proxy to: %s" % ("local machine" if proxy is None else proxy))
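To actually rotate proxies while crawling, call get_proxy() once to fill the list, then call change_proxy() before each request or whenever one fails. The fetch_with_retry helper below is not from the original post, just a hedged sketch of how the two functions might be wired together:

def fetch_with_retry(url, retries=5):
    # Hypothetical helper: retry the request through a different proxy each time
    for _ in range(retries):
        change_proxy()
        try:
            req = urllib.request.Request(url, headers=headers)
            return urllib.request.urlopen(req, timeout=10).read()
        except Exception as e:
            print("Request failed (%s), switching proxy..." % e)
    return None

if __name__ == "__main__":
    get_proxy()                # populate the proxies list once
    proxies.append(None)       # optionally allow falling back to the local IP
    html = fetch_with_retry("http://example.com")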
posted @ 2017-05-09 09:55 qscqesze