第三方代理

三、代理

案例

import requests
import re

# Request headers sent with every HTTP call in this script; the User-Agent
# string is that of a desktop Chrome 123 browser on Windows 10.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/123.0.0.0 Safari/537.36"
    ),
}

def get_ip():
    """Yield proxy entries scraped from 66ip.cn, refetching forever.

    Each pass downloads the extraction page, takes the text that sits between
    the ``mediav_ad_height`` script and the following ``</div>``, strips CR/LF/
    tab characters, splits it on ``<br />`` and yields every piece except the
    last one. Once a batch has been handed out the page is requested again,
    so the generator never finishes on its own.

    A failed download, an unrecognised page layout, or an empty batch is
    reported on stdout and retried after a short pause instead of killing the
    generator.

    Yields:
        str: one entry from the page (presumably ``host:port`` -- confirm
        against the live page).
    """
    import time  # local import: only needed for the retry back-off below

    url = "http://www.66ip.cn/mo.php?sxb=&tqsl=5&port=&export=&ktip=&sxa=&submit=%CC%E1++%C8%A1&textarea="
    # Compiled once; the pattern does not change between passes.
    pattern = re.compile(
        r"var mediav_ad_height = '60';.*?</script>(?P<ipss>.*?)</div>", re.S
    )
    retry_delay = 2  # seconds to wait before refetching after a bad pass

    while True:
        try:
            # Without a timeout requests may block indefinitely on a dead host.
            resp = requests.get(url, headers=headers, timeout=10)
        except requests.RequestException as e:
            print(e)
            time.sleep(retry_delay)
            continue

        match = pattern.search(resp.text)
        if match is None:
            # The page did not contain the expected markers; calling
            # .group() on None would raise AttributeError.
            print("没有匹配到代理列表, 稍后重试")
            time.sleep(retry_delay)
            continue

        ips = (
            match.group("ipss")
            .strip()
            .replace("\r", "")
            .replace("\n", "")
            .replace("\t", "")
            .split("<br />")[0:-1]  # the piece after the final <br /> is dropped
        )
        print(ips)
        if not ips:
            # Avoid hammering the site in a tight loop when nothing came back.
            time.sleep(retry_delay)
            continue
        yield from ips

def spider(url=""):
    """Fetch *url* through proxies taken from the module-level ``gen``.

    Proxies are pulled one at a time from ``gen`` (the generator bound in the
    ``__main__`` section of this script). The first proxy that completes the
    request wins; any failure is printed and the next proxy is tried.

    Args:
        url: Address of the page to download. The default empty string is a
            placeholder and is rejected.

    Returns:
        str: the response body decoded as UTF-8.

    Raises:
        ValueError: if *url* is empty.
        RuntimeError: if ``gen`` has no more proxies to offer.
    """
    if not url:
        # Fail fast: an empty URL can never succeed and would otherwise burn
        # through proxies forever inside the retry loop.
        raise ValueError("spider() needs a non-empty url")

    # NOTE: no direct (unproxied) request is made here; doing so would expose
    # the real client address, and its response was never used.
    while True:
        try:
            proxy_ip = next(gen)
            # NOTE(review): only the "http" scheme is mapped, so an https://
            # url would presumably bypass the proxy -- confirm before use.
            proxy = {
                "http": "http://" + proxy_ip,
            }
            print(proxy)
            # Timeout keeps a dead proxy from blocking the loop indefinitely.
            resp = requests.get(
                url, proxies=proxy, verify=False, headers=headers, timeout=10
            )
            resp.encoding = "utf-8"
            return resp.text
        except StopIteration:
            # A finished generator raises StopIteration on every next();
            # swallowing it below would spin forever.
            raise RuntimeError("proxy generator is exhausted") from None
        except Exception as e:
            # Best-effort: any proxy/connection failure just moves on to the
            # next proxy.
            print(e)
            print("报错了!")

if __name__ == "__main__":
    # spider() looks up the module-level name `gen`, so bind the proxy
    # generator before calling it.
    gen = get_ip()
    print(spider())

posted @ 2024-04-01 21:51  cker  阅读(42)  评论(0)    收藏  举报