# 免费代理IP地址列表 (free proxy IP address list)

import time
import requests
import random
from bs4 import BeautifulSoup

# Module-level list of scraped proxy IP strings. get_proxy_list() appends the
# text of every <td data-title="IP"> cell it finds and returns this same list.
IP_POOL = []
def get_max_proxy(timeout=10):
    """Return the highest page number shown in the proxy list pagination.

    Downloads page 1 of the kuaidaili.com "inha" free-proxy listing and scans
    every ``<a>`` element inside ``<div id="listnav">`` for integer labels.

    Args:
        timeout: Seconds to wait for the HTTP response; passed straight to
            ``requests.get`` so a stalled server cannot hang the script.

    Returns:
        The largest integer found among the pagination link labels, or ``1``
        when no link with an integer label is present.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36",
        "Host": "www.kuaidaili.com",
        "origin": "https://www.kuaidaili.com"
    }
    response = requests.get(
        url="https://www.kuaidaili.com/free/inha/1/",
        headers=headers,
        timeout=timeout,
    )
    soup = BeautifulSoup(response.text, "lxml")

    max_page = 1
    for nav in soup.find_all(name="div", attrs={"id": "listnav"}):
        for link in nav.find_all(name="a"):
            try:
                page = int(link.text)
            except ValueError:
                # Link labels that are not integers cannot be page numbers.
                continue
            # Explicit comparison instead of the builtin max(): the script
            # part of this module rebinds the global name `max` to an int.
            if page > max_page:
                max_page = page
    return max_page

def get_proxy_list(max, page_limit=3, timeout=10):
    """Scrape proxy IPs from listing pages 1..max and add them to IP_POOL.

    Each page of the kuaidaili.com "inha" free-proxy listing is fetched after
    a short random pause, and the text of every ``<td data-title="IP">`` cell
    found inside a ``<tbody>`` is appended to the module-level ``IP_POOL``.

    Args:
        max: Highest page number to fetch (inclusive). The name shadows the
            builtin but is kept so existing callers keep working.
        page_limit: Upper bound on the number of pages fetched. Defaults to
            3, the debugging cap the original code hard-coded; pass ``None``
            to fetch every page up to ``max``.
        timeout: Seconds to wait for each HTTP response; passed straight to
            ``requests.get`` so a stalled server cannot hang the script.

    Returns:
        The module-level ``IP_POOL`` list (the same object, extended in
        place), so repeated calls accumulate entries.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36",
        "Host": "www.kuaidaili.com",
        "origin": "https://www.kuaidaili.com"
    }
    last_page = max if page_limit is None else min(max, page_limit)
    # range() excludes its stop value, so add 1 to include the last page
    # (otherwise max == 1 would fetch nothing at all).
    for page in range(1, last_page + 1):
        # Random 0.2-1.0 s pause between requests.
        time.sleep(0.2 * random.randint(1, 5))
        response = requests.get(
            url="https://www.kuaidaili.com/free/inha/{}/".format(page),
            headers=headers,
            timeout=timeout,
        )
        soup = BeautifulSoup(response.text, "lxml")
        for table_body in soup.find_all(name="tbody"):
            for cell in table_body.find_all(name="td", attrs={"data-title": "IP"}):
                IP_POOL.append(cell.text)
    return IP_POOL

# Script body: look up the page count, scrape the proxy IPs, then print the
# number of collected entries followed by the list itself (one per line).
max = get_max_proxy()
IP_POOL = get_proxy_list(max)
print(len(IP_POOL), IP_POOL, sep="\n")

# posted on 2021-11-10 00:10 topass123 阅读(962) 评论(0) 编辑收藏举报
#
# 道阻且长，行则将至，行而不辍，未来可期
#
# 免费代理IP地址列表