# BeautifulSoup  -- stray paste artifact; commented out so the module parses

#IP地址取自国内髙匿代理IP网站:http://www.xicidaili.com/nn/  
from bs4 import BeautifulSoup
import requests,random

def get_ipInfors(url, headers):
    '''
    Scrape proxy-IP records from one listing page.

    Each table row becomes one dict; all dicts are collected into a list,
    as the docstring always promised.  (The original body overwrote the
    ip_address/ip_port/... variables on every loop iteration and returned
    only the LAST row as a single dict.)

    :param url: listing-page URL to fetch
    :param headers: HTTP headers forwarded to requests.get
    :return: list of dicts with keys ip_address / ip_port / ip_type / ip_time
    '''
    web_data = requests.get(url, headers=headers)
    soup = BeautifulSoup(web_data.text, 'lxml')
    nodes = soup.find_all('tr')

    ip_infors = []
    # nodes[0] is the table header row, so skip it
    for node in nodes[1:]:
        ip_ = node.find_all('td')
        # ip_[8] is read below, so the row needs at least 9 cells;
        # guard against short rows (ads/separators) to avoid IndexError
        if len(ip_) > 8:
            ip_infors.append({
                "ip_address": ip_[1].text,
                "ip_port": ip_[2].text,
                "ip_type": ip_[5].text,
                "ip_time": ip_[8].text,
            })
    return ip_infors

def write_ipInfors(ip_infors):
    '''
    Append the scraped IP records to IP.txt, one record per line.

    Fixes over the original:
    - the file is opened once with a ``with`` block (the original re-opened
      and re-closed it on every loop iteration, and leaked the handle on error)
    - each individual record is written (the original passed the whole
      collection object to ``f.write``, which raises TypeError for non-str)

    :param ip_infors: iterable of IP-record dicts
    '''
    with open('IP.txt', 'a+', encoding='utf-8') as f:
        for ip_infor in ip_infors:
            f.write(f"{ip_infor}\n")

if __name__ == '__main__':
    # Crawl listing pages 1-9 and persist every scraped record.
    for page in range(1, 10):
        page_url = f'https://www.xicidaili.com/nn/{page}'
        request_headers = {
            'Host': 'www.xicidaili.com',
            'Referer': 'https://www.xicidaili.com/',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36'
        }
        ip_infors = get_ipInfors(page_url, headers=request_headers)
        proxies = write_ipInfors(ip_infors)

 

import requests
from bs4 import BeautifulSoup
import random
import time

# Pool of desktop-browser User-Agent strings; one is chosen at random per
# run (see __main__) so the scraper's requests look less uniform.
user_agents = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:53.0) Gecko/20100101 Firefox/53.0',
    'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)',
    'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0'
]

def get_ipInfors(url, headers):
    """
    Scrape proxy-IP records from one listing page.

    :param url: listing-page URL to fetch
    :param headers: HTTP headers forwarded to requests.get
    :return: list of dicts (ip_address / ip_port / ip_type / ip_time);
             empty list when the request or parsing fails
    """
    ip_infors = []
    try:
        web_data = requests.get(url, headers=headers)
        web_data.raise_for_status()  # raise on HTTP error status codes
        soup = BeautifulSoup(web_data.text, 'html.parser')
        # NOTE(review): class_='odd' comes from xicidaili's markup; confirm
        # that kuaidaili rows actually carry this class, otherwise no rows match
        nodes = soup.find_all('tr', class_='odd')

        for node in nodes:
            ip_ = node.find_all('td')
            # ip_[8] is read below, so the row needs at least 9 cells
            # (the original checked len > 7 and could still IndexError on ip_[8])
            if len(ip_) > 8:
                ip_infors.append({
                    "ip_address": ip_[1].text,
                    "ip_port": ip_[2].text,
                    "ip_type": ip_[5].text,
                    "ip_time": ip_[8].text
                })
    except Exception as e:
        # best-effort scrape: report the failure and return what was collected
        print(f"Error occurred: {e}")
    return ip_infors

def write_ipInfors(ip_infors):
    """Append each scraped IP record to IP.txt, one record per line."""
    lines = [f"{record}\n" for record in ip_infors]
    with open('IP.txt', 'a+', encoding='utf-8') as f:
        f.writelines(lines)

if __name__ == '__main__':
    # Shared request headers; the User-Agent is randomized once per run.
    base_headers = {
        'Host': 'www.kuaidaili.com',
        'Referer': 'https://www.kuaidaili.com/',
        'User-Agent': random.choice(user_agents),
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5'
    }
    # Crawl listing pages 1-9, writing each page's records before moving on.
    for page in range(1, 10):
        page_url = f'https://www.kuaidaili.com/free/inha/{page}'
        write_ipInfors(get_ipInfors(page_url, headers=base_headers))
        time.sleep(1)  # be polite: pause 1 second between requests

  

import requests
from bs4 import BeautifulSoup
import random
import time

# Pool of desktop-browser User-Agent strings; one is chosen at random per
# run (see __main__) so the scraper's requests look less uniform.
user_agents = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:53.0) Gecko/20100101 Firefox/53.0',
    'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)',
    'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0'
]

def get_ipInfors(url, headers):
    """
    Scrape proxy-IP records from one listing page.

    :param url: listing-page URL to fetch
    :param headers: HTTP headers forwarded to requests.get
    :return: list of dicts (ip_address / ip_port / ip_type / ip_time);
             empty list when the request or parsing fails
    """
    ip_infors = []
    try:
        web_data = requests.get(url, headers=headers)
        web_data.raise_for_status()  # raise on HTTP error status codes
        soup = BeautifulSoup(web_data.text, 'html.parser')
        # NOTE(review): class_='odd' comes from xicidaili's markup; confirm
        # that kuaidaili rows actually carry this class, otherwise no rows match
        nodes = soup.find_all('tr', class_='odd')

        for node in nodes:
            ip_ = node.find_all('td')
            # ip_[8] is read below, so the row needs at least 9 cells
            # (the original checked len > 7 and could still IndexError on ip_[8])
            if len(ip_) > 8:
                ip_infors.append({
                    "ip_address": ip_[1].text,
                    "ip_port": ip_[2].text,
                    "ip_type": ip_[5].text,
                    "ip_time": ip_[8].text
                })
    except Exception as e:
        # best-effort scrape: report the failure and return what was collected
        print(f"Error occurred: {e}")
    return ip_infors

def write_ipInfors(ip_infors):
    """Persist scraped IP records to IP.txt (append mode), one per line."""
    with open('IP.txt', 'a+', encoding='utf-8') as out:
        out.writelines(f"{entry}\n" for entry in ip_infors)

if __name__ == '__main__':
    # Request headers shared by every page fetch; UA picked once per run.
    hdrs = {
        'Host': 'www.kuaidaili.com',
        'Referer': 'https://www.kuaidaili.com/',
        'User-Agent': random.choice(user_agents),
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5'
    }
    # Walk pages 1 through 9 of the free-proxy listing.
    for page_no in range(1, 10):
        records = get_ipInfors(f'https://www.kuaidaili.com/free/inha/{page_no}', headers=hdrs)
        write_ipInfors(records)
        time.sleep(1)  # throttle: one request per second

  

import requests
from bs4 import BeautifulSoup

def get_ipInfors(url, headers):
    """
    Scrape proxy-IP records from one listing page.

    :param url: listing-page URL to fetch
    :param headers: HTTP headers forwarded to requests.get
    :return: list of dicts (ip_address / ip_port / ip_type / ip_time);
             empty list when the request or parsing fails
    """
    ip_infors = []
    try:
        web_data = requests.get(url, headers=headers)
        web_data.raise_for_status()  # raise on HTTP error status codes
        soup = BeautifulSoup(web_data.text, 'html.parser')
        # NOTE(review): class_='odd' comes from xicidaili's markup; confirm
        # that kuaidaili rows actually carry this class, otherwise no rows match
        nodes = soup.find_all('tr', class_='odd')

        for node in nodes:
            ip_ = node.find_all('td')
            # ip_[8] is read below, so the row needs at least 9 cells
            # (the original checked len > 7 and could still IndexError on ip_[8])
            if len(ip_) > 8:
                ip_infors.append({
                    "ip_address": ip_[1].text,
                    "ip_port": ip_[2].text,
                    "ip_type": ip_[5].text,
                    "ip_time": ip_[8].text
                })
    except Exception as e:
        # best-effort scrape: report the failure and return what was collected
        print(f"Error occurred: {e}")
    return ip_infors

def write_ipInfors(ip_infors):
    """Append every record in *ip_infors* to IP.txt, one line per record."""
    payload = "".join(f"{item}\n" for item in ip_infors)
    with open('IP.txt', 'a+', encoding='utf-8') as f:
        f.write(payload)

if __name__ == '__main__':
    # Fixed headers for every request to kuaidaili.
    headers = {
        'Host': 'www.kuaidaili.com',
        'Referer': 'https://www.kuaidaili.com/',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36'
    }
    # Scrape and persist pages 1-9 of the free-proxy listing.
    for page_no in range(1, 10):
        page_url = f'https://www.kuaidaili.com/free/inha/{page_no}'
        write_ipInfors(get_ipInfors(page_url, headers=headers))

  

 

# Proxy IPs scraped from the Kuaidaili free-proxy list: https://www.kuaidaili.com/free/inha/
# (original comment referenced xicidaili, but the code below fetches kuaidaili)
from bs4 import BeautifulSoup
import requests,random

def get_ipInfors(url, headers):
    '''
    Scrape proxy-IP records from one listing page.

    Each table row becomes one dict; all dicts are collected into a list,
    as the docstring always promised.  (The original body overwrote the
    ip_address/ip_port/... variables on every loop iteration and returned
    only the LAST row as a single dict.)

    :param url: listing-page URL to fetch
    :param headers: HTTP headers forwarded to requests.get
    :return: list of dicts with keys ip_address / ip_port / ip_type / ip_time
    '''
    web_data = requests.get(url, headers=headers)
    soup = BeautifulSoup(web_data.text, 'lxml')
    nodes = soup.find_all('tr')

    ip_infors = []
    # nodes[0] is the table header row, so skip it
    for node in nodes[1:]:
        ip_ = node.find_all('td')
        # ip_[8] is read below, so the row needs at least 9 cells;
        # guard against short rows (ads/separators) to avoid IndexError
        if len(ip_) > 8:
            ip_infors.append({
                "ip_address": ip_[1].text,
                "ip_port": ip_[2].text,
                "ip_type": ip_[5].text,
                "ip_time": ip_[8].text,
            })
    return ip_infors

def write_ipInfors(ip_infors):
    '''
    Append the scraped IP records to IP.txt, one record per line.

    Fixes over the original:
    - the file is opened once with a ``with`` block (the original re-opened
      and re-closed it on every loop iteration, and leaked the handle on error)
    - each individual record is written (the original passed the whole
      collection object to ``f.write``, which raises TypeError for non-str)

    :param ip_infors: iterable of IP-record dicts
    '''
    with open('IP.txt', 'a+', encoding='utf-8') as f:
        for ip_infor in ip_infors:
            f.write(f"{ip_infor}\n")

if __name__ == '__main__':
    # Crawl listing pages 1-9 of kuaidaili's free-proxy list.
    for i in range(1, 10):
        # BUG FIX: the original used a fixed URL and never referenced the loop
        # variable, so it fetched the same first page nine times; include the
        # page number in the path (kuaidaili paginates as /free/inha/<n>).
        url = 'https://www.kuaidaili.com/free/inha/{}'.format(i)
        headers = {
            'Host': 'www.kuaidaili.com',
            'Referer': 'https://www.kuaidaili.com/',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36'
        }
        ip_infors = get_ipInfors(url, headers=headers)
        proxies = write_ipInfors(ip_infors)

  

 

# posted @ 2025-09-01 17:21  aiplus  阅读(6)  评论(0)    收藏  举报
# 悬浮按钮示例
# (blog-page footer residue from the original scrape; commented out so the file parses)