url测试脚本4

### 功能 #1:多线程批量URL测试 #2:如需对结果进行比对,则需修改为不采用多线程
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# __author__ =

import requests
from requests.packages import urllib3
from concurrent.futures import ThreadPoolExecutor, as_completed
import time
from datetime import datetime
import os


def get_status_code(url):
    """
    Fetch a single URL with GET and describe the outcome as a dict.

    The returned dict always has the keys ``url``, ``final_url``,
    ``status_code``, ``success``, ``error_message``, ``response_time``,
    ``content_length`` and ``redirected``. On a completed request
    ``status_code`` is the numeric HTTP status; on failure it is one of the
    labels "Timeout", "Connection Error" or "Request Error".
    """

    def _failure(label, message):
        # Shared shape for every failed request: nothing was fetched, so the
        # final URL is the requested one and there is no timing or body.
        return {
            "url": url,
            "final_url": url,
            "status_code": label,
            "success": False,
            "error_message": message,
            "response_time": None,
            "content_length": 0,
            "redirected": False
        }

    try:
        request_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }

        began = time.time()
        # Certificate verification is off (development only), so silence the
        # matching InsecureRequestWarning.
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
        response = requests.get(
            url,
            headers=request_headers,
            timeout=8,
            allow_redirects=True,
            verify=False,
        )
        elapsed = time.time() - began

        landed_on = response.url
        return {
            "url": url,
            "final_url": landed_on,
            "status_code": response.status_code,
            "success": True,
            "error_message": None,
            "response_time": elapsed,
            "content_length": len(response.content),
            "redirected": url != landed_on
        }

    # Timeout is checked before ConnectionError, same as the handlers' original order.
    except requests.exceptions.Timeout:
        return _failure("Timeout", "请求超时(8秒)")
    except requests.exceptions.ConnectionError:
        return _failure("Connection Error", "连接错误")
    except requests.exceptions.RequestException as exc:
        return _failure("Request Error", str(exc))


def read_urls_from_file(filename="urls.txt"):
    """
    Read a list of URLs from a text file, one per line.

    Blank lines and lines starting with ``#`` are skipped. Entries without an
    ``http://`` or ``https://`` prefix get ``https://`` prepended.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list of URL strings. The list is empty when the file is missing,
        unreadable, or contains no URLs, so callers can always iterate it.
    """
    urls = []
    try:
        # utf-8-sig also accepts plain UTF-8 but strips a leading BOM, which
        # would otherwise be glued onto the first URL in the file.
        with open(filename, 'r', encoding='utf-8-sig') as f:
            for line in f:
                url = line.strip()
                if not url or url.startswith('#'):
                    continue
                if not url.startswith(('http://', 'https://')):
                    url = 'https://' + url
                urls.append(url)
    except FileNotFoundError:
        print(f"文件 {filename} 不存在")
        return []
    except (OSError, UnicodeError) as e:
        # Discard any partially read entries rather than returning half a file.
        print(f"读取文件错误: {e}")
        return []

    print(f"从 {filename} 读取了 {len(urls)} 个URL")
    return urls

def save_results_to_file(results, filename=None):
    """
    Write a plain-text report of URL check results.

    The report contains a header with totals, a per-status-code count, and a
    detail section with one entry per result.

    Args:
        results: List of result dicts with the keys ``url``, ``final_url``,
            ``status_code``, ``success``, ``error_message``,
            ``response_time``, ``content_length`` and ``redirected``.
        filename: Output path. When None, a timestamped
            ``url_check_results_YYYYmmdd_HHMMSS.txt`` name is used.

    Returns:
        The path that was written, or None if building or writing the report
        failed (the error is printed).
    """
    if filename is None:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"url_check_results_{timestamp}.txt"

    try:
        # Build the whole report in memory first so that malformed input
        # cannot leave a truncated file behind.
        total = len(results)
        success_count = sum(1 for r in results if r['success'])

        status_counts = {}
        for result in results:
            status = str(result['status_code'])
            status_counts[status] = status_counts.get(status, 0) + 1

        rule = "=" * 70 + "\n"
        lines = [
            # Header
            rule,
            "URL状态码检查报告\n",
            rule,
            f"生成时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n",
            f"检查URL总数: {total}\n",
            # Totals
            f"成功: {success_count}, 失败: {total - success_count}\n",
            "=" * 70 + "\n\n",
            # Per-status summary
            "状态码统计:\n",
            "-" * 50 + "\n",
        ]
        lines.extend(
            f"{status}: {count} 个URL\n"
            for status, count in sorted(status_counts.items())
        )
        lines.append("\n")

        # Detail section
        lines.append("详细结果:\n")
        lines.append(rule)

        for i, result in enumerate(results, 1):
            # Distinct markers so success and failure are visible at a glance.
            status_icon = "✅" if result['success'] else "❌"
            lines.append(f"{status_icon} [{i}/{total}] {result['url']}\n")
            lines.append(f"   状态码: {result['status_code']}\n")

            if result['redirected']:
                lines.append(f"   重定向: {result['final_url']}\n")

            # Compare against None: a measured time of 0.0 is still a value.
            if result['response_time'] is not None:
                lines.append(f"   响应时间: {result['response_time']:.3f}秒\n")

            if result['content_length'] > 0:
                lines.append(f"   内容大小: {result['content_length']} 字节\n")

            if result['error_message']:
                lines.append(f"   错误信息: {result['error_message']}\n")

            lines.append("\n")

        with open(filename, 'w', encoding='utf-8') as f:
            f.writelines(lines)

        return filename

    except Exception as e:
        # Best effort: report the problem and let the caller carry on.
        print(f"保存文件时出错: {e}")
        return None


def batch_check_urls(urls, max_workers=8):
    """
    Check many URLs concurrently and return one result dict per URL.

    Requests run in a thread pool, but the returned list is in the same order
    as ``urls`` regardless of which request finishes first, so the output of
    two runs over the same input can be compared entry by entry.

    Args:
        urls: List of URL strings to check.
        max_workers: Number of worker threads used for the requests.

    Returns:
        A list of result dicts, ``results[i]`` belonging to ``urls[i]``.
        An empty list is returned when ``urls`` is empty.
    """
    if not urls:
        return []

    total = len(urls)
    # Pre-sized so each result can be stored at its input position.
    results = [None] * total

    print(f"开始检查 {total} 个URL...")
    print(f"并发数: {max_workers}")
    start_time = time.time()

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_slot = {
            executor.submit(get_status_code, url): (index, url)
            for index, url in enumerate(urls)
        }

        for done, future in enumerate(as_completed(future_to_slot), 1):
            index, url = future_to_slot[future]
            try:
                results[index] = future.result()
            except Exception as e:
                # The worker raised something it did not handle itself; record
                # a failure entry of the same shape instead of losing the URL.
                results[index] = {
                    "url": url,
                    "final_url": url,
                    "status_code": "Error",
                    "success": False,
                    "error_message": f"处理错误: {e}",
                    "response_time": None,
                    "content_length": 0,
                    "redirected": False
                }

            # Report progress every 5 URLs and at the end, for failed
            # futures as well as successful ones.
            if done % 5 == 0 or done == total:
                progress = done / total * 100
                print(f"进度: {done}/{total} ({progress:.1f}%)")

    total_time = time.time() - start_time

    # Summary
    success_count = sum(1 for r in results if r['success'])
    print("\n检查完成!")
    print(f"总耗时: {total_time:.2f}秒")
    print(f"平均每个URL: {total_time / total:.3f}秒")
    print(f"成功: {success_count}, 失败: {total - success_count}")

    return results


def main():
    """
    Interactive entry point; takes no arguments.

    Reads the URL list via ``read_urls_from_file()``, checks every URL with
    ``batch_check_urls``, saves a report with ``save_results_to_file`` and
    prints a per-status-code summary. Exits early, without checking anything,
    when no URLs are available.
    """
    print("=" * 50)
    print("URL状态码批量检查工具")
    print("=" * 50)

    # Load the URL list
    urls = read_urls_from_file()

    if not urls:
        # Covers both an empty list and a None result: nothing to check, so
        # stop here instead of iterating a missing list or writing an empty
        # report.
        print("没有找到可检查的URL,请在 urls.txt 中每行填写一个URL后重新运行")
        input("\n按回车键退出...")
        return

    print("将检查以下URL:")
    for i, url in enumerate(urls, 1):
        print(f"  {i}. {url}")

    print("\n开始检查...(按Ctrl+C可中断)")

    try:
        # Check all URLs
        results = batch_check_urls(urls, max_workers=6)

        # Save the report
        output_file = save_results_to_file(results)

        if output_file:
            print(f"\n结果已保存到: {output_file}")

            # Per-status-code summary
            status_stats = {}
            for result in results:
                status = str(result['status_code'])
                status_stats[status] = status_stats.get(status, 0) + 1

            print("\n状态码统计:")
            for status, count in sorted(status_stats.items()):
                print(f"  {status}: {count}个")

        input("\n按回车键退出...")

    except KeyboardInterrupt:
        print("\n用户中断操作")
    except Exception as e:
        # Top-level boundary: report the failure and wait for the user
        # before the function returns.
        print(f"程序执行出错: {e}")
        input("按回车键退出...")


if __name__ == "__main__":
    # Run the interactive entry point only when executed as a script.
    main()

 

posted @ 2025-09-19 09:09  hopeccie  阅读(4)  评论(0)    收藏  举报