url测试脚本4
### 功能
1. 多线程批量 URL 测试
2. 如需对多次运行的结果进行比对,需改为不使用多线程(多线程下结果按完成先后排列,顺序不固定)
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# __author__ =
#
# Batch URL status checker.
#
# Reads URLs from a text file (one per line), requests each of them
# concurrently with a thread pool, prints a summary and writes a detailed
# report to a timestamped text file.

import os
import time
from collections import Counter
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime

import requests
from requests.packages import urllib3


def _failure_result(url, status, message):
    """Build the result dict used for every failed check.

    Args:
        url: The URL that was being checked.
        status: Short status label stored under ``status_code``.
        message: Human-readable error text stored under ``error_message``.

    Returns:
        A dict with the same keys as a successful result, with
        ``success`` set to False and no timing/size information.
    """
    return {
        "url": url,
        "final_url": url,
        "status_code": status,
        "success": False,
        "error_message": message,
        "response_time": None,
        "content_length": 0,
        "redirected": False,
    }


def get_status_code(url, timeout=8):
    """Fetch a single URL and describe the outcome.

    Args:
        url: Absolute URL to request with HTTP GET (redirects are followed).
        timeout: Timeout in seconds passed to ``requests.get``.

    Returns:
        A dict with the keys ``url``, ``final_url``, ``status_code``,
        ``success``, ``error_message``, ``response_time``,
        ``content_length`` and ``redirected``. On failure ``status_code``
        holds a short text label instead of an integer.

    Only ``requests`` exceptions are converted into failure results; any
    other exception propagates to the caller.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    }
    # NOTE: certificate verification is switched off below (verify=False) and
    # the matching warning is silenced. This is acceptable for development
    # checks only; do not rely on the responses for anything sensitive.
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    try:
        # perf_counter is monotonic, so the elapsed time cannot go negative
        # if the wall clock is adjusted during the request.
        started = time.perf_counter()
        response = requests.get(url, headers=headers, timeout=timeout,
                                allow_redirects=True, verify=False)
        elapsed = time.perf_counter() - started
    # Timeout must be tested before ConnectionError: a connect timeout is a
    # subclass of both and should be reported as a timeout.
    except requests.exceptions.Timeout:
        return _failure_result(url, "Timeout", f"请求超时({timeout}秒)")
    except requests.exceptions.ConnectionError:
        return _failure_result(url, "Connection Error", "连接错误")
    except requests.exceptions.RequestException as e:
        return _failure_result(url, "Request Error", str(e))

    return {
        "url": url,
        "final_url": response.url,
        "status_code": response.status_code,
        "success": True,
        "error_message": None,
        "response_time": elapsed,
        "content_length": len(response.content),
        "redirected": url != response.url,
    }


def read_urls_from_file(filename="urls.txt"):
    """Read the list of URLs to check from a text file.

    Blank lines and lines starting with ``#`` are skipped. Entries without
    an ``http://`` or ``https://`` prefix get ``https://`` prepended.

    Args:
        filename: Path of the text file, one URL per line.

    Returns:
        A list of URL strings. The list is empty when the file is missing,
        unreadable or contains no URLs (a message is printed in the first
        two cases).
    """
    if not os.path.exists(filename):
        print(f"文件 {filename} 不存在")
        return []

    urls = []
    try:
        # utf-8-sig also reads plain UTF-8 but strips a leading BOM, which
        # would otherwise be glued to the first URL.
        with open(filename, 'r', encoding='utf-8-sig') as f:
            for line in f:
                url = line.strip()
                if not url or url.startswith('#'):
                    continue
                # Compare case-insensitively so "HTTP://..." is not prefixed twice.
                if not url.lower().startswith(('http://', 'https://')):
                    url = 'https://' + url
                urls.append(url)
    except (OSError, UnicodeDecodeError) as e:
        print(f"读取文件错误: {e}")
        return []

    print(f"从 {filename} 读取了 {len(urls)} 个URL")
    return urls


def save_results_to_file(results, filename=None):
    """Write a plain-text report for the given check results.

    Args:
        results: List of result dicts as produced by ``get_status_code``.
        filename: Target path. When None, a name of the form
            ``url_check_results_<YYYYmmdd_HHMMSS>.txt`` is generated.

    Returns:
        The path that was written, or None when the file could not be
        written (the error is printed).
    """
    now = datetime.now()
    if filename is None:
        filename = f"url_check_results_{now.strftime('%Y%m%d_%H%M%S')}.txt"

    total = len(results)
    success_count = sum(1 for r in results if r['success'])
    status_counts = Counter(str(r['status_code']) for r in results)
    rule = "=" * 70

    # Header and overall statistics.
    lines = [
        rule,
        "URL状态码检查报告",
        rule,
        f"生成时间: {now.strftime('%Y-%m-%d %H:%M:%S')}",
        f"检查URL总数: {total}",
        f"成功: {success_count}, 失败: {total - success_count}",
        rule,
        "",
        "状态码统计:",
        "-" * 50,
    ]
    # Summary grouped by status code.
    lines.extend(
        f"{status}: {count} 个URL" for status, count in sorted(status_counts.items())
    )
    lines += ["", "详细结果:", rule]

    # One entry per checked URL.
    for i, result in enumerate(results, 1):
        status_icon = "✅" if result['success'] else "❌"
        lines.append(f"{status_icon} [{i}/{total}] {result['url']}")
        lines.append(f" 状态码: {result['status_code']}")
        if result['redirected']:
            lines.append(f" 重定向: {result['final_url']}")
        # "is not None" so that a measured time of 0.0 is still reported.
        if result['response_time'] is not None:
            lines.append(f" 响应时间: {result['response_time']:.3f}秒")
        if result['content_length'] > 0:
            lines.append(f" 内容大小: {result['content_length']} 字节")
        if result['error_message']:
            lines.append(f" 错误信息: {result['error_message']}")
        lines.append("")

    try:
        with open(filename, 'w', encoding='utf-8') as f:
            f.write("\n".join(lines) + "\n")
    except OSError as e:
        print(f"保存文件时出错: {e}")
        return None
    return filename


def batch_check_urls(urls, max_workers=8):
    """Check many URLs concurrently and print progress and a summary.

    Args:
        urls: List of URLs to check.
        max_workers: Number of worker threads.

    Returns:
        A list with one result dict per URL, in completion order (not in
        input order). Empty when ``urls`` is empty.

    Raises:
        KeyboardInterrupt: Re-raised after cancelling the checks that have
            not started yet.
    """
    if not urls:
        return []

    results = []
    total = len(urls)
    print(f"开始检查 {total} 个URL...")
    print(f"并发数: {max_workers}")

    started = time.perf_counter()
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_url = {executor.submit(get_status_code, url): url for url in urls}
        try:
            for i, future in enumerate(as_completed(future_to_url), 1):
                try:
                    results.append(future.result())
                except Exception as e:
                    # get_status_code only handles requests' own exceptions;
                    # anything else is recorded as a failure for that URL.
                    url = future_to_url[future]
                    results.append(_failure_result(url, "Error", f"处理错误: {e}"))
                # Report progress every 5 URLs and at the end, for failed
                # checks as well as successful ones.
                if i % 5 == 0 or i == total:
                    print(f"进度: {i}/{total} ({i / total * 100:.1f}%)")
        except KeyboardInterrupt:
            # Without this the executor would still work through every
            # queued URL before the interrupt takes effect.
            for pending in future_to_url:
                pending.cancel()
            raise
    total_time = time.perf_counter() - started

    # Summary statistics.
    success_count = sum(1 for r in results if r['success'])
    print("\n检查完成!")
    print(f"总耗时: {total_time:.2f}秒")
    print(f"平均每个URL: {total_time / total:.3f}秒")
    print(f"成功: {success_count}, 失败: {total - success_count}")
    return results


def main():
    """Run the checker interactively; takes no arguments.

    Reads ``urls.txt`` from the current directory, checks every URL, saves
    the report and prints per-status counts. Stops early with a message
    when there is nothing to check.
    """
    print("=" * 50)
    print("URL状态码批量检查工具")
    print("=" * 50)

    # Load the URL list.
    urls = read_urls_from_file()
    if not urls:
        # Nothing to check: stop here instead of iterating over an empty or
        # missing list.
        print("没有找到URL,请在 urls.txt 中每行填写一个URL后重试")
        input("按回车键退出...")
        return

    print("将检查以下URL:")
    for i, url in enumerate(urls, 1):
        print(f" {i}. {url}")
    print("\n开始检查...(按Ctrl+C可中断)")

    try:
        results = batch_check_urls(urls, max_workers=6)

        # Save the report.
        output_file = save_results_to_file(results)
        if output_file:
            print(f"\n结果已保存到: {output_file}")

        # Per-status counts.
        status_stats = Counter(str(result['status_code']) for result in results)
        print("\n状态码统计:")
        for status, count in sorted(status_stats.items()):
            print(f" {status}: {count}个")
        input("\n按回车键退出...")
    except KeyboardInterrupt:
        print("\n用户中断操作")
    except Exception as e:
        # Top-level boundary: report the error and keep the console open.
        print(f"程序执行出错: {e}")
        input("按回车键退出...")


if __name__ == "__main__":
    # Run the main function directly.
    main()
浙公网安备 33010602011771号