Python 实现服务器监控:CPU、内存、磁盘空间、网络等

Posted on 2026-03-25 17:53  打杂滴  阅读(3)  评论(0)    收藏  举报


import psutil
import requests
import smtplib
import logging
import time
from datetime import datetime
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from typing import Dict, List, Tuple
import json
import argparse
import sys

# Configure logging: every record goes to a log file and to stdout.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        # Explicit UTF-8: the log messages are Chinese, and without an
        # encoding FileHandler falls back to the platform locale encoding.
        logging.FileHandler('server_monitor.log', encoding='utf-8'),
        logging.StreamHandler(sys.stdout),
    ],
)

class ServerMonitor:
    """Check local resource usage and the reachability of configured servers.

    Settings (servers, thresholds, check interval, alert e-mail) are read from
    a JSON config file, which is created with defaults when it does not exist.
    """

    # Minimum number of seconds between two alert e-mails for the same key.
    ALERT_COOLDOWN_SECONDS = 300

    def __init__(self, config_file: str = 'config.json'):
        """Load the configuration and start with an empty alert history."""
        self.config = self.load_config(config_file)
        # Maps alert key -> time.time() of the last alert sent for that key.
        self.alert_history = {}

    def load_config(self, config_file: str) -> dict:
        """Return the parsed JSON config, creating a default file if missing.

        Only a missing file is handled; a malformed file raises the JSON
        decoding error to the caller.
        """
        try:
            with open(config_file, 'r', encoding='utf-8') as f:
                return json.load(f)
        except FileNotFoundError:
            # No config yet: write a default one and use it.
            default_config = {
                "servers": [
                    {"name": "本地服务器", "host": "127.0.0.1", "port": 80},
                    {"name": "百度", "host": "www.baidu.com", "port": 443}
                ],
                "thresholds": {
                    "cpu_usage": 80,
                    "memory_usage": 85,
                    "disk_usage": 90
                },
                "check_interval": 60,
                "alert_email": {
                    "enabled": True,
                    "smtp_server": "smtp.qq.com",
                    "smtp_port": 465,
                    "sender_email": "111@qq.com",
                    "sender_password": "password",
                    "recipient_emails": ["111@qq.com"]
                }
            }
            with open(config_file, 'w', encoding='utf-8') as f:
                json.dump(default_config, f, indent=4, ensure_ascii=False)
            logging.info(f"已创建默认配置文件: {config_file}")
            return default_config

    def check_cpu_usage(self) -> Tuple[bool, float]:
        """Return (over_threshold, cpu_percent); (False, 0.0) on error."""
        try:
            cpu_percent = psutil.cpu_percent(interval=1)
            threshold = self.config['thresholds']['cpu_usage']
            return cpu_percent > threshold, cpu_percent
        except Exception as e:
            logging.error(f"检查CPU使用率时出错: {e}")
            return False, 0.0

    def check_memory_usage(self) -> Tuple[bool, float]:
        """Return (over_threshold, memory_percent); (False, 0.0) on error."""
        try:
            memory_percent = psutil.virtual_memory().percent
            threshold = self.config['thresholds']['memory_usage']
            return memory_percent > threshold, memory_percent
        except Exception as e:
            logging.error(f"检查内存使用率时出错: {e}")
            return False, 0.0

    def check_disk_usage(self) -> Tuple[bool, float]:
        """Return (over_threshold, disk_percent) for '/'; (False, 0.0) on error."""
        try:
            disk = psutil.disk_usage('/')
            disk_percent = (disk.used / disk.total) * 100
            threshold = self.config['thresholds']['disk_usage']
            return disk_percent > threshold, disk_percent
        except Exception as e:
            logging.error(f"检查磁盘使用率时出错: {e}")
            return False, 0.0

    def check_network_connectivity(self, host: str, port: int) -> bool:
        """Return True if a TCP connection to host:port succeeds within 5 s."""
        import socket

        try:
            # The context manager closes the socket even if connect_ex raises
            # (e.g. on a name-resolution failure).
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
                sock.settimeout(5)
                return sock.connect_ex((host, port)) == 0
        except Exception as e:
            logging.error(f"检查网络连通性时出错 ({host}:{port}): {e}")
            return False

    def check_http_service(self, url: str) -> bool:
        """Return True if a GET to url answers with status 200 within 10 s."""
        try:
            response = requests.get(url, timeout=10)
            return response.status_code == 200
        except Exception as e:
            logging.error(f"检查HTTP服务时出错 ({url}): {e}")
            return False

    def send_alert_email(self, subject: str, message: str):
        """Send a plain-text alert e-mail if alerting is enabled.

        Failures are logged, never raised, so monitoring keeps running.
        """
        email_config = self.config['alert_email']
        if not email_config['enabled']:
            return

        try:
            msg = MIMEMultipart()
            msg['From'] = email_config['sender_email']
            msg['To'] = ', '.join(email_config['recipient_emails'])
            msg['Subject'] = subject
            msg.attach(MIMEText(message, 'plain', 'utf-8'))

            host = email_config['smtp_server']
            port = int(email_config['smtp_port'])
            # Port 465 is implicit TLS: the handshake must happen on connect.
            # A plain SMTP() connection followed by STARTTLS fails there with
            # "Connection unexpectedly closed".
            implicit_tls = port == 465
            smtp_class = smtplib.SMTP_SSL if implicit_tls else smtplib.SMTP

            # `with` guarantees QUIT/close even when login or sendmail fails.
            with smtp_class(host, port, timeout=30) as server:
                if not implicit_tls:
                    server.starttls()
                server.login(email_config['sender_email'],
                             email_config['sender_password'])
                server.sendmail(email_config['sender_email'],
                                email_config['recipient_emails'],
                                msg.as_string())

            logging.info(f"告警邮件已发送: {subject}")
        except Exception as e:
            logging.error(f"发送告警邮件时出错: {e}")

    def _alert_due(self, alert_key: str) -> bool:
        """Return True and record the time if alert_key is outside its cooldown."""
        now = time.time()
        last_sent = self.alert_history.get(alert_key)
        if last_sent is not None and now - last_sent <= self.ALERT_COOLDOWN_SECONDS:
            return False
        self.alert_history[alert_key] = now
        return True

    def check_all_servers(self) -> List[Dict]:
        """Run every check and return one result dict per target.

        The first entry describes local CPU/memory/disk usage; one entry per
        configured server follows. An unreachable server triggers an alert
        e-mail, rate-limited per server by ALERT_COOLDOWN_SECONDS.
        """
        results = []

        # Local system resources.
        cpu_alert, cpu_usage = self.check_cpu_usage()
        memory_alert, memory_usage = self.check_memory_usage()
        disk_alert, disk_usage = self.check_disk_usage()

        local_result = {
            'name': '本地服务器',
            'type': 'system',
            'status': not (cpu_alert or memory_alert or disk_alert),
            'details': {
                'cpu_usage': f"{cpu_usage:.1f}%",
                'memory_usage': f"{memory_usage:.1f}%",
                'disk_usage': f"{disk_usage:.1f}%"
            },
            'alerts': []
        }

        if cpu_alert:
            local_result['alerts'].append(f"CPU使用率过高: {cpu_usage:.1f}%")
        if memory_alert:
            local_result['alerts'].append(f"内存使用率过高: {memory_usage:.1f}%")
        if disk_alert:
            local_result['alerts'].append(f"磁盘使用率过高: {disk_usage:.1f}%")

        results.append(local_result)

        # Configured servers.
        for server in self.config['servers']:
            # An entry with a 'url' gets an HTTP check; otherwise a TCP check.
            # The reported type is derived from the same condition so it
            # always matches the check that was actually run.
            is_http = 'url' in server
            if is_http:
                status = self.check_http_service(server['url'])
                details = {'service': 'HTTP'}
            else:
                status = self.check_network_connectivity(server['host'], server['port'])
                details = {'host': server['host'], 'port': server['port']}

            results.append({
                'name': server['name'],
                'type': 'http' if is_http else 'network',
                'status': status,
                'details': details,
                'alerts': [] if status else ["服务不可达"]
            })

            if not status and self._alert_due(f"{server['name']}_down"):
                subject = f"服务器告警: {server['name']} 服务不可达"
                message = (
                    f"\n时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n"
                    f"服务器: {server['name']}\n"
                    "问题: 服务不可达\n"
                    f"详情: {details}\n"
                )
                self.send_alert_email(subject, message)

        return results

    def print_status_report(self, results: List[Dict]):
        """Print a human-readable report of the given check results."""
        print("\n" + "="*60)
        print(f"服务器状态检查报告 - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        print("="*60)

        for result in results:
            status_icon = "✓" if result['status'] else "✗"
            status_text = "正常" if result['status'] else "异常"
            print(f"\n{status_icon} {result['name']} [{status_text}]")

            # Detail lines.
            if isinstance(result['details'], dict):
                for key, value in result['details'].items():
                    print(f" {key}: {value}")

            # Alert lines.
            for alert in result['alerts']:
                print(f" ⚠️ {alert}")

        print("\n" + "="*60)

    def run_continuous_monitoring(self):
        """Check, report and sleep in a loop until interrupted or an error occurs."""
        interval = self.config['check_interval']
        logging.info(f"开始持续监控,检查间隔: {interval} 秒")

        try:
            while True:
                results = self.check_all_servers()
                self.print_status_report(results)

                # Summarise the round in the log.
                healthy_count = sum(1 for r in results if r['status'])
                total_count = len(results)
                logging.info(f"健康服务器: {healthy_count}/{total_count}")

                time.sleep(interval)
        except KeyboardInterrupt:
            logging.info("监控已停止")
        except Exception as e:
            logging.error(f"监控过程中出错: {e}")

def main():
    """Parse command-line options and run a single check or continuous monitoring."""
    parser = argparse.ArgumentParser(description='服务器状态检查工具')
    parser.add_argument('--continuous', '-c', action='store_true',
                        help='持续监控模式')
    parser.add_argument('--config', '-f', default='config.json',
                        help='配置文件路径')

    args = parser.parse_args()

    monitor = ServerMonitor(args.config)

    if args.continuous:
        monitor.run_continuous_monitoring()
    else:
        # One-shot mode: run every check once and print the report.
        results = monitor.check_all_servers()
        monitor.print_status_report(results)


if __name__ == "__main__":
    main()

 

 

--------------------

 

PS F:\python> & C:/Users/Administrator/AppData/Local/Microsoft/WindowsApps/python3.13.exe f:/python/python_new/自动化/ServerMonitor.py
2026-03-25 17:52:02,637 - ERROR - 发送告警邮件时出错: Connection unexpectedly closed

============================================================
服务器状态检查报告 - 2026-03-25 17:52:02
============================================================

✓ 本地服务器 [正常]
cpu_usage: 8.5%
memory_usage: 83.7%
disk_usage: 17.7%

✗ 本地服务器 [异常]
host: 127.0.0.1
port: 80
⚠️ 服务不可达

✓ 百度 [正常]
host: www.baidu.com
port: 443

============================================================

 


import psutil
import smtplib
import schedule
import time
import logging
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.mime.application import MIMEApplication
from datetime import datetime
import json
import os

# Configure logging: every record goes to a log file and to the console.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        # Explicit UTF-8: the log messages are Chinese, and without an
        # encoding FileHandler falls back to the platform locale encoding.
        logging.FileHandler('server_monitor.log', encoding='utf-8'),
        logging.StreamHandler()
    ]
)

class ServerMonitorMailer:
    def __init__(self, config_file='monitor_config.json'):
        """初始化监控邮件系统"""
        self.config = self.load_config(config_file)
        self.smtp_config = self.config['smtp_settings']
        self.recipients = self.config['recipients']
        self.monitor_settings = self.config['monitor_settings']
       
    def load_config(self, config_file):
        """加载配置文件"""
        try:
            with open(config_file, 'r', encoding='utf-8') as f:
                return json.load(f)
        except FileNotFoundError:
            # 创建默认配置文件
            default_config = {
                "smtp_settings": {
                    "smtp_server": "smtp.gmail.com",
                    "smtp_port": 587,
                    "sender_email": "your_email@gmail.com",
                    "sender_password": "your_app_password"
                },
                "recipients": [
                    {
                        "name": "管理员",
                        "email": "admin@company.com",
                        "role": "admin"
                    }
                ],
                "monitor_settings": {
                    "cpu_threshold": 80,
                    "memory_threshold": 85,
                    "disk_threshold": 90,
                    "check_interval_minutes": 30
                },
                "report_schedule": {
                    "daily_report_time": "09:00",
                    "weekly_report_day": "monday",
                    "monthly_report_date": 1
                }
            }
            with open(config_file, 'w', encoding='utf-8') as f:
                json.dump(default_config, f, indent=4, ensure_ascii=False)
            logging.info(f"已创建默认配置文件: {config_file}")
            return default_config
   
    def get_system_info(self):
        """获取系统状态信息"""
        try:
            # CPU信息
            cpu_percent = psutil.cpu_percent(interval=1)
           
            # 内存信息
            memory = psutil.virtual_memory()
            memory_percent = memory.percent
           
            # 磁盘信息
            disk = psutil.disk_usage('/')
            disk_percent = (disk.used / disk.total) * 100
           
            # 网络信息
            net_io = psutil.net_io_counters()
           
            # 系统启动时间
            boot_time = datetime.fromtimestamp(psutil.boot_time())
           
            system_info = {
                'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                'cpu_percent': cpu_percent,
                'memory_percent': memory_percent,
                'disk_percent': disk_percent,
                'boot_time': boot_time.strftime('%Y-%m-%d %H:%M:%S'),
                'bytes_sent': net_io.bytes_sent,
                'bytes_recv': net_io.bytes_recv
            }
           
            return system_info
        except Exception as e:
            logging.error(f"获取系统信息时出错: {e}")
            return None
   
    def create_html_report(self, system_info, report_type="status"):
        """创建HTML格式的报告"""
        if not system_info:
            return ""
           
        css_style = """
        <style>
            body { font-family: Arial, sans-serif; margin: 20px; }
            .container { max-width: 800px; margin: 0 auto; background: white; padding: 20px; border-radius: 10px; box-shadow: 0 2px 10px rgba(0,0,0,0.1); }
            .header { text-align: center; color: #333; border-bottom: 2px solid #007cba; padding-bottom: 15px; }
            .status-card { background: #f8f9fa; border-left: 4px solid #007cba; padding: 15px; margin: 15px 0; border-radius: 5px; }
            .warning { border-left-color: #ffc107; background: #fff3cd; }
            .danger { border-left-color: #dc3545; background: #f8d7da; }
            .metric { display: flex; justify-content: space-between; margin: 10px 0; }
            .metric-name { font-weight: bold; }
            .metric-value { color: #007cba; }
            .footer { text-align: center; margin-top: 20px; color: #666; font-size: 12px; }
        </style>
        """
       
        # 根据指标值确定状态级别
        cpu_level = "danger" if system_info['cpu_percent'] > self.monitor_settings['cpu_threshold'] else \
                   "warning" if system_info['cpu_percent'] > self.monitor_settings['cpu_threshold'] * 0.8 else ""
                   
        memory_level = "danger" if system_info['memory_percent'] > self.monitor_settings['memory_threshold'] else \
                      "warning" if system_info['memory_percent'] > self.monitor_settings['memory_threshold'] * 0.8 else ""
                     
        disk_level = "danger" if system_info['disk_percent'] > self.monitor_settings['disk_threshold'] else \
                    "warning" if system_info['disk_percent'] > self.monitor_settings['disk_threshold'] * 0.8 else ""
       
        html_content = f"""
        <!DOCTYPE html>
        <html>
        <head>
            <meta charset="UTF-8">
            <title>服务器状态报告</title>
            {css_style}
        </head>
        <body>
            <div class="container">
                <div class="header">
                    <h1>服务器状态监控报告</h1>
                    <p>{system_info['timestamp']}</p>
                </div>
               
                <div class="status-card">
                    <h2>CPU使用情况</h2>
                    <div class="metric">
                        <span class="metric-name">CPU使用率:</span>
                        <span class="metric-value">{system_info['cpu_percent']:.1f}%</span>
                    </div>
                    {f'<p style="color:red;">⚠️ CPU使用率过高,请关注!</p>' if cpu_level == 'danger' else ''}
                    {f'<p style="color:orange;">⚠️ CPU使用率偏高</p>' if cpu_level == 'warning' else ''}
                </div>
               
                <div class="status-card">
                    <h2>内存使用情况</h2>
                    <div class="metric">
                        <span class="metric-name">内存使用率:</span>
                        <span class="metric-value">{system_info['memory_percent']:.1f}%</span>
                    </div>
                    {f'<p style="color:red;">⚠️ 内存使用率过高,请关注!</p>' if memory_level == 'danger' else ''}
                    {f'<p style="color:orange;">⚠️ 内存使用率偏高</p>' if memory_level == 'warning' else ''}
                </div>
               
                <div class="status-card">
                    <h2>存储使用情况</h2>
                    <div class="metric">
                        <span class="metric-name">磁盘使用率:</span>
                        <span class="metric-value">{system_info['disk_percent']:.1f}%</span>
                    </div>
                    {f'<p style="color:red;">⚠️ 磁盘空间不足,请及时清理!</p>' if disk_level == 'danger' else ''}
                    {f'<p style="color:orange;">⚠️ 磁盘使用率偏高</p>' if disk_level == 'warning' else ''}
                </div>
               
                <div class="status-card">
                    <h2>网络流量统计</h2>
                    <div class="metric">
                        <span class="metric-name">发送字节数:</span>
                        <span class="metric-value">{system_info['bytes_sent']:,} bytes</span>
                    </div>
                    <div class="metric">
                        <span class="metric-name">接收字节数:</span>
                        <span class="metric-value">{system_info['bytes_recv']:,} bytes</span>
                    </div>
                </div>
               
                <div class="status-card">
                    <h2>系统信息</h2>
                    <div class="metric">
                        <span class="metric-name">系统启动时间:</span>
                        <span class="metric-value">{system_info['boot_time']}</span>
                    </div>
                </div>
               
                <div class="footer">
                    <p>此邮件由服务器自动发送,请勿回复</p>
                    <p>如有疑问请联系系统管理员</p>
                </div>
            </div>
        </body>
        </html>
        """
       
        return html_content
   
    def send_email(self, subject, content, recipients=None, is_html=False):
        """发送邮件"""
        try:
            if recipients is None:
                recipients = self.recipients
               
            # 创建邮件对象
            msg = MIMEMultipart()
            msg['From'] = self.smtp_config['sender_email']
            msg['To'] = ', '.join([r['email'] for r in recipients])
            msg['Subject'] = subject
           
            # 添加邮件正文
            if is_html:
                msg.attach(MIMEText(content, 'html', 'utf-8'))
            else:
                msg.attach(MIMEText(content, 'plain', 'utf-8'))
           
            # 连接SMTP服务器并发送邮件
            server = smtplib.SMTP(self.smtp_config['smtp_server'], self.smtp_config['smtp_port'])
            server.starttls()
            server.login(self.smtp_config['sender_email'], self.smtp_config['sender_password'])
           
            text = msg.as_string()
            server.sendmail(self.smtp_config['sender_email'],
                          [r['email'] for r in recipients], text)
            server.quit()
           
            logging.info(f"邮件发送成功: {subject}")
            return True
           
        except Exception as e:
            logging.error(f"发送邮件时出错: {e}")
            return False
   
    def send_status_alert(self, system_info):
        """发送状态告警邮件"""
        # 检查是否有超出阈值的情况
        alerts = []
        if system_info['cpu_percent'] > self.monitor_settings['cpu_threshold']:
            alerts.append(f"CPU使用率过高: {system_info['cpu_percent']:.1f}%")
        if system_info['memory_percent'] > self.monitor_settings['memory_threshold']:
            alerts.append(f"内存使用率过高: {system_info['memory_percent']:.1f}%")
        if system_info['disk_percent'] > self.monitor_settings['disk_threshold']:
            alerts.append(f"磁盘使用率过高: {system_info['disk_percent']:.1f}%")
       
        if alerts:
            subject = f"【警告】服务器状态异常 - {system_info['timestamp']}"
            html_content = self.create_html_report(system_info)
            self.send_email(subject, html_content, is_html=True)
            return True
        return False
   
    def send_daily_report(self):
        """发送每日汇总报告"""
        system_info = self.get_system_info()
        if system_info:
            subject = f"【日报】服务器状态汇总 - {system_info['timestamp'][:10]}"
            html_content = self.create_html_report(system_info, "daily")
            self.send_email(subject, html_content, is_html=True)
            logging.info("每日报告已发送")
   
    def send_weekly_report(self):
        """发送每周汇总报告"""
        system_info = self.get_system_info()
        if system_info:
            subject = f"【周报】服务器状态汇总 - {system_info['timestamp'][:10]}"
            html_content = self.create_html_report(system_info, "weekly")
            self.send_email(subject, html_content, is_html=True)
            logging.info("每周报告已发送")
   
    def start_monitoring(self):
        """开始监控"""
        logging.info("开始服务器状态监控...")
       
        # 设置定时任务
        schedule.every(self.monitor_settings['check_interval_minutes']).minutes.do(
            self.check_and_alert
        )
       
        # 设置日常报告
        daily_time = self.config['report_schedule']['daily_report_time']
        schedule.every().day.at(daily_time).do(self.send_daily_report)
       
        # 设置周报
        weekly_day = self.config['report_schedule']['weekly_report_day']
        getattr(schedule.every(), weekly_day).at("09:00").do(self.send_weekly_report)
       
        # 立即执行一次检查
        self.check_and_alert()
       
        # 持续运行
        while True:
            schedule.run_pending()
            time.sleep(60)
   
    def check_and_alert(self):
        """检查系统状态并发送告警"""
        system_info = self.get_system_info()
        if system_info:
            logging.info(f"系统检查完成 - CPU: {system_info['cpu_percent']:.1f}%, "
                        f"内存: {system_info['memory_percent']:.1f}%, "
                        f"磁盘: {system_info['disk_percent']:.1f}%")
           
            # 发送告警邮件(如果有异常)
            self.send_status_alert(system_info)

def main():
    """Entry point: choose the run mode from the first command-line argument.

    No argument starts continuous monitoring; '--once' takes one snapshot and
    builds a report; '--test-email' sends the report as a test e-mail. Any
    other argument prints a usage hint.
    """
    import sys

    try:
        monitor = ServerMonitorMailer()
        cli_args = sys.argv[1:]

        if not cli_args:
            # Continuous monitoring mode.
            monitor.start_monitoring()
            return

        mode = cli_args[0]
        if mode == '--once':
            # Single-check mode.
            snapshot = monitor.get_system_info()
            if snapshot:
                monitor.create_html_report(snapshot)
                print("系统状态检查完成,报告已生成")
        elif mode == '--test-email':
            # Send a test e-mail containing the current report.
            snapshot = monitor.get_system_info()
            if snapshot:
                delivered = monitor.send_email(
                    "服务器监控系统测试邮件",
                    monitor.create_html_report(snapshot),
                    is_html=True
                )
                print("测试邮件发送成功" if delivered else "测试邮件发送失败")
        else:
            print("未知参数。可用参数: --once (单次检查), --test-email (测试邮件)")

    except KeyboardInterrupt:
        logging.info("监控程序已停止")
    except Exception as e:
        logging.error(f"程序运行出错: {e}")


if __name__ == "__main__":
    main()

博客园  ©  2004-2026
浙公网安备 33010602011771号 浙ICP备2021040463号-3