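# NOTE: "点击查看代码" ("click to view code") is a web-page label captured with this paste; it is not part of the script.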
#!/usr/local/bin/python3
#-*-coding:utf-8-*-
import requests
import subprocess,json
import paramiko
import socket
# Value compared against the result of socket.connect_ex(); 0 means connected.
NORMAL = 0
# Default socket timeout in seconds.
TIMEOUT = 2
# Hosts to monitor remotely.
ip_list = [
    "29.2.210.202",
    "29.2.210.203",
    "29.2.210.204",
    "29.2.210.235",
    "29.2.210.238",
    "29.2.210.239",
    "29.23.177.31",
    "29.23.177.32",
    "29.3.218.207",
    "29.3.218.212",
    "29.2.209.216",
    "29.2.209.220",
    "29.3.218.208",
    "29.3.218.213",
]
# Check whether a TCP port accepts connections on each host.
def port_check(port, host_list, timeout=TIMEOUT):
    """Return the hosts in *host_list* whose TCP *port* could not be reached.

    Args:
        port: TCP port number to probe.
        host_list: Iterable of host addresses to probe.
        timeout: Per-connection timeout in seconds.

    Returns:
        A list of the hosts for which ``connect_ex`` returned something other
        than ``NORMAL``, in the order they were probed.
    """
    down_list = []
    for ip in host_list:
        # The context manager closes the socket after every probe, even if
        # the probe raises.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as cs:
            # The timeout only takes effect if it is set before connecting.
            cs.settimeout(timeout)
            status = cs.connect_ex((ip, port))
        if status != NORMAL:
            down_list.append(ip)
    return down_list
# Send an alert e-mail through the HTTP mail service.
def send_mail(msg_sub, msg_content):
    """POST an alert message to the mail service and return its response.

    Args:
        msg_sub: Subject of the e-mail.
        msg_content: Body of the e-mail.

    Returns:
        The response object returned by ``requests.post``.
    """
    msg_to = ["zoubin51@126.com"]
    email_url = "http://29.23.177.177:8080/MailService/sendMail"
    # NOTE(review): media-type parameters are normally separated with ";"
    # ("application/json;charset=..."), not ":". Left unchanged because the
    # mail service may depend on this exact value -- confirm.
    headers = {"Content-Type": "application/json:charset=ascii"}
    param = {
        "msg_from": "bloan-rcpm-cicd@pingan.com.cn",
        "msg_to": msg_to,
        "msg_content": msg_content,
        "msg_sub": msg_sub,
    }
    # The requests module has no ``port`` function; ``post`` is the HTTP POST call.
    res = requests.post(email_url, headers=headers, json=param)
    return res
# Check which hosts do not answer a ping and send an alert for them.
def ping(host_list):
    """Ping every host once and e-mail an alert listing the unreachable ones.

    Args:
        host_list: Iterable of host addresses to ping.

    Returns:
        A list of the hosts whose ``ping -c1 -W1`` exited with a non-zero
        status, in input order. The list is empty when every host answered.

    Raises:
        FileNotFoundError: If the ``ping`` executable cannot be found.
    """
    # Local result list: the function no longer relies on a global that may
    # not exist yet when it is first called.
    host_down = []
    for host in host_list:
        # An argument list (no shell) avoids quoting problems with the host value.
        result = subprocess.run(
            ["ping", "-c1", "-W1", str(host)],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
        if result.returncode != 0:
            host_down.append(host)
    # One alert for all unreachable hosts, sent after every host was probed.
    if host_down:
        msg_sub = "jenkins 节点宕机!"
        msg_content = "jenkins 节点 %s 已经宕机,请关注!" % host_down
        send_mail(msg_sub, msg_content)
    return host_down
# Probe one service port on a group of hosts and alert on failures.
def service_check(port, lists, service):
    """E-mail an alert when *service* is unreachable on any host in *lists*.

    Args:
        port: TCP port the service listens on.
        lists: Hosts to probe.
        service: Service name used in the alert subject and body.

    Returns:
        None.
    """
    unreachable = port_check(port, lists)
    # Nothing to report when every host accepted the connection.
    if not unreachable:
        return
    subject = "%s 服务掉线" % service
    body = "jenkins 节点 %s, %s服务已经掉线,请关注!" % (unreachable, service)
    send_mail(subject, body)
# Hosts whose SSH port (22) does not accept connections.
ssh_down_list = port_check(22, ip_list)
# Load the local RSA private key file; get_monitor() authenticates with it.
key = paramiko.RSAKey.from_private_key_file("/root/.ssh/id_rsa")
# Hosts with a reachable SSH service; the monitoring script is run on these.
ssh_up_list = [host for host in ip_list if host not in ssh_down_list]
def get_monitor(up_list,user):
for i in up_list:
trans = paramiko.Transport((i,22))
trans.connect(username=user,pkey=key)
# 创建ssh对象,将transport指定为上面的trans
ssh = paramiko.SSHClient()
ssh._transport=trans
# 创建sftp对象,指定连接的通道,用于下发python执行脚本到被监控的主机
sftp = paramiko.SFTPClient.from_transport(trans)
# 上传要在远端节点执行的脚本文件
sftp.put('/root/client_monitor.py','/tmp/client_monitor.py')
# 添加执行权限,运行脚本
ssh.exec_command('chmod +x /tmp/client_monitor.py')
stdin,stdout,stderr = ssh.exec_command("python3 /tmp/client_monitor.py")
msg = stdout.read().decode('utf-8')
res_os = json.loads(msg)
ssh.close()
data = f'''<body>
<div>
</div>
<div>
</div>
<table border="1px solid black">
<thead style="background-color: #B9B9FF">
<tr>
<th>TIME</th>
<th>IP</th>
<th>CPU(%)</th>
<th>MEMORY(%)</th>
<th>DISK(%)</th>
</tr>
</thead>
<tbody align="center">
<tr>
<td>{res_os['TIME']}</td>
<td>{res_os['IP']}</td>
<td>{res_os['CPU_PER']}</td>
<td>{res_os['MEM_PER']}</td>
<td>{res_os['DISK_PER']}</td>
</tr>
</tbody>
</table>
</body> '''
if int(res_os["MEM_PER"]) > 90:
msg_sub = 'JEKINS节点内存预警'
msg_content = "JENKINS 节点【%s】内存使用率过高,请关注!<br>" %i + data
if int(res_os["DISK_PER"]) > 90:
msg_sub = 'JEKINS节点磁盘预警'
msg_content = "JENKINS 节点【%s】磁盘使用率过高,请关注!<br>" %i + data
if int(res_os["CPU_PER"]) > 90:
msg_sub = 'JEKINS节点CPU负载预警'
msg_content = "JENKINS 节点【%s】CPU负载过高,请关注!<br>" %i + data
# Check for hosts that are down; ping() sends the alert e-mail itself.
# It is called exactly once: a second call would ping every host again and
# send a duplicate alert.
host_down = ping(ip_list)
# Check the SSH service on the hosts that answered ping and send an alert.
ssh_list = [host for host in ip_list if host not in host_down]
service_check(22, ssh_list, 'SSH')
# Hosts running the jenkins service.
# NOTE(review): jen_list is never passed to service_check(), so the jenkins
# service is not actually checked -- confirm the port and add the call.
jen_list = ["29.3.218.208", "29.3.218.213"]
# Check the sonar service and send an alert.
so_list = ["29.2.209.216"]
service_check(8080, so_list, 'SONAR')
# Check the OS information of the SSH-reachable nodes and send alerts.
get_monitor(ssh_up_list, 'root')