import paramiko
import time
import sys
import threading
import subprocess
import re
import os
# Addresses of the nodes this script pings, logs into over SSH and checks.
ssh_host_list = [
    "10.87.6.2",
    "10.87.6.3",
    "10.87.6.4",
    "10.87.6.5",
]

# Hand-off flag between the two worker threads: master_status() sets it once
# all checks pass, operation() clears it after rebooting the master node.
event = threading.Event()
# A new Event is already unset; the explicit clear() just spells out the
# initial state.
event.clear()
def find_ok_host():
    """Return the first host in ``ssh_host_list`` that answers a single ping.

    The ping command is passed to ``subprocess`` as an argument list, so it
    runs without a shell on every platform. The "send one packet" flag is
    chosen per platform (``-n`` on Windows, ``-c`` elsewhere).

    Returns:
        The address string of the first host whose ping exits successfully,
        or ``None`` when no host in the list answered.
    """
    # Windows ping takes the packet count via -n, POSIX ping via -c.
    count_flag = "-n" if sys.platform.startswith("win") else "-c"
    for host in ssh_host_list:
        try:
            subprocess.check_output(["ping", count_flag, "1", host])
        except (subprocess.CalledProcessError, OSError):
            # Non-zero exit status (host unreachable) or ping could not be
            # started at all: try the next candidate.
            continue
        return host
    return None
def create_paramiko_obj(host, command):
    """Run ``command`` on ``host`` over SSH and return its standard output.

    A new ``paramiko.SSHClient`` is created for every call, configured with
    ``paramiko.AutoAddPolicy`` for unknown host keys, and always closed before
    the function returns or raises.

    Args:
        host: Address of the machine to connect to (port 22).
        command: Shell command line to execute remotely.

    Returns:
        The command's stdout decoded as UTF-8.

    Raises:
        Exception: Whatever paramiko raises while connecting, executing the
            command or reading its output. An execution failure is logged and
            re-raised rather than returned, because callers test the result
            with ``if res:`` and an exception object would be truthy.
    """
    ssh = paramiko.SSHClient()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        # NOTE(review): credentials are hard-coded; consider moving them to
        # configuration or key-based authentication.
        ssh.connect(hostname=host, port=22, username="root", password="admin123.")
        try:
            _stdin, stdout, _stderr = ssh.exec_command(command)
        except Exception as e:
            print("在主机{host}执行命令{command}失败,原因{error},脚本停止!".format(host=host,command=command,error=e))
            raise
        res = stdout.read()
    finally:
        # Runs on success and on every failure, including a failed connect().
        ssh.close()
    return str(res, encoding="utf-8")
def find_master_host(host):
    """Find the node flagged as master in ``_cat/nodes`` and reboot it.

    The node list is fetched with ``curl`` executed on ``host`` over SSH and
    filtered down to the first column of the line containing ``*``. Only the
    last octet of that address is used; it is appended to the fixed
    ``10.87.6.`` prefix to form the address that gets rebooted.

    Args:
        host: Address of a reachable node used to run the query.

    Returns:
        ``True`` when the reboot command was issued, ``False`` when the master
        could not be determined (query failed or produced no IPv4 address).
        If issuing the reboot fails, the whole process is terminated with
        ``os._exit(1)``.
    """
    query_command = """curl -XGET 'http://{host}:9200/_cat/nodes?pretty' |grep "*" |awk '{args}'""".format(host=host,args="{print $1}")
    try:
        res = create_paramiko_obj(host, command=query_command)
    except Exception as e:
        print("在节点{host}上查询master节点失败,错误信息为:{error}!".format(host=host, error=e))
        return False

    # The query may yield nothing (no line matched) or a non-string value;
    # indexing the split result blindly would raise and kill the calling thread.
    match = re.search(r"\d+\.\d+\.\d+\.(\d+)", res) if isinstance(res, str) else None
    if match is None:
        print("无法从节点{host}的返回结果中解析出master节点,返回内容为:{res}".format(host=host, res=res))
        return False

    host_ip = "10.87.6." + match.group(1)
    print("master的节点为{host},将要被重启!".format(host=host_ip))
    reboot_command = "reboot"
    try:
        create_paramiko_obj(host_ip, command=reboot_command)
    except Exception as e:
        print("对节点{host}重启失败,错误信息为:{error}!".format(host=host_ip,error=e))
        # os._exit ends the whole process, including the other worker thread.
        os._exit(1)
    else:
        return True
def _node_is_healthy(host):
    """Return True when both health probes on ``host`` produce output.

    The probes are the two ``grep`` pipelines below (service reported as
    running, port 9200 listening). Anything that is not a non-empty string
    counts as a failed probe.
    """
    probes = (
        "systemctl status elasticsearch |grep running",
        "ss -ntl |grep 9200",
    )
    for command in probes:
        res = create_paramiko_obj(host, command)
        if not (isinstance(res, str) and res):
            return False
    return True


def _wait_for_node(host):
    """Check ``host`` up to 5 times, 60 s apart; exit the process if it never recovers."""
    for attempt in range(5):
        try:
            healthy = _node_is_healthy(host)
        except Exception:
            # An SSH or command failure counts as an unhealthy attempt, so it
            # is subject to the same 5-attempt limit as a failed probe.
            healthy = False
        if healthy:
            print("{host}节点状态正常,检查下一个节点!".format(host=host))
            return
        if attempt == 4:
            print("5min后,{host}节点状态还不正常,测试脚本直接退出,es的服务可能已经出问题!".format(host=host))
            os._exit(1)
        print("{host}节点状态异常正常,1min后再次检查!".format(host=host))
        time.sleep(60)


def _wait_for_green_cluster():
    """Query cluster health via the last host up to 5 times; exit the process if never green."""
    probe_host = ssh_host_list[-1]
    # The command does not change between attempts, so build it once.
    command = """curl -XGET 'http://{host}:9200/_cluster/health?pretty' |grep status |awk -F ":" '{args}'""".format(
        host=probe_host, args="{print $2}")
    for m in range(5):
        res = create_paramiko_obj(host=probe_host, command=command)
        # Drop surrounding whitespace, quotes, commas and brackets.
        res = res.strip().strip("\n").strip('[,"]')
        if "green" in res:
            print("所有的节点状态检查正常,集群状态正常!")
            return
        if m == 4:
            print("5次检查后,集群的状态仍为{status},测试脚本直接退出!".format(status=res))
            os._exit(1)
        # Report the 1-based number of this cluster check.
        print("第{num}次检查,集群的状态为{status},1min后继续检查!".format(status=res, num=m + 1))
        time.sleep(60)


def master_status():
    """Monitor loop: verify every node and the cluster, then set ``event``.

    While ``event`` is set the loop idles. Once it is cleared, each host in
    ``ssh_host_list`` is checked in order, then cluster health is polled until
    it reports green, and finally ``event`` is set again. Any check that still
    fails after 5 attempts terminates the process with ``os._exit(1)``.

    This function never returns.
    """
    while True:
        if event.is_set():
            # Event has no "wait until cleared" call; poll with a short sleep
            # instead of spinning at full CPU.
            time.sleep(1)
            continue
        for host in ssh_host_list:
            _wait_for_node(host)
        _wait_for_green_cluster()
        event.set()
def operation():
    """Worker loop: whenever ``event`` is set, reboot the current master node.

    Each round picks a reachable host with ``find_ok_host()`` and hands it to
    ``find_master_host()``. On success ``event`` is cleared; on failure the
    round is retried after a short pause. While ``event`` is not set the loop
    blocks in ``event.wait()``.

    This function never returns.
    """
    retry_delay = 5  # seconds to pause between failed rounds

    while True:
        if not event.is_set():
            print("等待中....")
            event.wait()
            continue

        ok_host = find_ok_host()
        # No reachable host: skip the master lookup, which would otherwise be
        # run against None.
        status = find_master_host(ok_host) if ok_host is not None else False
        if status:
            event.clear()
        else:
            print("操作失败")
            # Pause so repeated failures do not turn into a tight retry loop.
            time.sleep(retry_delay)
if __name__ == '__main__':
    # Start the monitor first, then the operator. Both are regular
    # (non-daemon) threads running endless loops.
    workers = [threading.Thread(target=fn) for fn in (master_status, operation)]
    for worker in workers:
        worker.start()