import paramiko
import time
import sys
import threading
import subprocess
import re
import os

# Addresses of the nodes this script pings, logs in to over SSH and checks.
ssh_host_list = [
    "10.87.6.2",
    "10.87.6.3",
    "10.87.6.4",
    "10.87.6.5",
]

# Hand-off flag between the two worker threads: master_status() sets it once
# its checks pass, operation() clears it after a reboot has been requested.
# A freshly created Event is already unset, so no explicit clear() is needed.
event = threading.Event()

def find_ok_host():
    """Return the first host in ``ssh_host_list`` that answers a single ping.

    Hosts are probed in list order with the system ``ping`` executable,
    sending one echo request per host.

    Returns:
        The address of the first host whose ping exits successfully, or
        ``None`` when no host answers or ``ping`` cannot be started.
    """
    # Windows' ping takes the request count as "-n"; other platforms use "-c".
    count_flag = "-n" if sys.platform.startswith("win") else "-c"
    for host in ssh_host_list:
        try:
            # An argument list runs without a shell on every platform; a
            # single command string only works that way on Windows.
            subprocess.check_output(["ping", count_flag, "1", host])
        except (subprocess.CalledProcessError, OSError):
            # Non-zero exit status, or ping could not be run: try the next host.
            continue
        return host
    return None


def create_paramiko_obj(host, command):
    """Run ``command`` on ``host`` over SSH and return its standard output.

    A new SSH connection (port 22, user ``root``, password login) is opened
    for every call and closed again before returning.

    Args:
        host: Address of the machine to log in to.
        command: Command line to execute on the remote machine.

    Returns:
        Everything the command wrote to stdout, decoded as UTF-8.

    Raises:
        Exception: Any error raised while connecting, starting the command or
            reading its output is propagated to the caller.
    """
    ssh = paramiko.SSHClient()
    # NOTE(review): unknown host keys are accepted automatically and the
    # credentials are hard-coded; acceptable only in a trusted lab network.
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        # Connecting happens inside the try so the client is closed even when
        # the login itself fails.
        ssh.connect(hostname=host, port=22, username="root", password="admin123.")
        try:
            _stdin, stdout, _stderr = ssh.exec_command(command)
        except Exception as e:
            print("在主机{host}执行命令{command}失败,原因{error},脚本停止!".format(host=host,command=command,error=e))
            # Re-raise instead of returning the exception object: a returned
            # exception is truthy and callers would take it for command output.
            raise
        return stdout.read().decode("utf-8")
    finally:
        ssh.close()


def find_master_host(host):
    """Look up the Elasticsearch master node via ``host`` and reboot it.

    The ``_cat/nodes`` endpoint is queried with curl on ``host`` (over SSH);
    the line marked with ``*`` is kept and its first column is read as a
    dotted IPv4 address. The last octet of that address is appended to the
    fixed ``10.87.6.`` prefix to form the address that receives the ``reboot``
    command.

    Args:
        host: Address of a reachable node used to run the query. May be
            ``None`` or empty, in which case nothing is done.

    Returns:
        ``True`` when the reboot command was sent without an exception,
        ``False`` when ``host`` is empty or no master address could be parsed
        from the query output.

    Note:
        If sending the reboot command raises, the whole process is terminated
        with exit status 1.
    """
    if not host:
        print("没有可连接的主机,无法查询master节点!")
        return False

    get_command = """curl -XGET 'http://{host}:9200/_cat/nodes?pretty' |grep "*" |awk '{args}'""".format(host=host,args="{print $1}")
    res = create_paramiko_obj(host,command=get_command)

    # Empty or unexpected output (curl failed, no master elected yet, ...) must
    # not crash the calling thread; report it and let the caller decide.
    match = None
    if isinstance(res, str):
        match = re.search(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.(\d{1,3})", res)
    if match is None:
        print("无法从主机{host}的返回结果中解析出master节点的IP,返回内容为:{output}!".format(host=host,output=repr(res)))
        return False

    # Only the last octet of the reported address is used; it is re-attached to
    # the fixed prefix (presumably the subnet the nodes are reachable on over
    # SSH -- verify if the cluster ever spans another network).
    host_ip = "10.87.6." + match.group(1)
    print("master的节点为{host},将要被重启!".format(host=host_ip))
    try:
        create_paramiko_obj(host_ip, command="reboot")
    except Exception as e:
        print("对节点{host}重启失败,错误信息为:{error}!".format(host=host_ip,error=e))
        os._exit(1)
    return True


# Every node check and the cluster-health check is tried this many times, this
# many seconds apart. The "5min"/"1min" wording in the messages below assumes
# these values.
_MAX_CHECKS = 5
_CHECK_INTERVAL = 60


def _run_checked(host, command):
    """Run ``command`` on ``host`` via create_paramiko_obj and return its output.

    Guards against the helper handing back an exception object instead of
    raising it: such a result is raised here so it cannot pass as output.
    """
    result = create_paramiko_obj(host, command)
    if isinstance(result, Exception):
        raise result
    return result


def _node_is_healthy(host):
    """Return True when elasticsearch reports running and port 9200 is listening."""
    if not _run_checked(host, "systemctl status elasticsearch |grep running"):
        return False
    return bool(_run_checked(host, "ss -ntl |grep 9200"))


def _wait_for_node(host):
    """Check ``host`` until it is healthy; exit the process after the last failed try."""
    for attempt in range(1, _MAX_CHECKS + 1):
        try:
            healthy = _node_is_healthy(host)
        except Exception as e:
            # An SSH failure (e.g. the node is still rebooting) counts as a
            # failed check, so it is subject to the same retry limit.
            print("检查{host}节点时发生错误:{error}!".format(host=host, error=e))
            healthy = False

        if healthy:
            print("{host}节点状态正常,检查下一个节点!".format(host=host))
            return
        if attempt == _MAX_CHECKS:
            print("5min后,{host}节点状态还不正常,测试脚本直接退出,es的服务可能已经出问题!".format(host=host))
            os._exit(1)
        print("{host}节点状态异常,1min后再次检查!".format(host=host))
        time.sleep(_CHECK_INTERVAL)


def _wait_for_green_cluster():
    """Query cluster health via the last listed host until it reports green.

    Exits the process with status 1 when the cluster is still not green after
    the last try.
    """
    query_host = ssh_host_list[-1]
    command = """curl -XGET 'http://{host}:9200/_cluster/health?pretty' |grep status |awk -F ":" '{args}'""".format(
        host=query_host, args="{print $2}")
    for attempt in range(1, _MAX_CHECKS + 1):
        try:
            # Strip whitespace, then the quotes/comma/brackets around the value.
            status = _run_checked(query_host, command).strip().strip('[,"]')
        except Exception as e:
            print("查询集群状态时发生错误:{error}!".format(error=e))
            status = "unknown"

        if "green" in status:
            print("所有的节点状态检查正常,集群状态正常!")
            return
        if attempt == _MAX_CHECKS:
            print("5min后,集群的状态仍为{status},测试脚本直接退出!".format(status=status))
            os._exit(1)
        print("第{num}次检查,集群的状态为{status},1min后继续检查!".format(status=status, num=attempt))
        time.sleep(_CHECK_INTERVAL)


def master_status():
    """Health-check loop, meant to run forever in its own thread.

    While ``event`` is unset, every host in ``ssh_host_list`` is checked in
    turn (elasticsearch service running, port 9200 listening), then the
    cluster health is polled until it is green. When all checks pass,
    ``event`` is set. While ``event`` is set this thread idles. Any check that
    still fails after its last retry terminates the whole process with exit
    status 1.
    """
    while True:
        if event.is_set():
            # Nothing to verify until the flag is cleared again; sleep briefly
            # instead of spinning at full CPU.
            time.sleep(1)
            continue

        for host in ssh_host_list:
            _wait_for_node(host)
        _wait_for_green_cluster()
        event.set()

def operation():
    """Reboot loop, meant to run forever in its own thread.

    Blocks until ``event`` is set, then picks a reachable host with
    find_ok_host() and calls find_master_host() through it. After a
    successful call ``event`` is cleared, so this thread blocks again until
    the flag is set once more. A failed attempt is reported and retried after
    a pause.
    """
    retry_delay = 60  # seconds to wait before retrying a failed attempt

    while True:
        if not event.is_set():
            print("等待中....")
            event.wait()

        ok_host = find_ok_host()
        # Without a reachable host there is nothing to query; treat it like
        # any other failed attempt instead of passing None further down.
        if ok_host and find_master_host(ok_host):
            event.clear()
            continue

        print("操作失败")
        # Back off so a persistent failure does not become a tight retry loop.
        time.sleep(retry_delay)


if __name__ == '__main__':
    # Two plain (non-daemon) threads: one runs the health-check loop, the
    # other runs the reboot loop; they coordinate through the shared event.
    health_thread = threading.Thread(target=master_status)
    reboot_thread = threading.Thread(target=operation)

    for worker in (health_thread, reboot_thread):
        worker.start()
   

 

# Source: blog post by bainianminguo, posted on 2019-03-13 09:58 (page footer at capture time: 344 views, 0 comments).