open-falcon 监控rabbitmq队列删除后,报警自动恢复脚本
监控脚本通过 RabbitMQ 15672 端口的 API 获取监控信息,并上报至 open-falcon。某些队列触发报警后,被管理员直接在 RabbitMQ 中删除,此后该队列不再有数据上报,导致 open-falcon 的报警无法自动恢复。因此写了下面这个脚本:自动找出没有上报数据的队列,然后补报一个值,让报警自动恢复。
#!/usr/bin/python3.6
# -*- coding: UTF-8 -*-
"""Auto-recover open-falcon alarms for RabbitMQ queues that were deleted.

Queue metrics are reported to open-falcon by a separate collector.  When a
queue that has a firing alarm is deleted, its counters stop receiving data
and the alarm never recovers.  This script looks up the counters of each
configured endpoint, finds those whose recent history is (almost) entirely
missing, and pushes one filler value for each of them to the local agent so
the alarm can clear.
"""
import json
import time

import requests

# Connection settings for the open-falcon API.
ip = 'xxx.xxx.xx.xxx'  # IP of the open-falcon API server
port = '9966'
user = 'root'
password = 'xxxxxx'
login_uri = '/api/v1/user/login'
login_url = 'http://' + ip + ':' + port + login_uri

# Timeout in seconds applied to every HTTP call so a hung server cannot
# block the script forever.
REQUEST_TIMEOUT = 30

# One shared session is used for all API calls.
s = requests.Session()
# Log in through the login API.
login_res = s.post(
    url=login_url,
    data={'name': user, 'password': password},
    timeout=REQUEST_TIMEOUT,
)
# Per the original author's note the response is a dict holding
# name / sig / admin; only 'sig' is used here.
login_data = json.loads(login_res.text)
sig = login_data['sig']
# Access is checked via the session; fetching resources requires
# authenticating through the "Apitoken" request header.
api_token = '{"name":"' + user + '", "sig":"' + sig + '"}'


def get_endpoint_id(endpoint):
    """Query the endpoint search API for *endpoint*.

    Args:
        endpoint: Endpoint name (or search string) sent as the ``q``
            query parameter.

    Returns:
        The decoded JSON response.  The caller reads ``[0]['id']`` from
        it, so it is presumably a list of dicts -- verify against the API.
    """
    url = 'http://' + ip + ':' + port + '/api/v1/graph/endpoint'
    # Let requests build the query string so special characters in the
    # search term are URL-encoded.
    response = s.get(url=url, params={'q': endpoint}, timeout=REQUEST_TIMEOUT)
    return json.loads(response.text)


def get_counters(eid, metricQuery):
    """Return the counter names of endpoint *eid* matching *metricQuery*.

    Args:
        eid: Endpoint id as returned by :func:`get_endpoint_id`.
        metricQuery: Metric search string sent as ``metricQuery``.

    Returns:
        A list with the ``counter`` field of every item in the response.
    """
    url = 'http://' + ip + ':' + port + '/api/v1/graph/endpoint_counter'
    response = s.get(
        url=url,
        params={'eid': eid, 'metricQuery': metricQuery},
        timeout=REQUEST_TIMEOUT,
    )
    return [item['counter'] for item in json.loads(response.text)]


def add_no_consumer_nodata(graph_data, min_missing=29, recovery_value=2):
    """Build filler data points for counters that stopped reporting.

    Args:
        graph_data: Iterable of history series; each is a dict with the
            keys ``endpoint``, ``counter`` and ``Values`` (a list of dicts
            carrying a ``value`` key).
        min_missing: Minimum number of ``None`` points a series must have
            to be treated as "no data".  The history query covers 1800 s
            at a 60 s step (about 30 points), so the default of 29 means
            nearly the whole window is empty.
        recovery_value: Value to push for such counters (default 2, as
            hard-coded by the original script; presumably chosen to fall
            on the non-alarming side of the strategy -- confirm).

    Returns:
        A list of dicts with the keys ``endpoint``, ``counter``, ``value``.
    """
    recovery_points = []
    for series in graph_data:
        points = series['Values']
        # A series without any points gives nothing to judge; skipping it
        # also avoids using an unset or stale push value.
        if not points:
            continue
        missing = sum(1 for point in points if point['value'] is None)
        if missing < min_missing:
            continue
        recovery_points.append({
            'endpoint': series['endpoint'],
            'counter': series['counter'],
            'value': recovery_value,
        })
    return recovery_points


def get_graph_history(endpoint, counters):
    """Fetch the last 30 minutes of history for the given counters.

    Args:
        endpoint: List of hostnames, sent as ``hostnames``.
        counters: List of counter names, sent as ``counters``.

    Returns:
        The decoded JSON response of the history API.
    """
    url = 'http://' + ip + ':' + port + '/api/v1/graph/history'
    # Headers used to authenticate the history query.
    falcon_header = {
        "Apitoken": api_token,
        "X-Forwarded-For": "127.0.0.1",
        "Content-Type": "application/json",
        "name": user,
        "sig": sig
    }
    # Read the clock once so start and end describe one consistent window.
    now = int(time.time())
    # Request body of the history query.
    payload = {
        "step": 60,
        "start_time": now - 1800,
        "end_time": now,
        "hostnames": endpoint,
        "counters": counters,
        "consol_fun": "AVERAGE"
    }
    graph_res = s.post(
        url=url,
        headers=falcon_header,
        data=json.dumps(payload),
        timeout=30,
    )
    return json.loads(graph_res.text)


def push_data(value_list):
    """Push one GAUGE data point per entry to the local agent.

    Args:
        value_list: List of dicts with the keys ``endpoint``, ``counter``
            and ``value`` (as produced by :func:`add_no_consumer_nodata`).

    Returns:
        The response body text of the push request.
    """
    now = int(time.time())
    payload = []
    for item in value_list:
        # A counter is "metric/tags".  Split on the FIRST slash only so a
        # tag value that itself contains "/" is kept intact; a counter
        # without tags yields an empty tag string.
        metric, _, tags = item['counter'].partition('/')
        payload.append({
            "endpoint": item['endpoint'],
            "metric": metric,
            "timestamp": now,
            "step": 60,
            "value": item['value'],
            "counterType": "GAUGE",
            "tags": tags,
        })
    print(payload)
    r = requests.post(
        "http://127.0.0.1:1988/v1/push",
        data=json.dumps(payload),
        timeout=REQUEST_TIMEOUT,
    )
    return r.text


if __name__ == '__main__':
    endpoints = ['rabbitmq01.produce.zs']
    search_counters = ['rabbitmq.messages_no_consumers', 'rabbitmq.messages_total']
    for endpoint in endpoints:
        endpoint_info = get_endpoint_id(endpoint)
        if not endpoint_info:
            # Unknown endpoint: nothing to look up, move on to the next one.
            print('endpoint not found: {0}'.format(endpoint))
            continue
        endpoint_id = int(endpoint_info[0]['id'])
        for counter in search_counters:
            counter_list = get_counters(endpoint_id, counter)
            if not counter_list:
                # No counters match this metric; skip the history query.
                continue
            graph_result = get_graph_history([endpoint], counter_list)
            value_list = add_no_consumer_nodata(graph_result)
            result = push_data(value_list)
            print(result)
浙公网安备 33010602011771号