python代码
#!/usr/bin/env python # -*- coding: UTF-8 -*- import subprocess import sys import re def run_cmd(cmd, cwd=None, runas=None): if not sys.platform.startswith('win') and runas and runas != 'root': cmd = 'su - {} -c "{}"'.format(runas, cmd) proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True, cwd=cwd) return proc def get_os_oracle_user(): cmd = ''' ps -ef | grep ora_ | grep -v grep | awk '{ print $1 }' | uniq ''' code, res = run_cmd(cmd) if code == 0: os_oracle_user = res return os_oracle_user else: print "获取运行database的操作系统用户失败...", res sys.exit(1) def get_grid_user(): cmd = '''ps -ef | grep asm_pmon | grep -v grep |uniq''' return_code, result = run_cmd(cmd) if return_code == 0 and result: os_oracle_user = result.split('\n')[0].split()[0].strip() return os_oracle_user else: print "获取运行database的操作系统用户失败...", result sys.exit(1) class Oracle_init(): """所有监控oracle的基类""" def __init__(self, user='monitoruser', password='123456'): try: if user == 'sysdba': self.user = '/ as sysdba' elif user and password: self.user = '{0}/{1}'.format(user, password) else: self.user = None self.os_oracle_user=get_os_oracle_user() except Exception as err: print err sys.exit(1) def run_cmd(self,sid,cmd): cmd = r'''su - {} -c "export ORACLE_SID={} sqlplus -S /nolog <<EOF conn /as sysdba; {} exit; EOF"'''.format(self.os_oracle_user, sid,cmd) code, res = run_cmd(cmd) error=self.check_error(res) if error: print error sys.exit(1) else: return res def check_error(self,res): if re.search(r'ORA-01017', res): error = '账号或密码错误' elif re.search(r'ORA-01034', res): error = '数据库不可用' elif re.search('ORA-\d+:[\s\S]*', res): error = re.search('ORA-\d+:[\s\S]*', res).group() else: error = None return error # 获取所有实例名 def get_sids(): sids = '' cmd = "ps -ef|grep -v grep |grep ora_pmon|awk '{print $NF}'|sed 's/ora\_pmon\_//'|uniq" code, res = run_cmd(cmd) if not code and res: sids = res.replace('\n', ' ') return sids ########判断ohasd状态############## def get_oracleOhasdStatus(): try: cmd = "ps -ef | grep ohasd.bin|grep -v grep" code,res=run_cmd(cmd) if code: return False else: return True except Exception as err: sys.stderr.write(err.__str__() + '\n') return False ################监听日志大小############## def get_oracle_listenerLog(sid): try: global is_cluster if is_cluster: user=get_grid_user() else: user= get_os_oracle_user() cmd = 'su - {} -c "export ORACLE_SID={};' \ 'du -sm $ORACLE_BASE/diag/tnslsnr/`hostname`/listener/trace/listener.log"'.format(user,sid) code,res=run_cmd(sid, cmd) if not res: print '未获得数据' sys.exit(1) else: return res.split()[0] except Exception as err: sys.stderr.write(err.__str__() + '\n') return -1 ################监听trace文件# ############## def get_oracleTrace(sid): try: global is_cluster if is_cluster: user = get_grid_user() cmd = 'su - {} -c "export ORACLE_SID={};cat $ORACLE_BASE/diag/asm/+asm/$ORACLE_SID/trace/alert_+ASM1.log |egrep \'ORA-|ERROR\'"'.format( user, sid) else: user = get_os_oracle_user() cmd='su - {} -c "export ORACLE_SID={};cat $ORACLE_BASE/diag/rdbms/$DB_UNIQUENAME/$ORACLE_SID/trace |egrep \'ORA-|ERROR\'"'.format( user, sid) code, res = run_cmd(sid, cmd) if not res: print '未获得数据' sys.exit(1) else: return res.split()[0] except Exception as err: sys.stderr.write(err.__str__() + '\n') return -1 if __name__=='__main__': output = "metric=%s|value=%d|type=%s|tags=%s" print output % ("system.file.number", get_oracleOhasdStatus(), 'gauge', '') obj = Oracle_init(user='monitoruser', password='123456') sql_cmd = "select value from v\\\$parameter where name='cluster_database';" sids=get_sids() res=obj.run_cmd(sids[0],sql_cmd) res=res.split('\n') length=len(res) is_cluster=None for i in range(-1,-1-length,-1): if not i : continue if i.strip()=='False': is_cluster=False break if i.strip()=='True': is_cluster=True break if is_cluster is None: print '无法判断是否是集群' sys.exit(1) for sid in sids: print output % ("system.file.number", get_oracle_listenerLog(sid), 'gauge', '') print output % ("system.file.number", get_oracleTrace(sid), 'gauge', '')