postgresql 监控日志的方法

postgresql数据库运行产生的日志都会输出到日志文件里,因此实时监控该文件就可以大致了解到数据库的运行状况。

public.sp_gather_pgsql_log_part(pi_time_last integer)

这个函数的作用是创建目标机的日志外部表

CREATE OR REPLACE FUNCTION public.sp_gather_pgsql_log_part(pi_time_last integer)
 RETURNS void
 LANGUAGE plpgsql
AS $function$
declare
    v_sql        text;
    v_sp_name    varchar(100);
    v_pglogfile  varchar(100);
    v_pglogfile2 varchar(100);
    v_time_last  int4;
    v_time       timestamp without time zone;

    c_db RECORD;
    d_db RECORD;

begin 

    v_sql := '';
    v_sp_name :='public.sp_gather_pgsql_log_part()';
    v_pglogfile :='';


    for c_db in (
        select distinct 
               td.db_num,
               td.db_version, 
               td.db_master_role_phyip      as db_mon_ip,
               td.db_master_role_phyip_port as db_mon_ip_port,

               tdi.db_user,
               tdi.db_pwd,
               tdi.default_dbname
          from t_task_database td,
               t_task_database_ident tdi
         where 1=1
           and td.db_num= tdi.db_num
           and td.db_ismonitor = true
           and td.db_master_role_phyip is not null 
           and td.db_master_role_phyip <>  ''
           and td.db_type = 'pgsql'
           --and td.db_num = 'pg002'
    )
    loop
      v_pglogfile  :='';
      v_pglogfile2 :='';
      BEGIN
           v_sql:='
           with tmp_pglogfile as (
              select * 
                from dblink(''dbname='||c_db.default_dbname||' host='||c_db.db_mon_ip||' port='||c_db.db_mon_ip_port||' user='||c_db.db_user||' password='||c_db.db_pwd||''',
                            '' select t00.file,t00.file_ls
                                from (
                                    select t1.file,
                                           t1.file_ls,
                                           (pg_stat_file(t1.file)).modification as last_update_time,
                                           round( ((pg_stat_file(t1.file)).size)/1024/1024*1.0,1) as log_size_mb
                                      from (select dir||''''/''''||pg_ls_dir(t0.dir) as file,
                                                   pg_ls_dir(t0.dir) as file_ls
                                              from ( select setting as dir from pg_settings where name=''''log_directory''''
                                                    ) t0
                                            ) t1 
                                     where 1=1
                                     order by (pg_stat_file(file)).modification desc
                                    ) t00
                                where 1=1
                                  and t00.file_ls like ''''%.csv''''
                                limit 1  '') as  t1 (pglogfile varchar,pglogfilels varchar)
           )
           select pglogfile
             from tmp_pglogfile     
          ';

          --获取当前日志文件
          execute v_sql into v_pglogfile;

          v_sql:='
           with tmp_pglogfile2 as (
              select * 
                from dblink(''dbname='||c_db.default_dbname||' host='||c_db.db_mon_ip||' port='||c_db.db_mon_ip_port||' user='||c_db.db_user||' password='||c_db.db_pwd||''',
                            '' with tmp_t0 as (
                                    select pc.relname,
                                           unnest(pft.ftoptions) as relinfo
                                      from pg_class pc,
                                           pg_foreign_table pft
                                     where 1=1
                                       and pc.oid = pft.ftrelid
                                       and pc.relname like ''''pglog_current''''
                               )
                               select replace(relinfo,''''filename='''','''''''') as rel
                                 from tmp_t0
                                where 1=1
                                  and relinfo like ''''%filename=%''''
                            '') as  t1 (pglogfile2 varchar)
           )
           select pglogfile2
             from tmp_pglogfile2    
          ';

          raise notice 'v_sql= %',v_sql;


          --获取当前日志文件外部表指向的日志文件
          execute v_sql into v_pglogfile2;


          raise notice 'v_pglogfile= %,v_pglogfile2= %',v_pglogfile,v_pglogfile2;


          if v_pglogfile = v_pglogfile2 then
             continue;
          else 
             v_sql:='
              select dblink(''dbname='||c_db.default_dbname||' host='||c_db.db_mon_ip||' port='||c_db.db_mon_ip_port||' user='||c_db.db_user||' password='||c_db.db_pwd||''',
                            '' drop foreign table if exists pglog_current;  '')     
             ';

             --raise notice 'v_sql= %',v_sql;

             --删除远程机器当前日志文件的外部表
             execute v_sql;

             v_sql:='
              select dblink(''dbname='||c_db.default_dbname||' host='||c_db.db_mon_ip||' port='||c_db.db_mon_ip_port||' user='||c_db.db_user||' password='||c_db.db_pwd||''',
                            '' 
                                CREATE FOREIGN TABLE pglog_current (  
                                  log_time timestamp(3) without time zone,  
                                  user_name text,  
                                  database_name text,  
                                  process_id integer,  
                                  connection_from text,  
                                  session_id text,  
                                  session_line_num bigint, 
                                  command_tag text, 

                                  session_start_time timestamp with time zone,  
                                  virtual_transaction_id text,  
                                  transaction_id bigint,  
                                  error_severity text,  
                                  sql_state_code text,  
                                  message text,  
                                  detail text,  
                                  hint text,  
                                  internal_query text,  
                                  internal_query_pos integer,  
                                  context text,  
                                  query text,  
                                  query_pos integer,  
                                  location text,  
                                  application_name text
                                 ) 
                                server file_fdw_server 
                                options(filename '''''||v_pglogfile||''''',format ''''csv'''')
                                ;  '')  
             ';

             --创建远程机器当前日志文件的外部表
             execute v_sql;

          end if;


      EXCEPTION 
          WHEN others then

          --raise notice 'v_sql= %',v_sql;
          --记录异常日志          
          perform sp_util_exception_writelog(
             c_db.db_num,
             c_db.db_mon_ip,
             now(),
             'log',

             v_sp_name,
             sqlstate::varchar,
             sqlerrm::text,
             'n20'::varchar,
             'dba'::varchar
           );

      END;
    end loop;

    --减少stream 异步复制的报错
    execute 'select pg_sleep(3)';

    --插入最近pglog数据,主库和从库的日志都要插入
    delete from t_gather_pgsql_log_part;

    select pi_time_last
      into v_time_last
      ;

    select LOCALTIMESTAMP(0)
      into v_time
      ;

    for d_db in (
       select distinct 
              t0.db_num,
              t0.db_version, 
              t0.db_mon_ip,
              t0.db_mon_ip_port,

              t0.db_user,
              t0.db_pwd,
              t0.default_dbname
       from (
            select distinct 
                   td.db_num,
                   td.db_version, 
                   td.db_master_role_phyip      as db_mon_ip,
                   td.db_master_role_phyip_port as db_mon_ip_port,

                   tdi.db_user,
                   tdi.db_pwd,
                   tdi.default_dbname
              from t_task_database td,
                   t_task_database_ident tdi
             where 1=1
               and td.db_num= tdi.db_num
               and td.db_ismonitor = true
               and td.db_master_role_phyip is not null 
               and td.db_master_role_phyip <>  '' 
               and td.db_type = 'pgsql'

            union all

            select distinct 
                   td.db_num,
                   td.db_version, 
                   td.db_slave_role_phyip       as db_mon_ip,
                   td.db_slave_role_phyip_port  as db_mon_ip_port,

                   tdi.db_user,
                   tdi.db_pwd,
                   tdi.default_dbname
              from t_task_database td,
                   t_task_database_ident tdi
             where 1=1
               and td.db_num= tdi.db_num
               and td.db_ismonitor = true
               and td.db_master_role_phyip is not null 
               and td.db_master_role_phyip <>  '' 
               and td.db_type = 'pgsql'
             ) t0
        where 1=1
          --and t0.db_num='pg002'

    )
    loop

      BEGIN
          v_sql:='
           insert into t_gather_pgsql_log_part(
            db_num,
            db_ip,
            log_time,
            user_name,
            database_name,
            process_id,
            connection_from,
            session_id,
            session_line_num,
            command_tag,
            session_start_time,
            virtual_transaction_id,
            transaction_id,
            error_severity,
            sql_state_code,
            message,
            detail,
            hint,
            internal_query,
            internal_query_pos,
            context,
            query,
            query_pos,
            location,
            application_name
           )
           with tmp_pglogdata as (
              select * 
                from dblink(''dbname='||d_db.default_dbname||' host='||d_db.db_mon_ip||' port='||d_db.db_mon_ip_port||' user='||d_db.db_user||' password='||d_db.db_pwd||''',
                            '' select 
                                    log_time,
                                    user_name,
                                    database_name,
                                    process_id,
                                    connection_from,
                                    session_id,
                                    session_line_num,
                                    command_tag,
                                    session_start_time,
                                    virtual_transaction_id,
                                    transaction_id,
                                    error_severity,
                                    sql_state_code,
                                    message,
                                    detail,
                                    hint,
                                    internal_query,
                                    internal_query_pos,
                                    context,
                                    query,
                                    query_pos,
                                    location,
                                    application_name
                               from pglog_current 
                               where 1=1
                                 and log_time between '''''||to_char(v_time - (v_time_last::varchar)::interval DAY TO second,'yyyy-mm-dd hh24:mi:ss')||'''''::timestamp without time zone  
                                                  and '''''||to_char(v_time ,'yyyy-mm-dd hh24:mi:ss')||'''''::timestamp without time zone
                           '') as  t1 (   log_time timestamp(3) without time zone,  
                                          user_name text,  
                                          database_name text,  
                                          process_id integer,  
                                          connection_from text,  
                                          session_id text,  
                                          session_line_num bigint, 
                                          command_tag text, 

                                          session_start_time timestamp with time zone,  
                                          virtual_transaction_id text,  
                                          transaction_id bigint,  
                                          error_severity text,  
                                          sql_state_code text,  
                                          message text,  
                                          detail text,  
                                          hint text,  
                                          internal_query text,  
                                          internal_query_pos integer,  
                                          context text,  
                                          query text,  
                                          query_pos integer,  
                                          location text,  
                                          application_name text)
           )
           select   '''||d_db.db_num||''' as db_num,
                    '''||d_db.db_mon_ip||''' as db_ip,
                    log_time,
                    user_name,
                    database_name,
                    process_id,
                    connection_from,
                    session_id,
                    session_line_num,
                    command_tag,
                    session_start_time,
                    virtual_transaction_id,
                    transaction_id,
                    error_severity,
                    sql_state_code,
                    message,
                    detail,
                    hint,
                    internal_query,
                    internal_query_pos,
                    context,
                    query,
                    query_pos,
                    location,
                    application_name
             from tmp_pglogdata     
          ';

          --raise notice 'v_sql= %',v_sql;

          --获取当前日志文件数据
          execute v_sql ;

          --raise notice 'v_pglogfile= %',v_pglogfile;

      EXCEPTION 
          WHEN others then

          --raise notice 'v_sql= %',v_sql;
          --记录异常日志
          perform sp_util_exception_writelog(
             d_db.db_num,
             d_db.db_mon_ip,
             now(),
             'log',

             v_sp_name,
             sqlstate::varchar,
             sqlerrm::text,
             'n20'::varchar,
             'dba'::varchar
           );

      END;
    end loop;


    RETURN;
end;
$function$

public.sp_gather_pgsql_log_done()

这个函数的作用是根据一些预先定义的关键字从临时日志表里获取到关联的信息。

CREATE OR REPLACE FUNCTION public.sp_gather_pgsql_log_done()
 RETURNS void
 LANGUAGE plpgsql
AS $function$
declare
    v_sql  text;
    v_sp_name   varchar(100);
    v_time      timestamp without time zone;
    v_filename  varchar(100);

    v_error_severity_list text;
    v_sql_state_code text;

    c_db RECORD;
    d_db RECORD;

begin 

    v_sql := '';
    v_sp_name :='public.sp_gather_pgsql_log_done()';


    for c_db in (

         select tp.db_num,
                tp.db_ip as db_mon_ip,
                to_char(min(tp.log_time),'yyyy-mm-dd hh24:mi:ss') as min_log_time,
                to_char(max(tp.log_time),'yyyy-mm-dd hh24:mi:ss') as max_log_time,
                to_char(min(tp.log_time),'yyyymmddhh24miss')      as min_log_time_file,
                to_char(max(tp.log_time),'yyyymmddhh24miss')      as max_log_time_file
           from public.t_gather_pgsql_log_part tp
          where 1=1
          group by tp.db_num,
                   tp.db_ip
          order by tp.db_num,
                   tp.db_ip         

    )
    loop
      v_sql :='';
      v_error_severity_list :='';
      v_sql_state_code :='';
      v_filename :='';

      BEGIN

          for d_db in (
             select tp.error_severity,
                    tp.sql_state_code,
                    tp.message,
                    count(1) as cnt,
                    sp_get_ip_to_applist(string_agg( distinct substring( tp.connection_from,1,position(':' in tp.connection_from)-1 ),',') ) as conn_ip
             from public.t_gather_pgsql_log_part tp
             where 1=1
               and tp.db_num = c_db.db_num
               and tp.db_ip = c_db.db_mon_ip
               and lower(tp.error_severity) in (
                    'warning',
                    'error',
                    'fatal',
                    'panic',
                    'deadlock'
                   )
               and tp.message not like '[____]%'
               --and tp.message not like '%violates unique constraint%'
             group by tp.error_severity,
                      tp.sql_state_code,
                      tp.message
             order by case when lower(tp.error_severity) = 'panic'    then 1 
                           when lower(tp.error_severity) = 'fatal'    then 2
                           when lower(tp.error_severity) = 'error'    then 3
                           when lower(tp.error_severity) = 'warning'  then 4
                           when lower(tp.error_severity) = 'deadlock' then 5
                           else 99
                       end,
                      case when tp.message  like '[____]%' then 99
                           else 1
                       end,    
                      cnt desc 
          )
          loop
            begin
              v_sql := v_sql||'
'||d_db.cnt||'    '||d_db.error_severity||'    '||d_db.sql_state_code||'    '||d_db.message||'    '||d_db.conn_ip;

              --raise notice 'v_sql= %',v_sql;
              v_error_severity_list := v_error_severity_list||d_db.error_severity||'##';
              v_sql_state_code      := v_sql_state_code||d_db.sql_state_code||'##';
              --execute v_sql;
            end; 

          end loop; 

          if ( length(v_sql) > 0 ) then

              v_filename:='/tmp/pgsql_pglog_current_done/pglog_'||trim(replace(c_db.db_mon_ip,'.','_'))||'_'||trim(c_db.min_log_time_file)||'_'||trim(c_db.max_log_time_file)||'.csv';


              execute ' 
              copy (
                     select *
                       from public.t_gather_pgsql_log_part tp
                      where 1=1
                        and tp.db_num = '''||c_db.db_num||'''    
                        and tp.db_ip = '''||c_db.db_mon_ip||'''
                        and tp.error_severity in ( select distinct str_tab from (select regexp_split_to_table('''||v_error_severity_list||''',''##'') as str_tab ) t0 where str_tab is not null and str_tab <> '''' )
                        and tp.sql_state_code in ( select distinct str_tab from (select regexp_split_to_table('''||v_sql_state_code||''',''##'') as str_tab ) t0 where str_tab is not null and str_tab <> '''' )
                        and tp.message not like ''[____]%''
                      order by tp.log_time 
                   )
              to '''||v_filename||''' 
              CSV HEADER
              ';


              perform sp_util_exception_writelog(
                 c_db.db_num,
                 c_db.db_mon_ip,
                 now(),
                 'log',

                 v_sp_name,
                 (c_db.min_log_time||' ~ '||c_db.max_log_time)::varchar,
                 v_sql::text,
                 'n20'::varchar,
                 'dba,sa,kfa,ita,csa'::varchar,
                 --'dba'::varchar,
                 v_filename
               );
          end if;

      EXCEPTION 
          WHEN others then

          --raise notice 'v_sql= %',v_sql;

          perform sp_util_exception_writelog(
             c_db.db_num,
             c_db.db_mon_ip,
             now(),
             'log',

             v_sp_name,
             --(c_db.min_log_time||' ~ '||c_db.max_log_time)::varchar,
             --v_sql::text,
             sqlstate::varchar,
             sqlerrm::text,
             'n20'::varchar,
             'dba'::varchar
           );

      END;
    end loop;


    RETURN;
end;
$function$
posted @ 2018-06-22 14:58  peiybpeiyb  阅读(487)  评论(0编辑  收藏  举报