[zabbix] 服务和配置 - 迁

 配置

 

1 服务器端配置

1 mysql
mysqladmin -uroot password 'mysql'

2 php

./configure --prefix=/usr/local/php-5.6.15 --with-config-file-path=/usr/local/php-5.6.15/etc --with-bz2 --with-curl --enable-ftp --enable-sockets --disable-ipv6 --with-gd --with-jpeg-dir=/usr/local --with-png-dir=/usr/local --with-freetype-dir=/usr/local --enable-gd-native-ttf --with-iconv-dir=/usr/local --enable-mbstring --enable-calendar --with-gettext --with-libxml-dir=/usr/local --with-zlib --with-pdo-mysql=mysqlnd --with-mysqli=mysqlnd --with-mysql=mysqlnd --enable-dom --enable-xml --enable-fpm --with-libdir=lib64
make && make install

缺少php的gettext 模块

http://blog.csdn.net/u010098331/article/details/50750771

php-fpm的配置

[global]
pid = run/php-fpm.pid
error_log = log/php-fpm.log


[www]
user=www
group=www
listen = 127.0.0.1:9000
pm=dynamic 
pm.max_children=200 # 配置的太少会把子进程消耗完
pm.start_servers=30 
pm.min_spare_servers=30 
pm.max_spare_servers=60

pm.status_path = /php_fpm-status  # 采集php-fpm的状态

slowlog = log/$pool.log.slow
php_admin_flag[log_errors] = on
php_admin_value[memory_limit] = 512M  # 默认值小,如果zabbix admin页面内容多的话会无响应

  

3 安装 bcmath

cd ext/bcmath/
/usr/local/php-5.6.15/bin/phpize
./configure --with-php-config=/usr/local/php-5.6.15/bin/php-config 
make && make install

  

在php.ini中配置extension_dir ,并把bcmath.so 拷贝过去
extension=/usr/local/php-5.6.15/ext/bcmath.so
cp modules/bcmath.so /usr/local/php-5.6.15/ext/

4 nginx

server {
        listen 80;
        server_name  love.zabbix;
        index index.html  index.php;
        root /www/zabbix;
        location ~ .*\.(php|php5)?$
        {
                fastcgi_pass  127.0.0.1:9000;
                fastcgi_index index.php;
                include fastcgi.conf;
        }
        location ~ .*\.(gif|jpg|jpeg|png|bmp|swf)$
        {
                expires 30d;
        }
        location ~ .*\.(js|css)?$
        {
                expires 1h;
        }
  #      access_log off;
}

 

5 zabbix server  

配置页

http://localhost/setup.php

LogFile=/tmp/zabbix_server.log
DebugLevel=3
DBHost=localhost
DBName=zabbix
DBUser=xxxx
DBPassword=xxxx
DBSocket=/tmp/mysql.sock
DBPort=3306

JavaGateway=127.0.0.1
JavaGatewayPort=10052
StartJavaPollers=5
StartDiscoverers=10

AlertScriptsPath=/etc/zabbix/alertscripts
ExternalScripts=/etc/zabbix/externalscripts
#外部脚本目录
#FpingLocation=/usr/sbin/fping
StartPollers=500
StartPingers=150
StartTrappers=100
TrapperTimeout=100
StartDiscoverers=100
StartPollersUnreachable=100
#告警线程总数 3.4 之后有效
StartAlerters=50

Timeout=30

UnreachablePeriod=50
UnavailableDelay=60
UnreachableDelay=20
LogSlowQueries=0
TmpDir=/tmp

StartProxyPollers=10
#在zabbix proxy被动模式下用此参数
#ProxyConfigFrequency=1000
ProxyDataFrequency=1

CacheSize=1024M
StartDBSyncers=20
TrendCacheSize=1024M
ValueCacheSize=1024M
TrendCacheSize=256M
ValueCacheSize=1024M
HistoryCacheSize=1024M
HistoryIndexCacheSize=512M

VMwareCacheSize=200M
StartVMwareCollectors=100
VMwareFrequency=60

HousekeepingFrequency=3
#zabbix执行Housekeeping的频率,单位为hours
MaxHousekeeperDelete=50000
#每次最多删除历史数据的行

 

zabbix很容易出现历史表过大的问题.zabbix官方也是建议使用表分区

每日执行脚本

/usr/local/mysql/bin/mysql -uzabbix -pXXXXXX zabbix -e "CALL partition_maintenance_all('zabbix');" 

  

执行日志

Tue Nov  6 17:30:01 CST 2018
call partition_maintenance_all function:
========================================
msg
partition_create(zabbix,history,p201812050000,1544025600)
table   partitions_deleted
zabbix.history  p201810060000
msg
partition_create(zabbix,history_log,p201812050000,1544025600)
table   partitions_deleted
zabbix.history_log      p201810060000
msg
partition_create(zabbix,history_str,p201812050000,1544025600)
table   partitions_deleted
zabbix.history_str      p201810060000
msg
partition_create(zabbix,history_text,p201812050000,1544025600)
table   partitions_deleted
zabbix.history_text     p201810060000
msg
partition_create(zabbix,history_uint,p201812050000,1544025600)
table   partitions_deleted
zabbix.history_uint     p201810060000
msg
partition_create(zabbix,trends,p201812050000,1544025600)
table   partitions_deleted
zabbix.trends   p201805090000
msg
partition_create(zabbix,trends_uint,p201812050000,1544025600)
table   partitions_deleted
zabbix.trends_uint      p201805090000

  

 

调用存储过程

DELIMITER $$
CREATE PROCEDURE `partition_maintenance_all`(SCHEMA_NAME VARCHAR(64))
BEGIN
        /*
           Run partition_maintenance once for each Zabbix history and trends
           table in the given schema.

           SCHEMA_NAME = The DB schema that holds the tables. Declared as
                         VARCHAR(64) so that any schema name MySQL accepts
                         (identifiers may be up to 64 characters long) fits
                         without truncation, matching partition_create,
                         partition_drop and partition_verify.

           The three numeric arguments of every call are, in order,
           KEEP_DATA_DAYS, HOURLY_INTERVAL and CREATE_NEXT_INTERVALS of
           partition_maintenance:
             - history* tables: keep 30 days, 24-hour partitions, 30 intervals ahead
             - trends*  tables: keep 180 days, 24-hour partitions, 30 intervals ahead
        */
        CALL partition_maintenance(SCHEMA_NAME, 'history', 30, 24, 30);
        CALL partition_maintenance(SCHEMA_NAME, 'history_log', 30, 24, 30);
        CALL partition_maintenance(SCHEMA_NAME, 'history_str', 30, 24, 30);
        CALL partition_maintenance(SCHEMA_NAME, 'history_text', 30, 24, 30);
        CALL partition_maintenance(SCHEMA_NAME, 'history_uint', 30, 24, 30);
        CALL partition_maintenance(SCHEMA_NAME, 'trends', 180, 24, 30);
        CALL partition_maintenance(SCHEMA_NAME, 'trends_uint', 180, 24, 30);
END$$
DELIMITER ;

  

 

分区脚本

DELIMITER $$
CREATE PROCEDURE `partition_create`(SCHEMANAME varchar(64), TABLENAME varchar(64), PARTITIONNAME varchar(64), CLOCK int)
create_partition: BEGIN
        /*
           Add one RANGE partition to SCHEMANAME.TABLENAME unless an existing
           partition already has a description (upper bound) >= CLOCK.

           SCHEMANAME    = The DB schema in which to make changes
           TABLENAME     = The table that receives the new partition
           PARTITIONNAME = The name of the partition to create
           CLOCK         = The VALUES LESS THAN bound of the new partition

           When a partition is created, a one-row result set with the column
           `msg` is returned describing the call.
        */
        DECLARE covering_partitions INT;

        -- Count the partitions whose bound already reaches CLOCK.
        SELECT COUNT(*)
          INTO covering_partitions
          FROM information_schema.partitions
         WHERE table_schema = SCHEMANAME
           AND table_name = TABLENAME
           AND partition_description >= CLOCK;

        -- Nothing to do when such a partition exists.
        IF covering_partitions <> 0 THEN
                LEAVE create_partition;
        END IF;

        -- Report what is about to be created.
        SELECT CONCAT('partition_create(', SCHEMANAME, ',', TABLENAME, ',', PARTITIONNAME, ',', CLOCK, ')') AS msg;

        -- Build the ALTER TABLE statement and run it as a prepared statement.
        SET @sql = CONCAT(
                'ALTER TABLE ', SCHEMANAME, '.', TABLENAME,
                ' ADD PARTITION (PARTITION ', PARTITIONNAME,
                ' VALUES LESS THAN (', CLOCK, '));'
        );
        PREPARE STMT FROM @sql;
        EXECUTE STMT;
        DEALLOCATE PREPARE STMT;
END create_partition$$
DELIMITER ;
 
DELIMITER $$
CREATE PROCEDURE `partition_drop`(SCHEMANAME VARCHAR(64), TABLENAME VARCHAR(64), DELETE_BELOW_PARTITION_DATE BIGINT)
BEGIN
        /*
           Drop every partition of SCHEMANAME.TABLENAME whose name, read as a
           number after removing its first character, is lower than
           DELETE_BELOW_PARTITION_DATE.

           SCHEMANAME = The DB schema in which to make changes
           TABLENAME = The table with partitions to potentially delete
           DELETE_BELOW_PARTITION_DATE = Numeric threshold compared with the digits of
                   the partition name (partition_maintenance passes a value formatted
                   as yyyymmdd0000)

           Always returns one result set with the columns `table` and
           `partitions_deleted`; the latter is a comma-separated list of the dropped
           partitions, or "N/A" when nothing was dropped.
        */
        DECLARE done INT DEFAULT FALSE;
        -- Partition names are identifiers of up to 64 characters, so the fetch
        -- buffer must be that wide to never truncate a name.
        DECLARE drop_part_name VARCHAR(64);

        /*
           Get a list of all the partitions that are older than the date
           in DELETE_BELOW_PARTITION_DATE.  All partitions are prefixed with
           a "p", so use SUBSTRING TO get rid of that character.
        */
        DECLARE myCursor CURSOR FOR
                SELECT partition_name
                FROM information_schema.partitions
                WHERE table_schema = SCHEMANAME AND table_name = TABLENAME AND CAST(SUBSTRING(partition_name FROM 2) AS UNSIGNED) < DELETE_BELOW_PARTITION_DATE;
        -- FETCH past the last row raises NOT FOUND; turn that into a loop exit flag.
        DECLARE CONTINUE HANDLER FOR NOT FOUND SET done = TRUE;

        /*
           Create the basics for when we need to drop the partition.  Also, create
           @drop_partitions to hold a comma-delimited list of all partitions that
           should be deleted.
        */
        SET @alter_header = CONCAT("ALTER TABLE ", SCHEMANAME, ".", TABLENAME, " DROP PARTITION ");
        SET @drop_partitions = "";

        /*
           Start looping through all the partitions that are too old.
        */
        OPEN myCursor;
        read_loop: LOOP
                FETCH myCursor INTO drop_part_name;
                IF done THEN
                        LEAVE read_loop;
                END IF;
                SET @drop_partitions = IF(@drop_partitions = "", drop_part_name, CONCAT(@drop_partitions, ",", drop_part_name));
        END LOOP;
        -- Release the cursor explicitly instead of relying on the end of the block.
        CLOSE myCursor;

        IF @drop_partitions != "" THEN
                /*
                   1. Build the SQL to drop all the necessary partitions.
                   2. Run the SQL to drop the partitions.
                   3. Print out the table partitions that were deleted.
                */
                SET @full_sql = CONCAT(@alter_header, @drop_partitions, ";");
                PREPARE STMT FROM @full_sql;
                EXECUTE STMT;
                DEALLOCATE PREPARE STMT;

                SELECT CONCAT(SCHEMANAME, ".", TABLENAME) AS `table`, @drop_partitions AS `partitions_deleted`;
        ELSE
                /*
                   No partitions are being deleted, so print out "N/A" (Not applicable) to indicate
                   that no changes were made.
                */
                SELECT CONCAT(SCHEMANAME, ".", TABLENAME) AS `table`, "N/A" AS `partitions_deleted`;
        END IF;
END$$
DELIMITER ;
 
DELIMITER $$
CREATE PROCEDURE `partition_maintenance`(SCHEMA_NAME VARCHAR(64), TABLE_NAME VARCHAR(64), KEEP_DATA_DAYS INT, HOURLY_INTERVAL INT, CREATE_NEXT_INTERVALS INT)
BEGIN
        /*
           Maintain the partitions of one table: make sure it is partitioned,
           create the upcoming partitions, then drop the expired ones.

           SCHEMA_NAME = The DB schema in which to make changes (up to 64 characters,
                         the same width the other partition_* procedures accept)
           TABLE_NAME = The table to maintain
           KEEP_DATA_DAYS = Partitions named for a date older than this many days
                            before now are dropped
           HOURLY_INTERVAL = Width of one partition in hours
           CREATE_NEXT_INTERVALS = Number of intervals, counted from today 00:00,
                                   for which partitions are requested
        */
        DECLARE OLDER_THAN_PARTITION_DATE VARCHAR(16);
        DECLARE PARTITION_NAME VARCHAR(16);
        DECLARE OLD_PARTITION_NAME VARCHAR(16);
        DECLARE LESS_THAN_TIMESTAMP INT;
        DECLARE CUR_TIME INT;

        CALL partition_verify(SCHEMA_NAME, TABLE_NAME, HOURLY_INTERVAL);
        -- Unix timestamp of today at 00:00:00; all partition bounds are offsets from it.
        SET CUR_TIME = UNIX_TIMESTAMP(DATE_FORMAT(NOW(), '%Y-%m-%d 00:00:00'));

        SET @__interval = 1;
        create_loop: LOOP
                IF @__interval > CREATE_NEXT_INTERVALS THEN
                        LEAVE create_loop;
                END IF;

                -- The partition is named after the start of its interval and bounded by its end.
                SET LESS_THAN_TIMESTAMP = CUR_TIME + (HOURLY_INTERVAL * @__interval * 3600);
                SET PARTITION_NAME = FROM_UNIXTIME(CUR_TIME + HOURLY_INTERVAL * (@__interval - 1) * 3600, 'p%Y%m%d%H00');
                -- Skip a candidate whose name repeats the previous one.  OLD_PARTITION_NAME
                -- is NULL on the first pass, and "name != NULL" evaluates to NULL (treated
                -- as false), which would silently skip the first interval; test for NULL
                -- explicitly so that interval is handed to partition_create as well.
                IF OLD_PARTITION_NAME IS NULL OR PARTITION_NAME != OLD_PARTITION_NAME THEN
                        CALL partition_create(SCHEMA_NAME, TABLE_NAME, PARTITION_NAME, LESS_THAN_TIMESTAMP);
                END IF;
                SET @__interval=@__interval+1;
                SET OLD_PARTITION_NAME = PARTITION_NAME;
        END LOOP;

        -- Threshold in the same yyyymmddHHMM digit layout as the partition names (without the "p").
        SET OLDER_THAN_PARTITION_DATE=DATE_FORMAT(DATE_SUB(NOW(), INTERVAL KEEP_DATA_DAYS DAY), '%Y%m%d0000');
        CALL partition_drop(SCHEMA_NAME, TABLE_NAME, OLDER_THAN_PARTITION_DATE);

END$$
DELIMITER ;
 
DELIMITER $$
CREATE PROCEDURE `partition_verify`(SCHEMANAME VARCHAR(64), TABLENAME VARCHAR(64), HOURLYINTERVAL INT(11))
BEGIN
        /*
         * Make sure SCHEMANAME.TABLENAME is partitioned by RANGE(`clock`); if it
         * is not, create its first partition.
         *
         * SCHEMANAME = The DB schema in which to make changes
         * TABLENAME = The table to check
         * HOURLYINTERVAL = Width in hours of the first partition, counted from
         *                  today 00:00:00
         */

        -- Deliberately NOT named PARTITION_NAME: inside a stored routine MySQL
        -- resolves a name shared by a local variable and a column to the variable,
        -- which would turn the "partition_name IS NULL" filter below into a test
        -- of the (NULL) variable and make it match every partition row.
        DECLARE FIRST_PARTITION_NAME VARCHAR(16);
        DECLARE RETROWS INT;
        DECLARE FUTURE_TIMESTAMP TIMESTAMP;

        /*
         * Check if any partitions exist for the given SCHEMANAME.TABLENAME.
         * The query counts rows of information_schema.partitions for the table
         * whose partition_name column is NULL; the column is qualified with the
         * table alias so it can never be mistaken for a local variable.
         */
        SELECT COUNT(1) INTO RETROWS
        FROM information_schema.partitions AS p
        WHERE p.table_schema = SCHEMANAME AND p.table_name = TABLENAME AND p.partition_name IS NULL;

        /*
         * If partitions do not exist, go ahead and partition the table
         */
        IF RETROWS = 1 THEN
                /*
                 * Take the current date at 00:00:00 and add HOURLYINTERVAL to it.  This is the timestamp below which we will store values.
                 * We begin partitioning based on the beginning of a day.  This is because we don't want to generate a random partition
                 * that won't necessarily fall in line with the desired partition naming (ie: if the hour interval is 24 hours, we could
                 * end up creating a partition now named "p201403270600" when all other partitions will be like "p201403280000").
                 */
                SET FUTURE_TIMESTAMP = TIMESTAMPADD(HOUR, HOURLYINTERVAL, CONCAT(CURDATE(), " ", '00:00:00'));
                SET FIRST_PARTITION_NAME = DATE_FORMAT(CURDATE(), 'p%Y%m%d%H00');

                -- Create the partitioning query
                SET @__PARTITION_SQL = CONCAT("ALTER TABLE ", SCHEMANAME, ".", TABLENAME, " PARTITION BY RANGE(`clock`)");
                SET @__PARTITION_SQL = CONCAT(@__PARTITION_SQL, "(PARTITION ", FIRST_PARTITION_NAME, " VALUES LESS THAN (", UNIX_TIMESTAMP(FUTURE_TIMESTAMP), "));");

                -- Run the partitioning query
                PREPARE STMT FROM @__PARTITION_SQL;
                EXECUTE STMT;
                DEALLOCATE PREPARE STMT;
        END IF;
END$$
DELIMITER ;

  

 

 

2 代理proxy端

安装依赖

yum install curl* curl-devel libpcre libevent-devel -y

安装参数    

./configure --prefix=/usr/local/zabbix_proxy --sysconfdir=/etc/zabbix --enable-proxy --enable-agent  --enable-java --with-net-snmp --with-libcurl --with-libxml2  --with-openipmi --with-unixodbc --with-ldap --with-ssh2 --with-mysql=locate && make install

配置 

ProxyMode=0
LogFileSize=0
LogSlowQueries=3000
Timeout=4
Server=1.1.1.1
#zabbix服务端IP
Hostname=proxy-node
#必须和WEB页面添加代理时设置的名称一致
LogFile=/tmp/zabbix_proxy.log
#日志文件路径
DBHost=localhost
#数据库IP
DBName=zabbix
#数据库名
DBUser=XXXXX
#数据库用户名
DBPassword=XXXX
#数据库密码
DBSocket=/tmp/mysql.sock
DBPort=3306
DataSenderFrequency=5
#数据同步间隔
ConfigFrequency=60
#配置文件同步间隔
ProxyLocalBuffer=0
#当数据发送到Server,还要在本地保留多少小时.不保留
ProxyOfflineBuffer=3
#当数据没有发送到Server,在本地保留多少小时,3小时。
HeartbeatFrequency=60
#心跳检测代理在Server的可用性
ConfigFrequency=300
#代理多久从Server获取一次配置变化,默认3600秒.
DataSenderFrequency=3
#代理收集到数据后,多久向Server发送一次..
CacheSize=512M 
#用来保存监控数据的缓存数,根据监控主机数量适当调整
StartTrappers=20
# 接受客户端主动提交的trapper进程数量
StartPollers=20
# 以主动方式提取客户端监控数据
StartPingers=10
# ping的启动进程数量

#外部脚本目录
FpingLocation=/usr/sbin/fping

  

进程负载过高应该调整的参数值

Zabbix busy trapper processes, in %                   StartTrappers=5
Zabbix busy poller processes, in %                      StartPollers=5
Zabbix busy ipmi poller processes, in %               StartIPMIPollers=0
Zabbix busy discoverer processes, in %                StartDiscoverers=1
Zabbix busy icmp pinger processes, in %              StartPingers=1
Zabbix busy http poller processes, in %                StartHTTPPollers=1
Zabbix busy proxy poller processes, in %              StartProxyPollers=1
Zabbix busy unreachable poller processes, in %    StartPollersUnreachable=1
Zabbix busy java poller processes, in %                 StartJavaPollers=0
Zabbix busy snmp trapper processes, in %            StartSNMPTrapper=0
Zabbix busy vmware collector processes, in %       StartVMwareCollectors=0

  

3 agent 客户端

安装直接使用已编译完的包,解压即可

配置文件

cat >>/usr/local/zabbix-agent/etc/zabbix_agentd.conf << EOF
#version 1
LogFile=/tmp/zabbix_agentd.log
Server=yum.ops.net
ServerActive=yum.ops.net
Include=/usr/local/zabbix-agent/etc/zabbix_agentd.conf.d
Timeout=15

Hostname=web.base-1-4
EOF

systemctl start zabbix-agent

   

4 添加监控

http://ywzhou.blog.51cto.com/2785388/1580913

注意 在触发器中配置了

 

手工添加主机

 

 自定义监控项目

查询及时数据

  

 

5 配置检测

服务器端的检测

zabbix_get -s 1-16.goldcoin  -p 10050 -k 'jb[currentThreadsBusy]'

正常返回会是一个value值

 

6 各种监控服务

日志监控

一般先使用自动发现来发现日志文件,然后为其创建日志监控项,最后根据日志监控值进行触发操作。

自动发现中的发现规则
pyora-discovery[{$ADDRESS},{$DATABASE},show_alert]

自动发现项中的监控项原型:
log[{#ALERTLOG},"ORA-|Checkpoint not complete|Can not allocate log|fail|halt|abort|panic",,,skip,]

自动发现中的触发器
({Template Pyora-primary:log[{#ALERTLOG},"ORA-|Checkpoint not complete|Can not allocate log|fail|halt|abort|panic",,,skip,].regexp("ORA-|Checkpoint not complete|Can not allocate log|halt|abort|panic")}=1) and ({Template Pyora-primary:log[{#ALERTLOG},"ORA-|Checkpoint not complete|Can not allocate log|fail|halt|abort|panic",,,skip,].regexp("ORA-20001|ORA-00235|DBA-|16037|ORA-609|12537|12547|Tns error struct|ORA-48913|12541|ORA-16037")}=0 and {Template Pyora-primary:log[{#ALERTLOG},"ORA-|Checkpoint not complete|Can not allocate log|fail|halt|abort|panic",,,skip,].nodata(600)}=0)

A:

({Template Pyora-primary:log[{#ALERTLOG},"ORA-|Checkpoint not complete|Can not allocate log|fail|halt|abort|panic",,,skip,].regexp("ORA-|Checkpoint not complete|Can not allocate log|halt|abort|panic")}=1) 

log中包含了 regexp 的字符串,则表达式为真 

 

B:

({Template Pyora-primary:log[{#ALERTLOG},"ORA-|Checkpoint not complete|Can not allocate log|fail|halt|abort|panic",,,skip,].regexp("ORA-20001|ORA-00235|DBA-|16037|ORA-609|12537|12547|Tns error struct|ORA-48913|12541|ORA-16037")}=0

log中如果匹配到 regexp中的字符串,则为假,用于过滤一些不需要的告警触发

 

C:

{Template Pyora-primary:log[{#ALERTLOG},"ORA-|Checkpoint not complete|Can not allocate log|fail|halt|abort|panic",,,skip,].nodata(600)}=0)

表示600秒内有新数据产生则表达式为真;如果600秒内没有新数据,则表达式为假。为假,则告警恢复了。

  

 A&B&C 都为真时,触发器表达式才为真。即item返回数据中会包含"断开",则触发器被触发,600秒内没有新数据的话,触发器恢复。这样就保证了触发器不会一直在触发状态。

 

关于log函数

log[/path/to/some/file,<regexp>,<encoding>,<maxlines>,<mode>,<output>]

logrt[/path/to/some/filename_format,<regexp>,<encoding>,<maxlines>,<mode>,<output>]

    ◆ regexp:要匹配内容的正则表达式,或者直接写你要检索的内容也可以,例如我想检索带ERROR关键词的记录

    ◆ encoding:编码相关,留空即可

    ◆ maxlines:一次性最多提交多少行,这个参数覆盖配置文件zabbix_agentd.conf中的’MaxLinesPerSecond’,我们也可以留空

    ◆ mode:默认是all,也可以是skip,skip会跳过老数据

    ◆ output:输出给zabbix server的数据。可以是\1、\2一直到\9,\1表示正则表达式中第一个分组匹配出的内容,\2表示第二个分组匹配出的内容。

logrt的第一个参数可以使用正则表达式。针对日志回滚用的,例如我们每天都切割nginx日志,日志名为www.a.com_2015-01-01.log、www.a.com_2015-01-02.log等等

注意事项:

    1、Zabbix Server和Zabbix Agent会追踪日志文件的大小和最后修改时间,并且分别记录在字节计数器和最新的时间计数器中。

    2、Agent会从上次读取日志的地方开始读取日志。

    3、字节计数器和最新时间计数器的数据会被记录在Zabbix数据库,并且发送给Agent,这样能够保证Agent从上次停止的地方开始读取日志。

    4、当日志文件大小小于字节计数器中的数字时,字节计数器会变为0,从头开始读取文件。

    5、所有符合配置的文件,都会被监控。

    6、一个目录下的多个文件如果修改时间相同,会按照字母顺序来读取。

    7、到每个Update interval的时间时,Agent会检查一次目录下的文件。

    8、Zabbix Agent每秒发送日志量,有一个日志行数上限,防止网络和CPU负载过高,这个数字在zabbix_agentd.conf中的MaxLinesPerSecond。

    9、在logrt中,正则表达式只对文件名有效,对文件目录无效。

  

 

 

 

磁盘监控

可以将磁盘监控加到里面
https://github.com/Kevalin/ZabbixScripts

 

nginx监控

http://www.ttlsa.com/zabbix/zabbix-monitor-nginx-performance/

 

使用grafana 展示

http://8789878.blog.51cto.com/8779878/1681800

 

 

7 vmware的支持

支持vmware-scan需要安装libxml2

要装libxml2 ,anaconda2有个坑,把xml2-config 去掉再编译  

 

 

向zabbix添加esxi的主机监控,  能否通过proxy采集还要测试 

esxi主机监控:

模板 TEMPLATE_-_VMWARE_-_STANDALONE_ESXI_HOST

 

 

 

 

 

 

 

注意auto discovery的时间默认是3600s,调到60s

 

1.7.1 自动获取后 主机名都是ip地址,使用/etc/hosts 写 ip + 主机名
1.7.2 告警脚本都需要使用zabbix属组用户
1.7.3 测试告警只能通过调整触发器,而触发器都是公共的,没有服务器自定义的
1.7.4 告警确认之后,就不会再发,不然会连续发3
1.7.5 出现zabbix server负载高的问题 ,调整线程:

Zabbix busy trapper processes, in % StartTrappers=5
Zabbix busy poller processes, in % StartPollers=5
Zabbix busy ipmi poller processes, in % StartIPMIPollers=0
Zabbix busy discoverer processes, in % StartDiscoverers=1
Zabbix busy icmp pinger processes, in % StartPingers=1
Zabbix busy http poller processes, in % StartHTTPPollers=1
Zabbix busy proxy poller processes, in % StartProxyPollers=1
Zabbix busy unreachable poller processes, in % StartPollersUnreachable=1
Zabbix busy java poller processes, in % StartJavaPollers=0
Zabbix busy snmp trapper processes, in % StartSNMPTrapper=0
Zabbix busy vmware collector processes, in % StartVMwareCollectors=0

  

1.7.6 图形乱码问题

/usr/local/nginx/html/zabbix/include/defines.inc.php

中把FONT改为 (simkai)simkai.ttf 

1.7.7 zabbix server 调整成分区表

https://www.zabbix.org/wiki/Docs/howto/mysql_partition

posted @ 2018-11-07 11:49  richardzgt  阅读(833)  评论(0编辑  收藏  举报