监测并记录 Linux 上 Java 进程的 JVM 内存和 GC 信息的脚本

参考文章

JVM监控利器:jstat命令全方位详解
JVM内存监控shell脚本
JVM学习-内存监控工具(五)
jvm 内存监控脚本 jvm常用的监控和调试工具

脚本

#!/bin/bash
# Purpose: sample a JVM's memory-pool sizes and GC counters and append them to a rotating log file.
# Usage:   ./jvm_gc_monitor.sh <Java PID> [interval in seconds, default 10] [log directory, default /var/log/jvm_monitor]
# Units written to the log: sizes in MB, times in ms.

# ===================== Configuration =====================
# Substituted into the logrotate rule's `size` directive: rotate once the log grows past this.
MAX_LOG_SIZE=100M
# Substituted into the logrotate rule's `rotate` directive, i.e. how many
# rotated files are kept (a file count, not a number of days).
LOG_RETENTION=7
# =========================================================

# Argument check: the Java PID is mandatory. Usage help is a diagnostic, so it
# goes to stderr to keep stdout clean for the monitoring output.
if (( $# < 1 )); then
    {
        echo "用法错误!正确用法:$0 <Java_PID> [监测间隔(秒)] [日志目录]"
        echo "示例:$0 12345 5 /data/logs/jvm"
    } >&2
    exit 1
fi

# Core settings taken from the positional arguments.
JAVA_PID=$1                                 # PID of the JVM to monitor
INTERVAL=${2:-10}                           # seconds between samples (default 10)
LOG_DIR=${3:-/var/log/jvm_monitor}          # directory that holds the monitor log
LOG_FILE="${LOG_DIR}/jvm_gc_monitor.log"
# Quote $0 so a script path containing spaces or glob characters is not word-split.
SCRIPT_NAME=$(basename -- "$0")

#######################################
# Abort the script when a required command is not available.
# Arguments: $1 - name of the command to look up with `command -v`
# Outputs:   an error message on stderr when the command is missing
# Returns:   0 when the command exists; otherwise exits the shell with status 1
#######################################
check_dependency() {
    local cmd=$1
    if ! command -v "$cmd" > /dev/null 2>&1; then
        # The hint is generic because this function is also used for non-JDK tools.
        echo "错误:未找到 $cmd 命令,请确保其已安装并位于PATH中(jstat等工具由JDK提供)!" >&2
        exit 1
    fi
}
# Verify every external tool the script relies on, in the same order as before;
# check_dependency exits the script on the first one that is missing.
for required_cmd in jstat awk bc logrotate; do
    check_dependency "$required_cmd"
done
unset -v required_cmd

# Validate the target: the PID must consist of digits only, and the command
# name that ps reports for it must contain "java" (case-insensitive).
# Errors are diagnostics, so they are written to stderr.
if ! [[ "$JAVA_PID" =~ ^[0-9]+$ ]]; then
    echo "错误:PID必须是数字!" >&2
    exit 1
fi
# `-o comm=` prints just the command name with no header line; grep -q only
# reports whether there was a match.
if ! ps -p "$JAVA_PID" -o comm= | grep -qi java; then
    echo "错误:PID $JAVA_PID 不是Java进程!" >&2
    exit 1
fi

# Create the log directory if it does not exist; abort when that fails.
mkdir -p -- "$LOG_DIR" || { echo "错误:无法创建日志目录 $LOG_DIR,请检查权限!" >&2; exit 1; }

# Generate the logrotate rule for the monitor log. The log path is quoted in
# the rule so a directory containing spaces is still read as one file name.
# NOTE(review): writing under /etc/logrotate.d normally needs root, and the
# rule recreates the log as root:root — confirm this matches the deployment.
LOGROTATE_CONF="/etc/logrotate.d/jvm_gc_monitor"
if cat > "$LOGROTATE_CONF" << EOF
"$LOG_FILE" {
    size $MAX_LOG_SIZE
    rotate $LOG_RETENTION
    compress
    missingok
    notifempty
    create 0644 root root
}
EOF
then
    echo "日志滚动配置已写入 $LOGROTATE_CONF"
else
    # Do not claim success when the rule could not be written; monitoring
    # itself can still proceed, so this is a warning rather than a fatal error.
    echo "警告:无法写入日志滚动配置 $LOGROTATE_CONF,日志将不会自动滚动" >&2
fi

# When the log file is missing or empty, start it with a banner, one line
# describing the monitored JVM, and the CSV column header for the samples.
if [[ ! -s "$LOG_FILE" ]]; then
    echo -e "=== JVM GC 监测日志 ===\n监测时间\tPID\tJVM版本" > "$LOG_FILE"

    # First choice: `jcmd <pid> VM.version`. Keep the first line mentioning
    # JDK or JRE, blank its first two fields, then trim the leading spaces.
    JVM_VERSION=$(jcmd "$JAVA_PID" VM.version 2>/dev/null \
        | grep -E "JDK|JRE" \
        | head -1 \
        | awk '{$1=$2=""; print $0}' \
        | sed 's/^ *//g')
    # Fallback: the double-quoted version string from `java -version`.
    [[ -n "$JVM_VERSION" ]] || JVM_VERSION=$(java -version 2>&1 | grep -i version | awk -F '"' '{print $2}')

    # The trailing \n leaves a blank line between the banner and the CSV part.
    echo -e "$(date "+%Y-%m-%d %H:%M:%S")\t$JAVA_PID\t$JVM_VERSION\n" >> "$LOG_FILE"

    # Column names for the per-sample rows.
    echo -e "ts,survivor_capacity,survivor_usage,eden_capacity,eden_usage,old_capacity,old_usage,meta_capacity,meta_usage,younggc_times,younggc_elapse,fullgc_times,fullgc_elapse" >> "$LOG_FILE"
fi

# ===================== Sampling loop and its helpers =====================

#######################################
# Print "<lhs> <op> <rhs>" evaluated by bc at the given scale.
# Arguments: $1 - bc scale, $2 - operator, $3 - left operand, $4 - right operand
# Outputs:   the bc result on stdout, or "-" when an operand is not a plain
#            non-negative decimal (e.g. empty or "-"), so a bad field can
#            never turn into a bc syntax error
#######################################
_gc_calc() {
    local scale=$1 op=$2 lhs=$3 rhs=$4
    local num_re='^[0-9]*[.]?[0-9]+$'
    if ! [[ "$lhs" =~ $num_re && "$rhs" =~ $num_re ]]; then
        echo "-"
        return 0
    fi
    echo "scale=$scale; $lhs $op $rhs" | bc
}

# Convert a size in KB to MB (scale=2).
_kb_to_mb() { _gc_calc 2 / "$1" 1024; }

# Convert a duration in seconds to milliseconds.
_sec_to_ms() { _gc_calc 0 '*' "$1" 1000; }

# Consecutive iterations in which the process was found to be gone.
RETRY_COUNT=0

echo "开始监测Java进程(PID: $JAVA_PID),日志文件: $LOG_FILE,监测间隔: $INTERVAL 秒(按Ctrl+C停止)"
while true; do
    CURRENT_TIME=$(date "+%Y-%m-%d %H:%M:%S")

    # Liveness check: log a placeholder row each time the process is missing
    # and give up after three consecutive misses.
    if ! ps -p "$JAVA_PID" > /dev/null 2>&1; then
        echo -e "$CURRENT_TIME\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t进程已退出" >> "$LOG_FILE"
        echo "[$CURRENT_TIME] 警告:Java进程(PID: $JAVA_PID)已退出,脚本将在3次重试后退出"
        RETRY_COUNT=$((RETRY_COUNT + 1))
        if (( RETRY_COUNT >= 3 )); then
            echo "[$CURRENT_TIME] 错误:Java进程已退出,脚本退出"
            exit 1
        fi
        sleep "$INTERVAL"
        continue
    fi
    RETRY_COUNT=0

    # ==================== Parse `jstat -gc` ====================
    # Columns are taken by position from the last output line:
    #   S0C S1C S0U S1U EC EU OC OU MC MU CCSC CCSU YGC YGCT FGC FGCT ...
    # Sizes are treated as KB and the *T columns as seconds.
    # NOTE(review): the column layout depends on the JDK version — confirm for
    # the JDKs this script is used with.
    GC_STAT=$(jstat -gc "$JAVA_PID" 2>/dev/null)
    GC_LINE=${GC_STAT##*$'\n'}    # last line = the data row
    # One read splits every field; columns 11-12 (CCSC/CCSU) and anything after
    # FGCT are not logged and land in the throwaway variable.
    read -r S0C S1C S0U S1U EC EU OC OU MC MU _ _ YGC YGCT FGC FGCT _ <<< "$GC_LINE"

    # Fewer than 16 fields means jstat failed or printed something unexpected;
    # skip this sample instead of feeding garbage to bc and logging blanks.
    if [[ -z "$FGCT" ]]; then
        echo "[$CURRENT_TIME] 警告:无法解析 jstat -gc 输出(PID: $JAVA_PID),跳过本次采样" >&2
        sleep "$INTERVAL"
        continue
    fi

    # Survivor space = S0 + S1, each converted to MB first and then summed.
    S0_CAPACITY_MB=$(_kb_to_mb "$S0C")
    S1_CAPACITY_MB=$(_kb_to_mb "$S1C")
    S0_USAGE_MB=$(_kb_to_mb "$S0U")
    S1_USAGE_MB=$(_kb_to_mb "$S1U")
    SURVIVOR_CAPACITY_MB=$(_gc_calc 2 + "$S0_CAPACITY_MB" "$S1_CAPACITY_MB")
    SURVIVOR_USAGE_MB=$(_gc_calc 2 + "$S0_USAGE_MB" "$S1_USAGE_MB")

    EDEN_CAPACITY_MB=$(_kb_to_mb "$EC")    # Eden capacity
    EDEN_USAGE_MB=$(_kb_to_mb "$EU")       # Eden usage
    OLD_CAPACITY_MB=$(_kb_to_mb "$OC")     # old generation capacity
    OLD_USAGE_MB=$(_kb_to_mb "$OU")        # old generation usage
    META_CAPACITY_MB=$(_kb_to_mb "$MC")    # metaspace capacity
    META_USAGE_MB=$(_kb_to_mb "$MU")       # metaspace usage

    YGC_COUNT=$YGC                         # young GC count
    YGCT_MS=$(_sec_to_ms "$YGCT")          # young GC time, seconds -> ms
    FGC_COUNT=$FGC                         # full GC count
    FGCT_MS=$(_sec_to_ms "$FGCT")          # full GC time, seconds -> ms

    # ==================== Append the CSV row ====================
    echo -e "$CURRENT_TIME,$SURVIVOR_CAPACITY_MB,$SURVIVOR_USAGE_MB,$EDEN_CAPACITY_MB,$EDEN_USAGE_MB,$OLD_CAPACITY_MB,$OLD_USAGE_MB,$META_CAPACITY_MB,$META_USAGE_MB,$YGC_COUNT,$YGCT_MS,$FGC_COUNT,$FGCT_MS" >> "$LOG_FILE"

    # Console echo of the same sample.
    echo "[$CURRENT_TIME] PID: $JAVA_PID | Eden: ${EDEN_USAGE_MB}MB/${EDEN_CAPACITY_MB}MB | old: ${OLD_USAGE_MB}MB/${OLD_CAPACITY_MB}MB | YoungGC: $YGC_COUNT 次($YGCT_MS ms) | FullGC: $FGC_COUNT 次($FGCT_MS ms)"

    # Let logrotate decide whether the log is due for rotation. No -f here:
    # forcing would rotate on every iteration regardless of the `size` limit
    # in the rule. Failures are deliberately ignored (best effort).
    logrotate "$LOGROTATE_CONF" > /dev/null 2>&1

    sleep "$INTERVAL"
done

posted @ 2026-02-27 16:12  lisacumt  阅读(2)  评论(0)    收藏  举报