监测并记录 Linux 上 Java 进程的 JVM 内存和 GC 信息的脚本
参考文章
JVM监控利器:jstat命令全方位详解
JVM内存监控shell脚本
JVM学习-内存监控工具(五)
jvm 内存监控脚本 jvm常用的监控和调试工具
脚本
#!/bin/bash
# Purpose: sample the JVM memory layout and GC counters of one Java process
#          and append them to a log file that is rotated by logrotate.
# Usage:   ./jvm_gc_monitor.sh <Java PID> [interval in seconds, default 10] [log dir, default /var/log/jvm_monitor]
# Units:   sizes are logged in MB, times in ms.
# ===================== Settings =====================
MAX_LOG_SIZE="100M" # logrotate `size` threshold: rotate once the log file grows past this
LOG_RETENTION=7     # logrotate `rotate` value: number of rotated files kept (a count, not days)
# ====================================================
# Argument check: the PID is mandatory, the other two arguments are optional.
if [ $# -lt 1 ]; then
  # Usage errors are diagnostics, so they go to stderr.
  echo "用法错误!正确用法:$0 <Java_PID> [监测间隔(秒)] [日志目录]" >&2
  echo "示例:$0 12345 5 /data/logs/jvm" >&2
  exit 1
fi
# Core variables
JAVA_PID=$1
INTERVAL=${2:-10}
LOG_DIR=${3:-/var/log/jvm_monitor}
LOG_FILE="${LOG_DIR}/jvm_gc_monitor.log"
# Parameter expansion instead of an unquoted `basename $0`, which fails when
# the script path contains whitespace.
SCRIPT_NAME=${0##*/}
#######################################
# Abort the script when a required external command is not on PATH.
# Arguments: $1 - name of the command to look up
# Outputs:   an error message on stderr when the command is missing
# Returns:   0 when the command exists; otherwise exits the shell with 1
#######################################
check_dependency() {
  local cmd=$1
  local hint
  # Quoted on purpose: an empty name must count as "missing" instead of
  # turning into a bare `command -v`, which succeeds.
  if command -v "$cmd" > /dev/null 2>&1; then
    return 0
  fi
  # Only JDK tools get the JDK hint; other tools get a generic one.
  case "$cmd" in
    java | jstat | jcmd | jps | jmap | jstack)
      hint="请确保JDK已安装并配置环境变量!"
      ;;
    *)
      hint="请先安装该命令并确保其位于PATH中!"
      ;;
  esac
  echo "错误:未找到 $cmd 命令,$hint" >&2
  exit 1
}
# Make sure every external tool the script relies on is installed.
for dep in jstat awk bc logrotate; do
  check_dependency "$dep"
done
# The PID argument has to be purely numeric ...
[[ "$JAVA_PID" =~ ^[0-9]+$ ]] || {
  echo "错误:PID必须是数字!"
  exit 1
}
# ... and the command name of that process has to contain "java" (case-insensitive).
if ! ps -p "$JAVA_PID" -o comm= | grep -qi java; then
  echo "错误:PID $JAVA_PID 不是Java进程!"
  exit 1
fi
# Create the log directory if it does not exist yet; without it nothing can be logged.
mkdir -p -- "$LOG_DIR" || {
  echo "错误:无法创建日志目录 $LOG_DIR,请检查权限!" >&2
  exit 1
}
# Write the logrotate rule used to rotate the monitor log.
# The log path is emitted inside double quotes so that a directory name
# containing spaces still forms a valid logrotate entry.
LOGROTATE_CONF="/etc/logrotate.d/jvm_gc_monitor"
if cat > "$LOGROTATE_CONF" << EOF
"$LOG_FILE" {
    size $MAX_LOG_SIZE
    rotate $LOG_RETENTION
    compress
    missingok
    notifempty
    create 0644 root root
}
EOF
then
  echo "日志滚动配置已写入 $LOGROTATE_CONF"
else
  # Writing below /etc normally needs root. Monitoring still works without
  # the rule, so warn instead of claiming success or aborting.
  echo "警告:无法写入 $LOGROTATE_CONF,日志将不会自动滚动(请检查权限)" >&2
fi
# Emit the log preamble only when the log file is missing or empty.
if [[ ! -s "$LOG_FILE" ]]; then
  # Ask the target JVM for its version via jcmd; keep the first line that
  # mentions JDK/JRE, blank its first two fields and trim leading spaces.
  JVM_VERSION=$(jcmd "$JAVA_PID" VM.version 2> /dev/null \
    | grep -E "JDK|JRE" \
    | head -1 \
    | awk '{$1=$2=""; print $0}' \
    | sed 's/^ *//g')
  # Fall back to the quoted version string printed by `java -version`.
  [[ -n "$JVM_VERSION" ]] \
    || JVM_VERSION=$(java -version 2>&1 | grep -i version | awk -F '"' '{print $2}')
  header_time=$(date "+%Y-%m-%d %H:%M:%S")
  # Title and column names of the preamble (this write truncates the file).
  echo -e "=== JVM GC 监测日志 ===\n监测时间\tPID\tJVM版本" > "$LOG_FILE"
  echo -e "${header_time}\t$JAVA_PID\t$JVM_VERSION\n" >> "$LOG_FILE"
  # Column names for the comma-separated sample rows appended later.
  echo -e "ts,survivor_capacity,survivor_usage,eden_capacity,eden_usage,old_capacity,old_usage,meta_capacity,meta_usage,younggc_times,younggc_elapse,fullgc_times,fullgc_elapse" >> "$LOG_FILE"
fi
#######################################
# Turn raw `jstat -gc` output into one line of 12 space-separated values:
#   survivor capacity, survivor usage, eden capacity, eden usage,
#   old capacity, old usage, metaspace capacity, metaspace usage (all MB),
#   young GC count, young GC time (ms), full GC count, full GC time (ms).
# Columns read from the last non-empty line (jstat sizes are KB, times are
# seconds): S0C S1C S0U S1U EC EU OC OU MC MU [CCSC CCSU] YGC YGCT FGC FGCT.
# A column that is not a plain number is reported as "-".
# Arguments: none (reads the jstat output from stdin)
# Outputs:   the 12 values on stdout
# Returns:   non-zero, printing nothing, when there is no data line
#######################################
parse_gc_stat() {
  # LC_ALL=C keeps "." as the decimal separator in awk's output.
  LC_ALL=C awk '
    function isnum(v) { return v ~ /^[0-9]+([.][0-9]+)?$/ }
    function mb(kb) { return isnum(kb) ? sprintf("%.2f", kb / 1024) : "-" }
    function mbsum(a, b) {
      if (!isnum(a) && !isnum(b)) return "-"
      return sprintf("%.2f", ((isnum(a) ? a : 0) + (isnum(b) ? b : 0)) / 1024)
    }
    function ms(sec) { return isnum(sec) ? sprintf("%.0f", sec * 1000) : "-" }
    function cnt(c) { return isnum(c) ? c : "-" }
    NF { rows++; last = $0 }
    END {
      # jstat may print a decimal comma under some locales; normalise it.
      gsub(/,/, ".", last)
      n = split(last, f, " ")
      # Need the header line plus one data line with at least 16 columns.
      if (rows < 2 || n < 16) exit 1
      print mbsum(f[1], f[2]), mbsum(f[3], f[4]), mb(f[5]), mb(f[6]), \
            mb(f[7]), mb(f[8]), mb(f[9]), mb(f[10]), \
            cnt(f[13]), ms(f[14]), cnt(f[15]), ms(f[16])
    }
  '
}

# Main monitoring loop: every $INTERVAL seconds take one `jstat -gc` sample
# of $JAVA_PID, append it as a comma-separated row to $LOG_FILE and print a
# short summary to the console.
echo "开始监测Java进程(PID: $JAVA_PID),日志文件: $LOG_FILE,监测间隔: $INTERVAL 秒(按Ctrl+C停止)"
RETRY_COUNT=0
while true; do
  CURRENT_TIME=$(date "+%Y-%m-%d %H:%M:%S")

  # Liveness check: after three consecutive misses the script gives up.
  if ! ps -p "$JAVA_PID" > /dev/null 2>&1; then
    # Same comma-separated 13-column layout as a regular sample row.
    printf '%s\n' "$CURRENT_TIME,-,-,-,-,-,-,-,-,-,-,-,进程已退出" >> "$LOG_FILE"
    echo "[$CURRENT_TIME] 警告:Java进程(PID: $JAVA_PID)已退出,脚本将在3次重试后退出"
    RETRY_COUNT=$((RETRY_COUNT + 1))
    if [ "$RETRY_COUNT" -ge 3 ]; then
      echo "[$CURRENT_TIME] 错误:Java进程已退出,脚本退出"
      exit 1
    fi
    sleep "$INTERVAL"
    continue
  fi
  RETRY_COUNT=0

  # Take one sample and convert it with a single awk pass.
  GC_STAT=$(jstat -gc "$JAVA_PID" 2> /dev/null)
  if ! read -r survivor_cap_mb survivor_use_mb eden_cap_mb eden_use_mb \
    old_cap_mb old_use_mb meta_cap_mb meta_use_mb \
    ygc_count ygc_ms fgc_count fgc_ms \
    < <(printf '%s\n' "$GC_STAT" | parse_gc_stat); then
    # jstat failed or printed something unexpected: skip this sample
    # instead of logging a malformed row.
    echo "[$CURRENT_TIME] 警告:无法获取或解析 jstat -gc 输出,跳过本次采样" >&2
    sleep "$INTERVAL"
    continue
  fi

  # Append the sample row to the log.
  printf '%s\n' "$CURRENT_TIME,$survivor_cap_mb,$survivor_use_mb,$eden_cap_mb,$eden_use_mb,$old_cap_mb,$old_use_mb,$meta_cap_mb,$meta_use_mb,$ygc_count,$ygc_ms,$fgc_count,$fgc_ms" >> "$LOG_FILE"
  # Live console summary.
  echo "[$CURRENT_TIME] PID: $JAVA_PID | Eden: ${eden_use_mb}MB/${eden_cap_mb}MB | old: ${old_use_mb}MB/${old_cap_mb}MB | YoungGC: $ygc_count 次($ygc_ms ms) | FullGC: $fgc_count 次($fgc_ms ms)"

  # Let logrotate decide whether the size threshold has been reached.
  # No -f here: forcing would rotate the log on every sample and discard
  # all but the newest few rows.
  logrotate "$LOGROTATE_CONF" > /dev/null 2>&1
  sleep "$INTERVAL"
done
浙公网安备 33010602011771号