sh看门狗脚本

看门狗程序脚本

#!/bin/bash
# testmain_watchdog.sh - Testmain 进程看门狗脚本

# ==================== Port configuration ====================
# Main ports and their service ports; entries are paired by array index.
MAIN_PORTS=(30001 30002 30003 30004)
SERVICE_PORTS=(8001 8002 8003 8004)

# The two lists must be the same length, otherwise the pairing is undefined.
if (( ${#MAIN_PORTS[@]} != ${#SERVICE_PORTS[@]} )); then
    echo "错误: 主端口和服务端口数量不一致"
    exit 1
fi

# Lookup table: main port -> service port.
declare -A PORT_MAPPING
for i in "${!MAIN_PORTS[@]}"; do
    PORT_MAPPING["${MAIN_PORTS[i]}"]="${SERVICE_PORTS[i]}"
done
# ==================== End of port configuration ====================

# Directory containing this script; the config file and the default log path
# are resolved relative to it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
CONFIG_FILE="$SCRIPT_DIR/testmain_watchdog.conf"

# Defaults, used when the config file is missing or does not override them.
LOG_FILE="$SCRIPT_DIR/log/testmain_watchdog.log"
CHECK_INTERVAL=10

# Source $CONFIG_FILE when it exists, then validate the port lists again and
# rebuild PORT_MAPPING from them.
# The config file may redefine MAIN_PORTS / SERVICE_PORTS; a mapping built
# before sourcing would otherwise keep pointing at the old ports.
# Globals: CONFIG_FILE (read), MAIN_PORTS / SERVICE_PORTS (read, possibly
#          replaced by the config), PORT_MAPPING (rewritten)
# Exits with status 1 when the two port lists differ in length.
load_watchdog_config() {
    if [ -f "$CONFIG_FILE" ]; then
        # shellcheck source=/dev/null
        source "$CONFIG_FILE"
        echo "已加载配置文件: $CONFIG_FILE"
    else
        echo "警告: 配置文件不存在, 使用默认配置"
    fi

    if [ "${#MAIN_PORTS[@]}" -ne "${#SERVICE_PORTS[@]}" ]; then
        echo "错误: 主端口和服务端口数量不一致"
        exit 1
    fi

    # -g keeps the table global even though it is (re)declared inside a function.
    declare -gA PORT_MAPPING
    PORT_MAPPING=()
    local i
    for i in "${!MAIN_PORTS[@]}"; do
        PORT_MAPPING["${MAIN_PORTS[i]}"]="${SERVICE_PORTS[i]}"
    done
}
load_watchdog_config

# Library search path for Testmain; entries are relative to the working directory.
export LD_LIBRARY_PATH=.:genericBase/:pic:common

# Run from WORKING_DIR when it is set and exists, otherwise from the script directory.
if [ -n "$WORKING_DIR" ] && [ -d "$WORKING_DIR" ]; then
    cd "$WORKING_DIR" || exit 1
else
    cd "$SCRIPT_DIR" || exit 1
fi

# Logging helper.
# Arguments: $1 - message text
# Outputs:   "[YYYY-MM-DD HH:MM:SS] <message>" on stdout, and the same line
#            appended to $LOG_FILE.
log_message() {
    local stamp
    stamp=$(date '+%Y-%m-%d %H:%M:%S')
    printf '[%s] %s\n' "$stamp" "$1" | tee -a "$LOG_FILE"
}

# Report whether a Testmain process for the given port exists.
# Arguments: $1 - port; it is interpolated into the regex "Testmain.*<port>",
#            which pgrep -f matches against full command lines. The regex is
#            unanchored, so any command line containing "Testmain" followed
#            later by the port text counts as a match.
# Returns:   0 when pgrep finds a match, 1 otherwise.
check_process() {
    local port=$1
    pgrep -f "Testmain.*$port" > /dev/null && return 0
    return 1
}

# Print the PID of the first process whose command line matches
# "Testmain.*<port>" (pgrep -f); prints nothing when there is no match.
# Arguments: $1 - port
get_process_pid() {
    local port=$1
    pgrep -f "Testmain.*$port" | sed -n '1p'
}

# Print the service port mapped to a main port.
# Arguments: $1 - main port (key into PORT_MAPPING)
# Outputs:   the mapped service port, or an empty line when there is no entry.
get_service_port() {
    local main_port=$1
    printf '%s\n' "${PORT_MAPPING[$main_port]}"
}

# Start the Testmain instance for one main port.
# Globals:   CENTER_SERVER, SERVICE_HOST (read; optional - the previously
#            hard-coded values are used when they are unset or empty),
#            SCRIPT_DIR (read; the per-port log is written under it)
# Arguments: $1 - main port
# Returns:   0 when check_process sees the instance after the start-up wait;
#            1 when the port has no service-port mapping, ./Testmain is
#            missing, or the instance is not found after the wait.
start_process() {
    local main_port=$1
    local service_port
    service_port=$(get_service_port "$main_port")

    if [ -z "$service_port" ]; then
        log_message "错误: 未找到主端口 $main_port 对应的服务端口配置"
        return 1
    fi

    log_message "启动 Testmain 进程 - 主端口: $main_port, 服务端口: $service_port"

    # The binary is looked up relative to the current working directory.
    if [ ! -f "./Testmain" ]; then
        log_message "错误: Testmain 可执行文件不存在于当前目录: $(pwd)"
        return 1
    fi

    local center_server=${CENTER_SERVER:-10.97.164.158:17192}
    local service_host=${SERVICE_HOST:-127.0.0.1}
    local service_json
    printf -v service_json '{"host":"%s","port":%s,"stream":false}' \
        "$service_host" "$service_port"

    # Capture the instance's output in the per-port log that the failure
    # branch below reads (truncated on every start). Fall back to /dev/null
    # when the log directory cannot be created, so the launch still happens.
    local process_log="$SCRIPT_DIR/log/testmain_${main_port}.log"
    if ! mkdir -p "$(dirname "$process_log")" 2>/dev/null; then
        process_log=/dev/null
    fi

    # Launch detached in the background.
    nohup ./Testmain "CENTER:${center_server}" 1 . "$main_port" "$service_json" \
        > "$process_log" 2>&1 &

    # Give the process time to come up before checking for it.
    sleep 5
    if check_process "$main_port"; then
        local actual_pid
        actual_pid=$(get_process_pid "$main_port")
        log_message "✓ Testmain 启动成功 - 主端口: $main_port, 服务端口: $service_port (PID: $actual_pid)"
        return 0
    fi

    log_message "✗ Testmain 启动失败 - 主端口: $main_port, 服务端口: $service_port"
    # Include the tail of the start-up output to help debugging.
    if [ -f "$process_log" ]; then
        log_message "启动日志: $(tail -5 "$process_log")"
    fi
    return 1
}

# Stop the Testmain process found for a port: SIGTERM first, then SIGKILL if
# check_process still reports a match two seconds later.
# Arguments: $1 - port
# Only the single PID printed by get_process_pid is signalled.
stop_process() {
    local port=$1
    local pid
    pid=$(get_process_pid "$port")

    # Nothing to stop.
    [ -n "$pid" ] || return 0

    log_message "停止 Testmain 进程 (端口: $port, PID: $pid)..."
    kill -TERM "$pid"
    sleep 2

    # Escalate only when a matching process is still around.
    check_process "$port" || return 0
    kill -KILL "$pid" 2>/dev/null
    log_message "强制停止 Testmain 进程 (端口: $port)"
}

# Report whether something is listening on a TCP/UDP port.
# Arguments: $1 - port number
# Returns:   0 when a listening socket line contains ":<port> ", 1 otherwise.
# Uses netstat when it is installed and falls back to ss, so hosts without
# net-tools no longer report every port as free. When neither tool exists the
# port is reported free, which is what the netstat-only version did.
check_port_usage() {
    local port=$1
    local listing

    if command -v netstat > /dev/null 2>&1; then
        listing=$(netstat -tuln 2>/dev/null)
    elif command -v ss > /dev/null 2>&1; then
        listing=$(ss -tuln 2>/dev/null)
    else
        return 1
    fi

    # Fixed-string match; the trailing space keeps ":3000" from matching ":30001".
    if grep -qF -- ":${port} " <<< "$listing"; then
        return 0  # port in use
    fi
    return 1  # port free
}

# Main watchdog loop: logs the configuration, starts any missing Testmain
# instance once, then re-checks every port every CHECK_INTERVAL seconds and
# restarts instances that are gone. Never returns on its own.
# Globals: MAIN_PORTS, CHECK_INTERVAL (read)
monitor_loop() {
    local main_port service_port pid

    log_message "Testmain 看门狗启动"
    log_message "工作目录: $(pwd)"
    log_message "监控主端口数量: ${#MAIN_PORTS[@]}"
    log_message "检查间隔: $CHECK_INTERVAL 秒"

    # Log the main-port -> service-port pairs.
    log_message "端口映射关系:"
    for main_port in "${MAIN_PORTS[@]}"; do
        service_port=$(get_service_port "$main_port")
        log_message "  $main_port -> $service_port"
    done

    # Initial pass: start whatever is not running yet.
    log_message "执行初始启动检查..."
    for main_port in "${MAIN_PORTS[@]}"; do
        if check_process "$main_port"; then
            pid=$(get_process_pid "$main_port")
            service_port=$(get_service_port "$main_port")
            log_message "初始检查: 端口 $main_port 的进程已运行 (PID: $pid, 服务端口: $service_port)"
        else
            log_message "初始启动: 端口 $main_port 的进程不存在,尝试启动..."
            start_process "$main_port"
        fi
    done

    # Steady state: a healthy instance produces no log output and no extra
    # process lookups; only missing instances are acted on.
    while true; do
        for main_port in "${MAIN_PORTS[@]}"; do
            if check_process "$main_port"; then
                continue
            fi

            service_port=$(get_service_port "$main_port")
            log_message "⚠ Testmain 进程不存在 - 主端口: $main_port, 服务端口: $service_port,尝试重启..."

            # Do not start when the main port is held by some other process.
            # (The service port is not checked.)
            if check_port_usage "$main_port"; then
                log_message "警告: 端口 $main_port 被其他进程占用,无法启动"
                continue
            fi

            start_process "$main_port"
        done

        sleep "$CHECK_INTERVAL"
    done
}

# Signal handling: on SIGTERM / SIGINT stop the Testmain process of every
# configured main port, then exit with status 0.
cleanup() {
    log_message "看门狗脚本停止,清理进程..."
    local idx
    for idx in "${!MAIN_PORTS[@]}"; do
        stop_process "${MAIN_PORTS[idx]}"
    done
    exit 0
}

trap cleanup TERM INT

# Entry point: requires root, prepares the log file, verifies that ./Testmain
# exists in the current directory, then hands over to monitor_loop.
# Exits with status 1 when not run as root or when ./Testmain is missing.
main() {
    # Root is required.
    if (( EUID != 0 )); then
        echo "请以root权限运行此脚本: sudo $0"
        exit 1
    fi

    # Make sure the log directory and the log file exist.
    local log_dir
    log_dir=$(dirname "$LOG_FILE")
    mkdir -p "$log_dir"
    touch "$LOG_FILE"

    # Start monitoring only when the binary is present.
    if [ -f "./Testmain" ]; then
        monitor_loop
    else
        log_message "错误: Testmain 可执行文件不存在于当前目录: $(pwd)"
        exit 1
    fi
}

# Run the entry point, forwarding the script's command-line arguments.
main "$@"

管理服务脚本

#!/bin/bash
# testmain_watchdog_manager.sh - 看门狗管理脚本

# ==================== Port configuration ====================
# Main ports and their service ports; entries are paired by array index.
MAIN_PORTS=(30001 30002 30003 30004)
SERVICE_PORTS=(8001 8002 8003 8004)
WATCHDOG_SERVICE=testmain-watchdog
SERVICE_FILE="/etc/systemd/system/${WATCHDOG_SERVICE}.service"

# The two lists must be the same length, otherwise the pairing is undefined.
if (( ${#MAIN_PORTS[@]} != ${#SERVICE_PORTS[@]} )); then
    echo "错误: 主端口和服务端口数量不一致"
    exit 1
fi

# Lookup table: main port -> service port.
declare -A PORT_MAPPING
for i in "${!MAIN_PORTS[@]}"; do
    PORT_MAPPING["${MAIN_PORTS[i]}"]="${SERVICE_PORTS[i]}"
done
# ==================== End of port configuration ====================

# The directory holding this script is the root for the config file, the
# watchdog script and everything else the manager touches.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
CONFIG_FILE="${SCRIPT_DIR}/testmain_watchdog.conf"
WATCHDOG_SCRIPT="${SCRIPT_DIR}/testmain_watchdog.sh"

# Print the service port mapped to a main port.
# Arguments: $1 - main port (key into PORT_MAPPING)
# Outputs:   the mapped service port, or an empty line when there is no entry.
get_service_port() {
    local main_port=$1
    local mapped=${PORT_MAPPING[$main_port]}
    echo "$mapped"
}

# Write the default watchdog configuration to $CONFIG_FILE and print a summary.
# Globals: CONFIG_FILE, SCRIPT_DIR, MAIN_PORTS, SERVICE_PORTS (read)
# Returns: 0 on success, 1 when the config file cannot be written.
# Path values are shell-quoted with printf %q so that the generated file can
# still be sourced when SCRIPT_DIR contains spaces or other special characters.
create_config() {
    local main_port service_port
    local quoted_dir quoted_log
    printf -v quoted_dir '%q' "$SCRIPT_DIR"
    printf -v quoted_log '%q' "$SCRIPT_DIR/log/testmain_watchdog.log"

    if ! cat > "$CONFIG_FILE" << EOF
# Testmain 看门狗配置文件

# 主端口列表(与脚本中的配置保持一致)
MAIN_PORTS=(${MAIN_PORTS[@]})

# 服务端口列表(与脚本中的配置保持一致)  
SERVICE_PORTS=(${SERVICE_PORTS[@]})

# 监控间隔(秒)
CHECK_INTERVAL=30

# 工作目录(自动设置为当前脚本目录)
WORKING_DIR=$quoted_dir

# 日志文件
LOG_FILE=$quoted_log

# 环境变量
LD_LIBRARY_PATH=.:genericBase/:pic:common

# 中心服务器配置
CENTER_SERVER=10.97.164.158:17192
SERVICE_HOST=127.0.0.1
EOF
    then
        echo "错误: 无法写入配置文件: $CONFIG_FILE" >&2
        return 1
    fi

    echo "配置文件已创建: $CONFIG_FILE"
    echo "工作目录: $SCRIPT_DIR"
    echo "端口配置:"
    for main_port in "${MAIN_PORTS[@]}"; do
        service_port=$(get_service_port "$main_port")
        echo "  $main_port -> $service_port"
    done
}

# Print a status report: one section per configured main port (process and
# listening state), the watchdog service state, the Testmain process list,
# and the working directory / binary.
# Globals: MAIN_PORTS, SERVICE_PORTS, WATCHDOG_SERVICE, SCRIPT_DIR (read)
show_status() {
    local main_port service_port pid process_info

    echo "=== Testmain 进程状态 ==="
    echo "主端口数量: ${#MAIN_PORTS[@]}"
    echo "服务端口数量: ${#SERVICE_PORTS[@]}"
    echo ""

    for main_port in "${MAIN_PORTS[@]}"; do
        service_port=$(get_service_port "$main_port")
        pid=$(pgrep -f "Testmain.*$main_port" | head -1)
        if [ -n "$pid" ]; then
            # Process details, cut to 80 columns.
            process_info=$(ps -p "$pid" -o pid,cmd --no-headers 2>/dev/null | cut -c1-80)
            echo "主端口 $main_port (服务端口 $service_port): ✓ 运行中 (PID: $pid)"
            echo "        命令: $process_info"

            # Listening state of both ports.
            if netstat -tuln | grep -q ":$main_port "; then
                echo "        主端口 $main_port: ✓ 监听中"
            else
                echo "        主端口 $main_port: ✗ 未监听"
            fi

            if netstat -tuln | grep -q ":$service_port "; then
                echo "        服务端口 $service_port: ✓ 监听中"
            else
                echo "        服务端口 $service_port: ✗ 未监听"
            fi
        else
            echo "主端口 $main_port (服务端口 $service_port): ✗ 未运行"
        fi
        echo ""
    done

    echo -e "\n=== 看门狗服务状态 ==="
    if systemctl is-active "$WATCHDOG_SERVICE" >/dev/null 2>&1; then
        systemctl status "$WATCHDOG_SERVICE" --no-pager
    else
        echo "看门狗服务未运行"
    fi
    echo -e "\n=== Testmain进程状态 ==="
    # The bracket keeps the grep process itself out of the listing: its own
    # command line contains "[T]estmain", which the pattern does not match.
    ps -ef | grep '[T]estmain'
    echo -e "\n=== 目录信息 ==="
    echo "工作目录: $SCRIPT_DIR"
    echo "Testmain 文件: $(ls -la "$SCRIPT_DIR/Testmain" 2>/dev/null || echo "不存在")"
}

# Start the Testmain instance for one main port unless one is already found.
# Globals:   SCRIPT_DIR (read; becomes the working directory, and the
#            per-port log is written to $SCRIPT_DIR/log)
# Arguments: $1 - main port
# Returns:   0 when the instance is already running or was found after the
#            start-up wait; 1 on missing port mapping, directory errors, or
#            when the instance is not found after the wait.
start_single_process() {
    local main_port=$1
    local service_port pid log_dir
    service_port=$(get_service_port "$main_port")

    if [ -z "$service_port" ]; then
        echo "错误: 未找到主端口 $main_port 对应的服务端口配置"
        return 1
    fi

    pid=$(pgrep -f "Testmain.*$main_port" | head -1)
    if [ -n "$pid" ]; then
        echo "端口 $main_port 已在运行 (PID: $pid)"
        return 0
    fi

    echo "启动 Testmain - 主端口: $main_port, 服务端口: $service_port"
    cd "$SCRIPT_DIR" || {
        echo "错误: 无法切换到目录 $SCRIPT_DIR"
        return 1
    }

    # The output redirect below fails, and Testmain is never launched, when
    # the log directory does not exist, so create it first.
    log_dir="$SCRIPT_DIR/log"
    if ! mkdir -p "$log_dir"; then
        echo "错误: 无法创建日志目录 $log_dir"
        return 1
    fi

    export LD_LIBRARY_PATH=.:genericBase/:pic:common
    nohup ./Testmain CENTER:10.97.164.158:17192 1 . "$main_port" \
        '{"host":"127.0.0.1","port":'"$service_port"'}' \
        > "$log_dir/testmain_${main_port}.log" 2>&1 &
    echo "启动命令执行完成"
    sleep 3

    if pgrep -f "Testmain.*$main_port" > /dev/null; then
        echo "✓ 端口 $main_port 启动成功"
    else
        echo "✗ 端口 $main_port 启动失败"
        return 1
    fi
}

# Start a Testmain instance for every configured main port, then print the
# status report.
# Returns 1 without starting anything when $SCRIPT_DIR/Testmain is missing.
start_all() {
    echo "启动所有 Testmain 进程..."
    echo "工作目录: $SCRIPT_DIR"

    # The binary must be present before any start is attempted.
    [ -f "$SCRIPT_DIR/Testmain" ] || {
        echo "错误: Testmain 可执行文件不存在于 $SCRIPT_DIR"
        return 1
    }

    local idx
    for idx in "${!MAIN_PORTS[@]}"; do
        start_single_process "${MAIN_PORTS[idx]}"
    done

    printf '\n%s\n' "启动完成,最终状态:"
    show_status
}

# Send SIGTERM to every process whose command line matches
# "Testmain.*<port>" (pgrep -f).
# Arguments: $1 - main port
# The PID list is kept in a local variable and shown on one line.
stop_single_process() {
    local main_port=$1
    local pids
    pids=$(pgrep -f "Testmain.*$main_port")

    if [ -n "$pids" ]; then
        # pgrep prints one PID per line; join them with spaces for display.
        echo "停止端口 $main_port (PIDs: ${pids//$'\n'/ })..."
        # Word splitting is intentional: one argument per PID.
        # shellcheck disable=SC2086
        kill -TERM $pids 2>/dev/null
    else
        echo "端口 $main_port 未运行"
    fi
}

# Stop the Testmain processes of every configured main port: SIGTERM via
# stop_single_process, a three-second grace period, SIGKILL for whatever
# still matches, then the status report.
# Globals: MAIN_PORTS (read)
stop_all() {
    local main_port pids

    echo "停止所有 Testmain 进程..."
    for main_port in "${MAIN_PORTS[@]}"; do
        stop_single_process "$main_port"
    done

    # Grace period for the processes to exit.
    sleep 3

    # Force-kill anything that is still matched.
    for main_port in "${MAIN_PORTS[@]}"; do
        pids=$(pgrep -f "Testmain.*$main_port")
        if [ -n "$pids" ]; then
            echo "强制停止端口 $main_port (PIDs: ${pids//$'\n'/ })..."
            # Word splitting is intentional: one argument per PID.
            # shellcheck disable=SC2086
            kill -KILL $pids 2>/dev/null
        fi
    done

    echo -e "\n停止完成,最终状态:"
    show_status
}

# Print the index / main port / service port table, one row per entry of
# MAIN_PORTS (the service port is taken from the same index of SERVICE_PORTS).
show_port_mapping() {
    printf '%s\n' \
        "=== 端口映射关系 ===" \
        "索引 | 主端口 -> 服务端口" \
        "----|-----------------"

    local idx
    for idx in "${!MAIN_PORTS[@]}"; do
        # shellcheck disable=SC2086
        printf '%-4d | %d -> %d\n' $idx ${MAIN_PORTS[idx]} ${SERVICE_PORTS[idx]}
    done
}

# Show the tail of a log file under $SCRIPT_DIR/log.
# Arguments: $1 - "watchdog" (default) for the watchdog log (last 50 lines),
#                 or a configured main port for that instance's log (last 30
#                 lines); anything else prints the usage hint.
show_log() {
    local log_type=${1:-watchdog}
    local log_path

    if [ "$log_type" = "watchdog" ]; then
        log_path="$SCRIPT_DIR/log/testmain_watchdog.log"
        if [ -f "$log_path" ]; then
            echo "=== 看门狗日志 (最后50行) ==="
            tail -50 "$log_path"
        else
            echo "看门狗日志文件不存在: $log_path"
        fi
        return
    fi

    # Anything that is not one of the configured main ports gets the usage hint.
    if [[ " ${MAIN_PORTS[*]} " != *" ${log_type} "* ]]; then
        echo "用法: $0 log {watchdog|主端口号}"
        echo "可用主端口: ${MAIN_PORTS[*]}"
        return
    fi

    log_path="$SCRIPT_DIR/log/testmain_$log_type.log"
    if [ -f "$log_path" ]; then
        echo "=== 主端口 $log_type 的 Testmain 日志 (最后30行) ==="
        tail -30 "$log_path"
    else
        echo "端口 $log_type 的日志文件不存在: $log_path"
    fi
}

# Install and enable the watchdog as a systemd service.
# Globals: SERVICE_FILE (read; unit file path - falls back to
#          /etc/systemd/system/<WATCHDOG_SERVICE>.service when unset),
#          WATCHDOG_SERVICE, SCRIPT_DIR (read)
# Returns: 1 when the unit file cannot be written; in that case systemd is
#          not reloaded and nothing is enabled.
install_service() {
    # Use the same path uninstall_service removes, so the two cannot drift apart.
    local unit_file=${SERVICE_FILE:-/etc/systemd/system/$WATCHDOG_SERVICE.service}

    echo "安装 systemd 服务..."

    # Write the unit file.
    if ! cat > "$unit_file" << EOF
[Unit]
Description=Testmain Process Watchdog Service
After=network.target
Wants=network.target

[Service]
Type=simple
User=root
WorkingDirectory=$SCRIPT_DIR
ExecStart=$SCRIPT_DIR/testmain_watchdog.sh
Restart=always
RestartSec=10
StandardOutput=journal
StandardError=journal

# 设置环境变量
Environment=LD_LIBRARY_PATH=.:genericBase/:pic:common

[Install]
WantedBy=multi-user.target
EOF
    then
        echo "错误: 无法写入服务文件: $unit_file" >&2
        return 1
    fi

    # Make systemd pick up the new unit.
    systemctl daemon-reload
    echo "服务文件已创建: $unit_file"
    echo "工作目录: $SCRIPT_DIR"

    # Enable start at boot.
    systemctl enable "$WATCHDOG_SERVICE"
    echo "看门狗服务已启用开机自启动"

    echo -e "\n端口配置:"
    show_port_mapping

    echo -e "\n下一步操作:"
    echo "sudo systemctl start $WATCHDOG_SERVICE    # 启动服务"
    echo "sudo systemctl status $WATCHDOG_SERVICE   # 查看状态"
}
# Uninstall the watchdog: stop and disable the systemd service, delete its
# unit file, then stop every Testmain process (SIGTERM, then SIGKILL, then a
# final pkill sweep over anything still matching "Testmain").
# Globals: WATCHDOG_SERVICE, SERVICE_FILE, MAIN_PORTS (read)
uninstall_service() {
    local main_port pids running_pids

    echo "开始卸载 Testmain 看门狗服务..."

    # Stop the watchdog service.
    if systemctl is-active "$WATCHDOG_SERVICE" >/dev/null 2>&1; then
        echo "停止看门狗服务..."
        systemctl stop "$WATCHDOG_SERVICE"
        echo "看门狗服务已停止"
    else
        echo "看门狗服务未运行"
    fi

    # Disable start at boot.
    if systemctl is-enabled "$WATCHDOG_SERVICE" >/dev/null 2>&1; then
        echo "禁用看门狗服务..."
        systemctl disable "$WATCHDOG_SERVICE"
        echo "看门狗服务已禁用"
    fi

    # Remove the unit file.
    if [ -f "$SERVICE_FILE" ]; then
        echo "删除服务文件: $SERVICE_FILE"
        rm -f "$SERVICE_FILE"
        systemctl daemon-reload
        echo "服务文件已删除"
    else
        echo "服务文件不存在: $SERVICE_FILE"
    fi

    # Stop the Testmain processes port by port.
    echo "停止所有 Testmain 进程..."
    for main_port in "${MAIN_PORTS[@]}"; do
        pids=$(pgrep -f "Testmain.*$main_port")
        if [ -n "$pids" ]; then
            echo "停止端口 $main_port 的进程 (PIDs: ${pids//$'\n'/ })..."
            # Word splitting is intentional: one argument per PID.
            # shellcheck disable=SC2086
            kill -TERM $pids 2>/dev/null
            sleep 2
            # Force-kill whatever is still matched.
            pids=$(pgrep -f "Testmain.*$main_port")
            if [ -n "$pids" ]; then
                echo "强制停止端口 $main_port 的进程 (PIDs: ${pids//$'\n'/ })..."
                # shellcheck disable=SC2086
                kill -KILL $pids 2>/dev/null
            fi
        else
            echo "端口 $main_port 的进程未运行"
        fi
    done

    # Final sweep. NOTE: the pattern is just "Testmain", so any process with
    # that text in its command line is killed, not only the configured ports.
    echo "确认进程状态..."
    running_pids=$(pgrep -f "Testmain")
    if [ -n "$running_pids" ]; then
        echo "警告: 仍有 Testmain 进程在运行 (PIDs: ${running_pids//$'\n'/ })"
        echo "强制杀死所有剩余进程..."
        pkill -f "Testmain"
        sleep 1
        pkill -9 -f "Testmain"
    else
        echo "所有 Testmain 进程已停止"
    fi

    echo -e "\n卸载完成!"
    echo "已执行的操作:"
    echo "✓ 停止并禁用看门狗服务"
    echo "✓ 删除服务文件"
    echo "✓ 停止所有 Testmain 进程"
    echo ""
    # $0 is expanded here so the hint shows the real script name.
    echo "如需重新安装,请运行: $0 install"
}
# Command dispatcher for the manager script.
# Arguments: $1 - sub-command, $2 - optional main port (start / stop) or log
#            type (log)
# "start <port>" starts only that port's Testmain instance, as the usage text
# documents; "start" without a port starts the watchdog service.
run_manager_command() {
    # First configured port, used in the usage examples.
    local sample_port=${MAIN_PORTS[0]:-30001}

    case "$1" in
        "init")
            create_config
            ;;
        "status")
            show_status
            ;;
        "start")
            if [ -n "$2" ]; then
                if [[ " ${MAIN_PORTS[*]} " == *" $2 "* ]]; then
                    start_single_process "$2"
                else
                    echo "错误: 无效的主端口 $2"
                    echo "可用主端口: ${MAIN_PORTS[*]}"
                fi
            elif systemctl is-active "$WATCHDOG_SERVICE" >/dev/null 2>&1; then
                echo "看门狗服务已经启动"
            else
                echo "看门狗服务未运行,尝试启动..."
                systemctl start "$WATCHDOG_SERVICE"
                sleep 2
                systemctl status "$WATCHDOG_SERVICE" --no-pager
            fi
            ;;
        "stop")
            # The watchdog service is stopped before any process is killed.
            systemctl stop "$WATCHDOG_SERVICE"
            if [ -n "$2" ]; then
                if [[ " ${MAIN_PORTS[*]} " == *" $2 "* ]]; then
                    stop_single_process "$2"
                else
                    echo "错误: 无效的主端口 $2"
                    echo "可用主端口: ${MAIN_PORTS[*]}"
                fi
            else
                stop_all
            fi
            ;;
        "log")
            show_log "$2"
            ;;
        "install")
            install_service
            ;;
        "uninstall")
            uninstall_service
            ;;
        "restart")
            stop_all
            if systemctl is-active "$WATCHDOG_SERVICE" >/dev/null 2>&1; then
                echo "重启看门狗服务..."
                systemctl restart "$WATCHDOG_SERVICE"
                sleep 2
                systemctl status "$WATCHDOG_SERVICE" --no-pager
            else
                echo "看门狗服务未运行,尝试启动..."
                systemctl start "$WATCHDOG_SERVICE"
                sleep 2
                systemctl status "$WATCHDOG_SERVICE" --no-pager
            fi
            ;;
        "ports")
            show_port_mapping
            ;;
        "config")
            echo "=== 当前端口配置 ==="
            echo "主端口: ${MAIN_PORTS[*]}"
            echo "服务端口: ${SERVICE_PORTS[*]}"
            show_port_mapping
            ;;
        *)
            echo "用法: $0 {init|status|start|stop|log|install|uninstall|restart|ports|config}"
            echo "  init                   - 创建配置文件"
            echo "  status                 - 查看状态"
            echo "  start [port]           - 启动所有进程或指定端口进程"
            echo "  stop [port]            - 停止所有进程或指定端口进程"
            echo "  log [type]             - 查看日志(watchdog|主端口号)"
            echo "  install                - 安装 systemd 服务"
            echo "  uninstall              - 卸载 systemd 服务并停止所有进程"
            echo "  restart                - 重启看门狗服务"
            echo "  ports                  - 显示端口映射关系"
            echo "  config                 - 显示当前端口配置"
            echo ""
            echo "示例:"
            echo "  $0 config              # 查看端口配置"
            echo "  $0 start $sample_port         # 启动端口${sample_port}的进程"
            echo "  $0 log $sample_port           # 查看端口${sample_port}的日志"
            echo "  $0 ports               # 查看端口映射"
            ;;
    esac
}

run_manager_command "$@"

 

posted @ 2025-11-24 10:41  一字千金  阅读(10)  评论(0)    收藏  举报