数据接入中间件监控程序

24 阅读1分钟

/skyland/

# Strip Windows-style CR line endings in place so bash can parse the script
sed -i 's/\r$//' mem_watch.sh
# Mark the script as executable
chmod +x mem_watch.sh

# 后台启动模式
# Run the watchdog in the background, immune to hangups; stdout and stderr are
# both appended to mem_watch.log
nohup ./mem_watch.sh >> mem_watch.log 2>&1 &
# Follow the log as new lines arrive
tail -f mem_watch.log

mem_watch.sh

#!/bin/bash

# --- Watchdog settings -------------------------------------------------------
# Command-line pattern used to look up the monitored process.
PROC_NAME='sql-executor-linux64-sjjr.out'
# Name of the pm2 application that gets restarted.
PM2_NAME='sql-executor-sjjr-15013'
# Resident-memory ceiling expressed in KB: 35 GB.
MAX_KB=$(( 35 * 1024 ** 2 ))

#######################################
# Poll one process's resident memory once a minute and restart the pm2
# application when it grows past the limit.
# Globals:
#   MAX_KB   (read) - RSS ceiling, in the same unit ps reports for rss
#   PM2_NAME (read) - application name handed to `pm2 restart`
# Arguments:
#   $1 - PID of the process to watch
# Outputs:
#   Timestamped status lines on stdout.
# Returns:
#   1 after a restart was triggered because RSS exceeded MAX_KB,
#   0 once the process is no longer running.
#######################################
function watch_one() {
    local pid=$1
    # Keep the samples local so they do not leak into the caller's scope.
    local rss_kb rss_gb

    # kill -0 delivers no signal; it only tests whether the PID still exists.
    while kill -0 "$pid" 2>/dev/null; do
        rss_kb=$(ps -p "$pid" -o rss=)
        rss_kb=${rss_kb//[[:space:]]/}

        # ps prints nothing when the process vanished between the liveness
        # check and the sample. Treat that as an exit instead of logging a
        # misleading "RSS=0GB" line and sleeping for another minute.
        if [[ ! "$rss_kb" =~ ^[0-9]+$ ]]; then
            break
        fi

        rss_gb=$(( rss_kb / 1024 / 1024 ))  # whole GB, rounded down
        echo "$(date '+%F %T') PID=$pid RSS=${rss_gb}GB"
        if (( rss_kb > MAX_KB )); then
            echo "$(date '+%F %T') 内存超过 35GB,执行 pm2 重启"
            pm2 restart "$PM2_NAME"
            return 1  # restart triggered; stop watching this PID
        fi
        sleep 60
    done
    echo "$(date '+%F %T') PID=$pid 已退出"
    return 0
}

# Supervisor loop: look up the target process, watch it until it exits or a
# restart is triggered, then look it up again.
while true; do
    # pgrep -f can print several matching PIDs, one per line, but watch_one
    # handles exactly one PID. -o narrows the result to the oldest match so a
    # multi-line value never reaches kill/ps.
    PID=$(pgrep -o -f "$PROC_NAME")
    if [[ -z "$PID" ]]; then
        echo "$(date '+%F %T') 进程未启动,等待 60 秒后继续尝试..."
        sleep 60
        continue
    fi
    echo "$(date '+%F %T') 开始监控 PID=$PID"
    watch_one "$PID"
    # Short pause before searching again for the next PID to watch.
    echo "$(date '+%F %T') 等待 10 秒后重新获取新 PID..."
    sleep 10
done

开机自启
✅ 1. 创建服务文件

bash
复制
sudo nano /etc/systemd/system/mem_watch.service
粘贴以下内容(路径按你实际目录改):
ini
复制
# systemd unit that runs mem_watch.sh as a long-lived service.
[Unit]
Description=Memory Watchdog for sql-executor-sjjr
# Order this unit after network.target during boot.
After=network.target

[Service]
# simple: the ExecStart process itself is the main service process.
Type=simple
User=root
# Directory the script is started from; adjust together with ExecStart.
WorkingDirectory=/root/skyland
# Invoke the script through bash explicitly, so the file's execute bit is not required.
ExecStart=/bin/bash /root/skyland/mem_watch.sh
# Restart the watchdog whenever it stops, waiting 10 seconds before each restart.
Restart=always
RestartSec=10
# Send the script's stdout and stderr to the systemd journal.
StandardOutput=journal
StandardError=journal

[Install]
# Pulled in by multi-user.target when the unit is enabled.
WantedBy=multi-user.target

✅ 2. 重载并启动

# Make systemd re-read unit files so the new service is picked up
sudo systemctl daemon-reload
# Enable the service at boot and start it immediately
sudo systemctl enable --now mem_watch.service

✅ 3. 常用命令

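(原文此处为截图 image.png,常用命令示例如下)
sudo systemctl status mem_watch      # 查看服务状态
sudo systemctl restart mem_watch     # 重启服务
sudo journalctl -u mem_watch -f      # 实时查看日志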

✅ 4. 卸载(如需)

sudo systemctl stop mem_watch
# Stop the service from being started at boot
sudo systemctl disable mem_watch
# Delete the unit file
sudo rm /etc/systemd/system/mem_watch.service
# Make systemd re-read unit files so the removed service is dropped
sudo systemctl daemon-reload