/skyland/
sed -i 's/\r$//' mem_watch.sh
chmod +x mem_watch.sh
# 后台启动模式(nohup 运行,日志追加写入 mem_watch.log)
nohup ./mem_watch.sh >> mem_watch.log 2>&1 &
tail -f mem_watch.log
mem_watch.sh
#!/bin/bash
PROC_NAME="sql-executor-linux64-sjjr.out"
PM2_NAME="sql-executor-sjjr-15013"
MAX_KB=$((35*1024*1024)) # 35GB
#######################################
# Poll one process's resident memory once a minute and ask pm2 to restart the
# application when the limit is exceeded.
# Globals:
#   MAX_KB   (read) resident-memory limit in KiB
#   PM2_NAME (read) pm2 application name to restart
# Arguments:
#   $1 - PID of the process to watch
# Outputs:
#   One timestamped status line per poll on stdout; a pm2 failure is reported
#   on stderr.
# Returns:
#   1 after a restart was requested because RSS exceeded MAX_KB,
#   0 once the PID can no longer be observed.
#######################################
watch_one() {
  local pid=$1
  local rss_kb rss_gb

  # kill -0 sends no signal; it only tests that the PID still exists.
  while kill -0 "$pid" 2>/dev/null; do
    rss_kb=$(ps -p "$pid" -o rss= | tr -d ' ')

    # ps prints nothing when the process vanished between the kill -0 probe
    # and this call. Stop here instead of logging a misleading "RSS=0GB" line
    # and sleeping a full minute on a dead PID.
    if [[ ! "$rss_kb" =~ ^[0-9]+$ ]]; then
      break
    fi

    rss_gb=$((rss_kb / 1024 / 1024)) # whole GB, truncated, for the log only
    echo "$(date '+%F %T') PID=$pid RSS=${rss_gb}GB"

    if (( rss_kb > MAX_KB )); then
      # Threshold is derived from MAX_KB so the message cannot drift from the
      # configured limit.
      echo "$(date '+%F %T') 内存超过 $((MAX_KB / 1024 / 1024))GB,执行 pm2 重启"
      if ! pm2 restart "$PM2_NAME"; then
        echo "$(date '+%F %T') pm2 restart $PM2_NAME 失败" >&2
      fi
      return 1 # restart requested: stop watching this PID
    fi

    sleep 60
  done

  echo "$(date '+%F %T') PID=$pid 已退出"
  return 0
}
# Supervisor loop: locate the watched process, hand its PID to watch_one, and
# look it up again whenever watch_one returns (process exited or a restart was
# requested). Runs until the script itself is stopped.
while true; do
  # pgrep -f matches against the full command line, so more than one process
  # can match. -o keeps only the oldest match, guaranteeing watch_one receives
  # exactly one PID; a multi-line PID list would make its kill -0 probe fail
  # at once and nothing would ever be monitored.
  PID=$(pgrep -o -f -- "$PROC_NAME")
  if [[ -z "$PID" ]]; then
    echo "$(date '+%F %T') 进程未启动,等待 60 秒后继续尝试..."
    sleep 60
    continue
  fi

  echo "$(date '+%F %T') 开始监控 PID=$PID"
  watch_one "$PID"

  # Give pm2 / the process a moment to come back before resolving the new PID.
  echo "$(date '+%F %T') 等待 10 秒后重新获取新 PID..."
  sleep 10
done
开机自启
✅ 1. 创建服务文件(在终端执行):
sudo nano /etc/systemd/system/mem_watch.service
粘贴以下内容(ini 格式,路径按你实际目录改):
# systemd unit that runs the mem_watch.sh watchdog as a long-lived service.
[Unit]
Description=Memory Watchdog for sql-executor-sjjr
# Order this unit after network.target during boot.
After=network.target
[Service]
# simple: the ExecStart process itself is the service's main process.
Type=simple
User=root
# Both paths below must point at the directory that actually holds the script.
# NOTE(review): mem_watch.sh invokes `pm2`; confirm pm2 is resolvable on the
# PATH that systemd gives this service.
WorkingDirectory=/root/skyland
ExecStart=/bin/bash /root/skyland/mem_watch.sh
# Relaunch the watchdog 10 seconds after it exits, whatever the exit status.
Restart=always
RestartSec=10
# Send the script's stdout and stderr to the systemd journal.
StandardOutput=journal
StandardError=journal
[Install]
# Pulled in by multi-user.target, so `systemctl enable` starts it at boot.
WantedBy=multi-user.target
✅ 2. 重载并启动
sudo systemctl daemon-reload
sudo systemctl enable --now mem_watch.service
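✅ 3. 常用命令
sudo systemctl status mem_watch      # 查看运行状态
sudo journalctl -u mem_watch -f      # 实时查看日志
sudo systemctl restart mem_watch     # 重启服务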
✅ 4. 卸载(如需)
sudo systemctl stop mem_watch
sudo systemctl disable mem_watch
sudo rm /etc/systemd/system/mem_watch.service
sudo systemctl daemon-reload