Process monitoring script


#!/bin/bash

# Resolve the directory this script lives in (following symlinks)

function GetHome

{

PRG="$0"

## echo $PRG
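# Resolve symlinks step by step until PRG points at the real script file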

while [ -h "$PRG" ]; do

  ls=`ls -ld "$PRG"`

  link=`expr "$ls" : '.*-> \(.*\)$'`

##  echo $link

  if expr "$link" : '/.*' > /dev/null; then

     PRG="$link"

  else

     PRG=`dirname "$PRG"`/"$link"

  fi

done

PRGDIR=`dirname "$PRG"`

# [ -z "$HOME" ] && HOME=`cd "$PRGDIR" >/dev/null; pwd`

HOME=`cd "$PRGDIR" >/dev/null; pwd`

echo $HOME

}


APP_HOME=`GetHome`

cd $APP_HOME

# Log file for normal check results

FILE_LOG="$APP_HOME/logs/log.log"

# Error log file

ERROR_LOG="$APP_HOME/logs/error.log"

# Body of the alert email to be sent

EMAIL_LOG="$APP_HOME/logs/email.log"

# Backup of emails that were already sent

EMAIL_LOG_BAK="$APP_HOME/logs/email.bak.log"

path=`pwd`

#cat $path/hosts|while read line 

#do

#IFS='\n'

# Put the hosts file in the same directory as the script; see Note 1 below the script for its contents

for line in $(cat $path/hosts);do  
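# Each record has the form hostname-user-processname-pid; split it on '-'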

hostName=`echo $line|awk -F '-' '{print $1}'`

userName=`echo $line|awk -F '-' '{print $2}'`

processName=`echo $line|awk -F '-' '{print $3}'`

ppid=`echo $line|awk -F '-' '{print $4}'`

rptime=`date "+%Y-%m-%d %H:%M:%S"`

#pid=`ssh $hostName ps -ef|grep ^$userName|grep $processName|grep -v grep|grep -v vi|grep -v dbx|grep -v tail|grep -v start|grep -v stop |sed -n 1p |awk '{print $2}' `

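# Ask the remote host for the first PID owned by $userName whose ps entry matches $processName, ignoring the grep itself and any stop scripts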

pid=`ssh $hostName ps -ef|grep ^$userName|grep $processName|grep -v grep|grep -v stop |sed -n 1p |awk '{print $2}' `

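# Three cases: no PID found -> the process is down, queue an alert;
# PID differs from the one recorded in hosts -> the process was restarted, queue an alert;
# otherwise -> log a successful check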

if [ "-$pid" == "-" ]; then 

    { 

    content="大家好:$hostName上面的$processName进程,在$rptime停止运行了,请相关人员查看!"

    echo "$content" >> $EMAIL_LOG

    }

elif [ "$pid" != "$ppid" ]; then 

{

processInfo=`ssh $hostName ps -ef|grep ^$userName|grep $processName|grep -v grep|grep -v vi|grep -v dbx|grep -v tail|grep -v start|grep -v stop |sed -n 1p |awk '{print $0}' `

content="大家好:$hostName上面的$processName进程,在$rptime之前重启过,原来进程号:$ppid,现在进程号:$pid,请相关人员查看,并将监控脚本中的$hostName-$userName-$processName-$ppid修改为$hostName-$userName-$processName-$pid!进程信息:$processInfo"


echo "$content" >> $EMAIL_LOG

}

else

    echo "$rptime - $hostName - $userName - $processName-pid:$pid Check Ok." >> $FILE_LOG

fi 

done


RECEIVERS="515256@qq.com cyxinda@163.com"

if [[ -f $EMAIL_LOG ]]; then

if [ `cat $EMAIL_LOG | wc -l`  -gt  0 ] 

then

export LANG=zh_CN.UTF-8
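# Send the alert mail; the arguments after -- are passed through to the local MTA, with -f setting the envelope sender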

mailx -s "some process of hadoop is down..." $RECEIVERS < $EMAIL_LOG -- -f mon.tom-ora@fone.net.cn

  if [ $? -eq 0 ]

  then

    cat $EMAIL_LOG >> $EMAIL_LOG_BAK

    cat /dev/null > $EMAIL_LOG 

  fi

fi

fi
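
The script is meant to run on a schedule. A minimal crontab sketch, assuming it is saved as /opt/monitor/check_process.sh (a hypothetical path) and that the user running it has passwordless SSH access to every host listed in hosts:

# Run the check every 5 minutes and keep cron's own output next to the other logs
*/5 * * * * /opt/monitor/check_process.sh >> /opt/monitor/logs/cron.log 2>&1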


Note 1: hosts file contents:

Format: hostname-process owner-process name-PID

namenode1-hadoop-JobHistoryServer-2337

namenode1-hadoop-ResourceManager-2581

namenode1-hadoop-JournalNode-1842

namenode1-hadoop-NameNode-1121

namenode2-hadoop-NameNode-20404

namenode2-hadoop-JournalNode-20622

datanode1-hadoop-JournalNode-46540

datanode1-hadoop-NodeManager-46736

datanode1-hadoop-DataNode-46220

datanode4-hadoop-DataNode-42854

datanode4-hadoop-NodeManager-9680

datanode4-hadoop-JournalNode-42976

datanode5-hadoop-DataNode-24849

datanode5-hadoop-NodeManager-2413

datanode5-hadoop-JournalNode-24529

datanode6-hadoop-DataNode-34364

datanode6-hadoop-NodeManager-20315

datanode7-hadoop-DataNode-24874

datanode7-hadoop-NodeManager-21257
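
For reference, this is how the script splits one of these records into its four fields; note that none of the values may themselves contain a '-', since that character is the field delimiter:

echo "namenode1-hadoop-NameNode-1121" | awk -F '-' '{print $1, $2, $3, $4}'
# namenode1 hadoop NameNode 1121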


Email format 1 (process down):

Hello: the NodeManager process on datanode4 stopped running at 2014-07-24 10:15:05. Please investigate!
Hello: the NodeManager process on datanode5 stopped running at 2014-07-24 10:15:06. Please investigate!
Hello: the NodeManager process on datanode6 stopped running at 2014-07-24 10:15:07. Please investigate!
Email format 2 (process restarted):
Hello: the NodeManager process on datanode4 was restarted some time before 2014-07-24 10:20:05. Old PID: 33711, new PID: 9650. Please investigate, and update datanode4-hadoop-NodeManager-33311 in the monitoring script's hosts file to datanode4-hadoop-NodeManager-9650! Process info: hadoop 9650 1 22 10:16 pts/0 00:00:43 /usr/local/jdk1.7.0_51/bin/java -Dproc_nodemanager -Xmx1000m -Dhadoop.log.dir=/opt/hadoop.....
Hello: the NodeManager process on datanode5 was restarted some time before 2014-07-24 10:20:08. Old PID: 32960, new PID: 2313. Please investigate, and update datanode5-hadoop-NodeManager-32960 in the monitoring script's hosts file to datanode5-hadoop-NodeManager-2313! Process info: hadoop 2313 1 29 10:15 pts/0 00:01:24 /usr/local/jdk1.7.0_51/bin/java -Dproc_nodemanager -Xmx1000m -Dhadoop.log.dir=/opt/hadoop.....
.
.
.