#!/bin/sh

# Copyright (c) 2012 ken.naruo
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.


## param setting
# Use the C locale so that the output of the tools parsed later in this
# script (date, vmstat, free, df) is not localized.
LANG=C
export LANG

# Resolve the directory of this script to an absolute path.
# JOB / JOBNET / PROJECT below are taken from the last three components of
# this path, so a relative invocation (dirname yields "." when the script is
# started from its own directory) would otherwise give meaningless names.
SCRIPT_DIR=$(CDPATH= cd -- "$(dirname "$0")" && pwd) || {
  echo "$0: cannot resolve script directory" >&2
  exit 1
}
LOG_DIR=${SCRIPT_DIR}/../log

# Take the date stamp once so every per-day file name agrees even when the
# script happens to run across midnight.
MONITOR_DATE_STAMP=$(date +%Y%m%d)
VMSTAT_LOG=${LOG_DIR}/vmstat.log_${MONITOR_DATE_STAMP}
CPU_LOG=${LOG_DIR}/cpusage.log_${MONITOR_DATE_STAMP}
MEMORY_LOG=${LOG_DIR}/memusage.log_${MONITOR_DATE_STAMP}
DISK_USAGE_LOG=${LOG_DIR}/disk_usage.log_${MONITOR_DATE_STAMP}
UNAME=$(uname -n)

# Load the installation settings found four directories above this script.
# NOTE(review): this file is presumably what defines ROOT, which is used on
# the next line - it is not visible from here, confirm.
. "${SCRIPT_DIR}/../../../../.root"
MONITOR_ERROR_QUE=${ROOT}/que/monitor/${UNAME}_error_${MONITOR_DATE_STAMP}

# The directory layout encodes the job identity: .../PROJECT/JOBNET/JOB
JOB=${SCRIPT_DIR##*/}
JOBNET_DIR=${SCRIPT_DIR%/*}
JOBNET=${JOBNET_DIR##*/}
PROJECT_DIR=${JOBNET_DIR%/*}
PROJECT=${PROJECT_DIR##*/}

## jobnet prof load
# Load every line of ../jobnet.prof except those containing SCHEDULE: the
# filtered copy is written next to this script and then sourced.
# Paths are quoted so a directory name with blanks or glob characters cannot
# split or expand the arguments.
# NOTE(review): mode 766 leaves the file that is sourced just below writable
# by every user, so anyone could inject commands into this script. Confirm
# whether other accounts really need write access and tighten the mode.
grep -v SCHEDULE "${SCRIPT_DIR}/../jobnet.prof" > "${SCRIPT_DIR}/.jobnet_tmp"
chmod 766 "${SCRIPT_DIR}/.jobnet_tmp"
. "${SCRIPT_DIR}/.jobnet_tmp"


## conf load
# NOTE(review): monitor.conf presumably supplies the *_WARN / *_CRIT
# thresholds and MAIL_ADDR used by the checks in this script - it is not
# visible from here, confirm.
. "${SCRIPT_DIR}/monitor.conf"


# Make sure the log directory exists; without it every log write in this
# script fails.
[ -d "${LOG_DIR}" ] || mkdir -p "${LOG_DIR}"

# Start each new per-day usage log with a column header line.
if [ ! -f "${CPU_LOG}" ]; then
  echo "time cpu_usage(%)" > "${CPU_LOG}"
fi

if [ ! -f "${MEMORY_LOG}" ]; then
  echo "time mem_usage(%)" > "${MEMORY_LOG}"
fi

## cpu resource get
# Append a time stamp and the raw vmstat report to the vmstat log, then derive
# the cpu usage as 100 minus the idle column (field 15).
# The first vmstat report is the average since boot, so two reports are taken
# one second apart and only the last line (current activity) is evaluated.
date +%T >> "${VMSTAT_LOG}"
IDLE=$(vmstat 1 2 | tee -a "${VMSTAT_LOG}" | tail -n 1 | awk '{print $15}')
CPU_USAGE=$(expr 100 - "${IDLE}")
echo "$(date +%T) ${CPU_USAGE}" >> "${CPU_LOG}"

MONITOR_STATUS_QUE_CPU=${ROOT}/que/monitor/${PROJECT}_${JOBNET}_cpu_status

# Default status line; it is overwritten below when a threshold is exceeded.
echo "$(date '+%Y/%m/%d %H:%M:%S') cpu usage is ${CPU_USAGE}%" > "${MONITOR_STATUS_QUE_CPU}"

## cpu resource check
# CRIT: usage >  CPU_CRIT
# WARN: CPU_WARN < usage <= CPU_CRIT
# A reading exactly equal to CPU_CRIT counts as WARN, so no value above
# CPU_WARN goes unreported. The flag file of every level that is not active
# is removed, which re-arms the mail for that level.
CPU_LEVEL=
CPU_FLAG=
if [ "${CPU_USAGE}" -gt "${CPU_CRIT}" ]; then
  CPU_LEVEL=CRIT
  CPU_FLAG=${SCRIPT_DIR}/.cpu_crit.tmp
  rm -f "${SCRIPT_DIR}/.cpu_warn.tmp"
elif [ "${CPU_USAGE}" -gt "${CPU_WARN}" ]; then
  CPU_LEVEL=WARN
  CPU_FLAG=${SCRIPT_DIR}/.cpu_warn.tmp
  rm -f "${SCRIPT_DIR}/.cpu_crit.tmp"
else
  rm -f "${SCRIPT_DIR}/.cpu_warn.tmp" "${SCRIPT_DIR}/.cpu_crit.tmp"
fi

if [ -n "${CPU_LEVEL}" ]; then

  # One time stamp for all messages of this event (date and time cannot
  # disagree across midnight).
  CPU_NOW=$(date '+%Y/%m/%d %H:%M:%S')
  echo "${CPU_NOW} ${CPU_LEVEL} cpu usage is ${CPU_USAGE}" > "${MONITOR_STATUS_QUE_CPU}"
  echo "${CPU_NOW} ${CPU_LEVEL} cpu usage is ${CPU_USAGE} %" >> "${MONITOR_ERROR_QUE}"

  # The flag file records that this level has already been handled, so the
  # mail is queued only on the first run in which the level is seen.
  if [ ! -f "${CPU_FLAG}" ]; then

    if [ "${MAIL_ADDR}" ]; then
      TIME=$(date +%s)
      # NOTE(review): the queue name contains only project/jobnet/job and the
      # epoch second, so another alert queued under the same name within the
      # same second would overwrite this file - confirm whether that matters.
      MAIL_QUE=${ROOT}/que/mail/${PROJECT}_${JOBNET}_${JOB}_${TIME}
      {
        echo "MAIL_ADDR:${MAIL_ADDR}"
        echo "SUBJECT:CPU ${CPU_LEVEL} ${UNAME}"
        echo "${CPU_NOW} ${CPU_LEVEL} ${UNAME} cpu usage is ${CPU_USAGE} %"
        echo ""
        # BSD-style options: "ps -aux" formally means "processes of user x".
        ps aux
      } > "${MAIL_QUE}"
    fi

    touch "${CPU_FLAG}"

  fi

fi


## memory resource get
# Take total and used memory from the "Mem" line of a single free run, so
# both numbers belong to the same snapshot.
MEM_LINE=$(free | awk '/Mem/ { print $2, $3 }')
TOTAL=${MEM_LINE%% *}
USED=${MEM_LINE##* }

# Integer percentage, truncated. awk does the division so bc is not needed,
# and an empty or zero total yields an empty result instead of a division
# error.
MEMORY_USAGE=$(echo "${TOTAL} ${USED}" | awk '$1 > 0 { printf "%d\n", $2 * 100 / $1 }')

echo "$(date +%T) ${MEMORY_USAGE}" >> "${MEMORY_LOG}"

MONITOR_STATUS_QUE_MEMORY=${ROOT}/que/monitor/${PROJECT}_${JOBNET}_memory_status

# Default status line; it is overwritten below when a threshold is exceeded.
echo "$(date '+%Y/%m/%d %H:%M:%S') memory usage is ${MEMORY_USAGE}%" > "${MONITOR_STATUS_QUE_MEMORY}"

## memory resource check
# CRIT: usage >  MEMORY_CRIT
# WARN: MEMORY_WARN < usage <= MEMORY_CRIT
# A reading exactly equal to MEMORY_CRIT counts as WARN, so no value above
# MEMORY_WARN goes unreported. The flag file of every level that is not
# active is removed, which re-arms the mail for that level.
MEMORY_LEVEL=
MEMORY_FLAG=
if [ "${MEMORY_USAGE}" -gt "${MEMORY_CRIT}" ]; then
  MEMORY_LEVEL=CRIT
  MEMORY_FLAG=${SCRIPT_DIR}/.memory_crit.tmp
  rm -f "${SCRIPT_DIR}/.memory_warn.tmp"
elif [ "${MEMORY_USAGE}" -gt "${MEMORY_WARN}" ]; then
  MEMORY_LEVEL=WARN
  MEMORY_FLAG=${SCRIPT_DIR}/.memory_warn.tmp
  rm -f "${SCRIPT_DIR}/.memory_crit.tmp"
else
  rm -f "${SCRIPT_DIR}/.memory_warn.tmp" "${SCRIPT_DIR}/.memory_crit.tmp"
fi

if [ -n "${MEMORY_LEVEL}" ]; then

  # One time stamp for all messages of this event (date and time cannot
  # disagree across midnight).
  MEMORY_NOW=$(date '+%Y/%m/%d %H:%M:%S')
  echo "${MEMORY_NOW} ${MEMORY_LEVEL} memory usage is ${MEMORY_USAGE}" > "${MONITOR_STATUS_QUE_MEMORY}"
  echo "${MEMORY_NOW} ${MEMORY_LEVEL} memory usage is ${MEMORY_USAGE} %" >> "${MONITOR_ERROR_QUE}"

  # The flag file records that this level has already been handled, so the
  # mail is queued only on the first run in which the level is seen.
  if [ ! -f "${MEMORY_FLAG}" ]; then

    if [ "${MAIL_ADDR}" ]; then
      TIME=$(date +%s)
      # NOTE(review): the queue name contains only project/jobnet/job and the
      # epoch second, so another alert queued under the same name within the
      # same second would overwrite this file - confirm whether that matters.
      MAIL_QUE=${ROOT}/que/mail/${PROJECT}_${JOBNET}_${JOB}_${TIME}
      {
        echo "MAIL_ADDR:${MAIL_ADDR}"
        echo "SUBJECT:MEMORY ${MEMORY_LEVEL} ${UNAME}"
        echo "${MEMORY_NOW} ${MEMORY_LEVEL} ${UNAME} memory usage is ${MEMORY_USAGE} %"
        echo ""
        # BSD-style options: "ps -aux" formally means "processes of user x".
        ps aux
      } > "${MAIL_QUE}"
    fi

    touch "${MEMORY_FLAG}"

  fi

fi


## disk usage
# A single "df -P" run supplies the use% and the mount point of every file
# system. -P keeps each entry on one line even when the device name is long;
# everything from field 6 on is the mount point (which may contain blanks).
DISK_LIST=$(df -P | awk 'NR > 1 {
  mount = $6
  for (i = 7; i <= NF; i++) mount = mount " " $i
  print $5, mount
}')

# The list is fed through a here-document so the loop body runs in the
# current shell, not in a pipeline subshell.
while read -r DISK_USAGE FILE_SYSTEM; do
  [ -n "${FILE_SYSTEM}" ] || continue

  echo "$(date +%T) ${FILE_SYSTEM} ${DISK_USAGE}" >> "${DISK_USAGE_LOG}"
  DISK_USAGE=${DISK_USAGE%\%}

  # Name used in file names: the mount point without slashes, "root" for "/".
  # It must be derived before the status file name that embeds it.
  FILE_SYSTEM_NAME=$(printf '%s\n' "${FILE_SYSTEM}" | sed -e 's/\///g')
  if [ -z "${FILE_SYSTEM_NAME}" ]; then
    FILE_SYSTEM_NAME="root"
  fi

  MONITOR_STATUS_QUE_DISK=${ROOT}/que/monitor/${PROJECT}_${JOBNET}_${FILE_SYSTEM_NAME}_disk_status

  # Default status line; it is overwritten below when a threshold is exceeded.
  echo "$(date '+%Y/%m/%d %H:%M:%S') ${FILE_SYSTEM} usage is ${DISK_USAGE}%" > "${MONITOR_STATUS_QUE_DISK}"

  ## disk resource check
  # CRIT: usage >  DISK_CRIT
  # WARN: DISK_WARN < usage <= DISK_CRIT
  # A reading exactly equal to DISK_CRIT counts as WARN, so no value above
  # DISK_WARN goes unreported. The flag file of every level that is not
  # active is removed, which re-arms the mail for that level.
  DISK_LEVEL=
  DISK_FLAG=
  if [ "${DISK_USAGE}" -gt "${DISK_CRIT}" ]; then
    DISK_LEVEL=CRIT
    DISK_FLAG=${SCRIPT_DIR}/.disk_crit_${FILE_SYSTEM_NAME}.tmp
    rm -f "${SCRIPT_DIR}/.disk_warn_${FILE_SYSTEM_NAME}.tmp"
  elif [ "${DISK_USAGE}" -gt "${DISK_WARN}" ]; then
    DISK_LEVEL=WARN
    DISK_FLAG=${SCRIPT_DIR}/.disk_warn_${FILE_SYSTEM_NAME}.tmp
    rm -f "${SCRIPT_DIR}/.disk_crit_${FILE_SYSTEM_NAME}.tmp"
  else
    rm -f "${SCRIPT_DIR}/.disk_warn_${FILE_SYSTEM_NAME}.tmp" \
          "${SCRIPT_DIR}/.disk_crit_${FILE_SYSTEM_NAME}.tmp"
  fi

  if [ -n "${DISK_LEVEL}" ]; then

    # One time stamp for all messages of this event (date and time cannot
    # disagree across midnight).
    DISK_NOW=$(date '+%Y/%m/%d %H:%M:%S')
    echo "${DISK_NOW} ${DISK_LEVEL} ${FILE_SYSTEM} usage is ${DISK_USAGE}" > "${MONITOR_STATUS_QUE_DISK}"
    echo "${DISK_NOW} ${DISK_LEVEL} ${FILE_SYSTEM} usage is ${DISK_USAGE} %" >> "${MONITOR_ERROR_QUE}"

    # The flag file records that this level has already been handled for this
    # file system, so the mail is queued only on the first run in which the
    # level is seen.
    if [ ! -f "${DISK_FLAG}" ]; then

      if [ "${MAIL_ADDR}" ]; then
        TIME=$(date +%s)
        MAIL_QUE=${ROOT}/que/mail/${PROJECT}_${JOBNET}_${JOB}_${FILE_SYSTEM_NAME}_${TIME}
        {
          echo "MAIL_ADDR:${MAIL_ADDR}"
          echo "SUBJECT:DISK ${DISK_LEVEL} ${UNAME}"
          echo "${DISK_NOW} ${DISK_LEVEL} ${UNAME} ${FILE_SYSTEM} usage is ${DISK_USAGE} %"
          echo ""
          df -h
        } > "${MAIL_QUE}"
      fi

      touch "${DISK_FLAG}"

    fi

  fi

done <<EOF
${DISK_LIST}
EOF

# Delete *.tmp files under the script directory whose last modification is at
# least a day old. With the flag file gone, a condition that still persists
# is treated as new by the checks above and is mailed again.
# The pattern is quoted so the shell cannot expand it against the current
# directory before find sees it; -exec hands the names to rm unmodified.
find "${SCRIPT_DIR}" -type f -name '*.tmp' -mtime +0 -exec rm -f {} +
exit 0

