#!/bin/bash
#
# J.P.Boggis 06/10/2004:  Script to loop through a list of hosts, determine
#			  status of host (By pinging it) and send an alert
#			  E-mail if necessary.
#
#			  Current status of all hosts can be shown using:
#
#		             hostmon status
#
# This script should be called on a regular basis from cron (E.g:  Once every
# 5 minutes.)  Example /etc/crontab entry:
#
#   0-55/5 * * * *   root  /etc/hostmon/hostmon
#
# Hosts to monitor should be added to a file named 'hosts' in the same working
# directory as the hostmon script (/etc/hostmon recommended.)
#
# This file contains host entries in the format below.  Each column must be
# TAB separated.  Comments may be added by beginning a line with #.  Blank
# lines are not allowed (unless preceded with a #).
#
# Hostname:             Location:       Type:   O/S:    Description:
# ~~~~~~~~~             ~~~~~~~~~       ~~~~~   ~~~~    ~~~~~~~~~~~~
# linux.local		Server Room	Server	Linux	File Server
# adsl.local		Server Room	Device	ADSL	Internet Connection
# pc1.local		Office		PC	Linux	Fred's PC
# pc2.local		Office		PC	Win2K	Joe's PC
# ps.local		Office		Server	Print	Office Print Server
#
# NOTE:  Hostnames must be resolvable via DNS or /etc/hosts, otherwise use
#	 IP addresses instead.


# How many pings each host receives per run.  The average of their response
# times is the figure used for monitoring/alerting.
PingCount='10'

# A warning notification goes out once the average response time (in ms)
# rises above this value.
WarnTime='400'

# A warning goes out once packet loss exceeds this value.
WarnLoss='25'

# Text placed at the front of every alert E-mail subject.
EmailSubject='ALERT:  '

# Sender identity for alert E-mails.  Setting a sender requires the script to
# be run as a privileged user; otherwise leave EmailSender blank.
EmailSenderName='Host Monitor'
EmailSender='alert@jcdigita.com'

# Who receives the alert E-mails (one or more addresses).
EmailRcpt='email@jcdigita.com'

# Set Logging to 1 to append every alert to LogFile.
Logging='1'
LogFile='/var/log/hostmon'

# Set Debug to 1 to print debugging output while the script runs.
Debug='0'


# Work out which directory the script was started from: everything in $0 that
# precedes the final '/' (or '.' when $0 has no directory part).  The list of
# hosts to monitor must live in that directory; give up if it is missing.
DirPattern='^(.*)/[^/]+$'
if [[ "$0" =~ $DirPattern ]]; then
   HOSTPATH="${BASH_REMATCH[1]}"
else
   HOSTPATH="."
fi

[ -e "$HOSTPATH/hosts" ] || {
   echo "Error:  Host monitor file '$HOSTPATH/hosts' not found."
   exit 1
}


# Make sure the directory that holds the per-host status files exists,
# creating it when it is missing.  The path is quoted (and preceded by --) so
# that an install directory containing spaces, glob characters or a leading
# dash is passed to mkdir as one literal argument.
if [ ! -d "$HOSTPATH/status" ]; then
   mkdir -- "$HOSTPATH/status"
   # Re-test rather than trusting mkdir's exit code alone: what matters is
   # that a usable directory is now present.
   if [ ! -d "$HOSTPATH/status" ]; then
      echo "Error:  Unable to create host status directory '$HOSTPATH/status'."
      exit 1
   fi
fi


# Print the recorded state of every host, one per line, in the form
# "<state><TAB><host>".
#   $1 - directory holding one status file per host (file name = host name)
# The directory is walked with a glob instead of parsing `ls` output, so file
# names and directory paths containing whitespace or glob characters are
# handled correctly, and printf is used so file content is printed literally.
hostmon_show_status() {
   local status_dir=$1
   local status_file
   for status_file in "$status_dir"/*; do
      # With no status files the glob stays unexpanded; skip that literal
      # pattern (and anything else that is not a regular file).
      [ -f "$status_file" ] || continue
      printf '%s\t%s\n' "$(< "$status_file")" "${status_file##*/}"
   done
}

# "hostmon status" (or STATUS): show the current host states and stop.
if [ "$1" = "status" ] || [ "$1" = "STATUS" ]; then
   hostmon_show_status "$HOSTPATH/status"
   exit 0
fi


# ---------------------------------------------------------------------------
# Main monitoring pass.
#
# Reads the hosts file line by line, pings every valid host, works out its
# state (UP / DOWN / TIMEOUT / LOSS), compares that with the state saved by
# the previous run and, when it changed, sends an alert E-mail and (if
# Logging is 1) appends a line to LogFile.  The new state is then saved.
#
# Globals read:    HOSTPATH PingCount WarnTime WarnLoss EmailSubject
#                  EmailSenderName EmailSender EmailRcpt Logging LogFile Debug
# Globals written: STARTTIME (used by the elapsed-time report) plus the
#                  per-host working variables set below.
# ---------------------------------------------------------------------------

# Split one hosts-file line into its TAB separated columns.
#   $1 - raw line
# The columns are left in the global array HostFields with surrounding
# whitespace removed.  Runs of TABs count as one separator and columns that
# are empty after trimming are dropped.  The line is never subjected to shell
# word splitting or pathname expansion, so characters such as '*' or '|' in a
# description are kept literally.
hostmon_split_fields() {
    local raw=()
    local field
    HostFields=()
    IFS=$'\t' read -r -a raw <<< "$1"
    for field in "${raw[@]}"; do
        field=${field#"${field%%[![:space:]]*}"}   # strip leading whitespace
        field=${field%"${field##*[![:space:]]}"}   # strip trailing whitespace
        if [ -n "$field" ]; then
            HostFields+=("$field")
        fi
    done
    return 0
}

# Extract packet and timing statistics from the output of `ping -c N host`.
#   $1 - complete ping output
# Sets the globals TXPackets, RXPackets, PacketLoss (percent), MinTime,
# AvgTime, MaxTime and RoundAvg (whole-millisecond part of AvgTime).
# Returns 1, leaving "host down" defaults (100% loss, 9999 ms), when the
# output has no "packets transmitted" summary line; returns 0 otherwise.
hostmon_parse_ping() {
    local data=$1
    local roundtrip rest

    TXPackets=0
    RXPackets=0
    PacketLoss=100
    MinTime=9999
    AvgTime=9999
    MaxTime=9999
    RoundAvg=9999

    case $data in
        *" packets transmitted"*) ;;
        *) return 1 ;;
    esac

    TXPackets=$(printf '%s\n' "$data" |
        awk '/ packets transmitted/{print $1; exit}')
    RXPackets=$(printf '%s\n' "$data" |
        awk '/ packets transmitted/{print $4; exit}')
    # The summary line looks like "rtt min/avg/max/mdev = a/b/c/d ms" or
    # "round-trip min/avg/max = a/b/c ms"; the 4th word holds the numbers.
    # It is absent when no reply was received.
    roundtrip=$(printf '%s\n' "$data" |
        awk '/^[ \t]*(round-trip|rtt)[ \t]/{print $4; exit}')
    roundtrip=${roundtrip//%/}

    if [ -n "$roundtrip" ]; then
        IFS=/ read -r MinTime AvgTime MaxTime rest <<< "$roundtrip"
    fi
    [ -n "$MinTime" ] || MinTime=9999
    [ -n "$AvgTime" ] || AvgTime=9999
    [ -n "$MaxTime" ] || MaxTime=9999

    # Anything that is not a plain non-negative integer counts as zero.
    case $TXPackets in ''|*[!0-9]*) TXPackets=0 ;; esac
    case $RXPackets in ''|*[!0-9]*) RXPackets=0 ;; esac

    if [ "$TXPackets" -gt 0 ]; then
        # 10# forces base 10 so a leading zero is not read as octal.
        PacketLoss=$(( 100 - 10#$RXPackets * 100 / 10#$TXPackets ))
    else
        # Nothing was sent, so nothing can have been answered.
        PacketLoss=100
    fi

    # Integer part of the AVERAGE round-trip time, for comparison with the
    # WarnTime threshold.
    RoundAvg=${AvgTime%%.*}
    case $RoundAvg in ''|*[!0-9]*) RoundAvg=9999 ;; esac
    return 0
}

# Decide the state of a host from its ping statistics.
#   $1 - packet loss in percent       $2 - average round trip in whole ms
#   $3 - response-time threshold (ms) $4 - packet-loss threshold
# Prints DOWN, TIMEOUT, LOSS or UP.
hostmon_classify() {
    local loss=$1 avg=$2 warn_time=$3 warn_loss=$4
    if [ "$loss" -ge 100 ]; then
        echo "DOWN"
    elif [ "$avg" -gt "$warn_time" ]; then
        echo "TIMEOUT"
    elif [ "$loss" -gt "$warn_loss" ]; then
        echo "LOSS"
    else
        echo "UP"
    fi
}

STARTTIME="$(date +%s)"

# The hosts file is read on descriptor 3 so that commands run inside the loop
# keep the script's own stdin.  The "|| [ -n ... ]" part picks up a final
# line that lacks a trailing newline.
while IFS= read -r -u 3 LINE || [ -n "$LINE" ]; do

    # Skip empty lines and comment lines.
    case $LINE in
        ''|'#'*) continue ;;
    esac

    # Get host data
    hostmon_split_fields "$LINE"
    if [ "${#HostFields[@]}" -lt 5 ]; then
        InvalidData=$(printf '%s, ' "${HostFields[@]}")
        echo "Invalid host data:  ${InvalidData%, }"
        continue
    fi
    HostName=${HostFields[0]//[[:space:]]/}
    Location=${HostFields[1]}
    Type=${HostFields[2]}
    OS=${HostFields[3]}
    Desc=${HostFields[4]}

    # Ping host
    if [ "$Debug" -eq 1 ]; then
        echo "Testing $HostName ($Location $Desc $OS $Type)..."
    fi
    Time="$(date)"
    PINGDATA="$(ping -c "$PingCount" "$HostName" 2>&1)"
    if [ "$Debug" -eq 1 ]; then
        echo "$PINGDATA"
    fi

    # Get packet stats (defaults describing a dead host are used when the
    # ping output cannot be parsed, so the return value needs no handling).
    hostmon_parse_ping "$PINGDATA"

    if [ "$Debug" -eq 1 ]; then
        echo "  Results: $TXPackets TX, $RXPackets RX, ${PacketLoss}% loss ($MinTime/$AvgTime/$MaxTime)"
    fi

    # Get last status of host (a host never seen before is assumed to be UP)
    StatusFile="$HOSTPATH/status/$HostName"
    if [ -e "$StatusFile" ]; then
        LastStatus="$(< "$StatusFile")"
    else
        LastStatus="UP"
    fi

    # Determine current status of host
    Status="$(hostmon_classify "$PacketLoss" "$RoundAvg" "$WarnTime" "$WarnLoss")"

    # Send warning?  Only when the state differs from the previous run.
    if [ "$Status" != "$LastStatus" ]; then
        case $Status in
            DOWN)
                StatusType="DOWN"
                StatusDesc="has gone down (No response)"
                ;;
            TIMEOUT)
                StatusType="FAILING"
                StatusDesc="is experiencing high average response time (${AvgTime}ms)"
                ;;
            LOSS)
                StatusType="FAILING"
                StatusDesc="is experiencing high packet loss (${PacketLoss}%)"
                ;;
            UP)
                StatusType="UP"
                StatusDesc="is back up again"
                ;;
            *)
                StatusType="UNKNOWN"
                StatusDesc="has generated an unknown alert condition"
                ;;
        esac

        ServerDesc="$Type $HostName ($OS, $Desc) in $Location"
        if [ "$Debug" -eq 1 ]; then
            echo "  ${EmailSubject}[$StatusType]  $ServerDesc ${StatusDesc}."
        fi

        # Generate E-mail message.  Real newlines are used (instead of "\n"
        # expanded by echo -e) so backslashes in the ping output stay intact.
        Subject="${EmailSubject}[$StatusType]  $ServerDesc ${StatusDesc} at $Time"
        Message="$ServerDesc $StatusDesc at $Time:"$'\n\n'"$PINGDATA"

        # Send E-mail.  $EmailRcpt is deliberately unquoted so that several
        # space separated recipients become separate arguments.
        if [ -n "$EmailSender" ]; then
            # NOTE(review): the positional "$EmailSender" below is kept from
            # the original command line; it looks like it makes the sender an
            # additional recipient - confirm whether that is intended.
            printf '%s\n' "$Message" | mail \
                -a "From: \"$EmailSenderName\" <$EmailSender>" \
                -a "Reply-to: \"$EmailSenderName\" <$EmailSender>" \
                "$EmailSender" -s "$Subject" $EmailRcpt
        else
            printf '%s\n' "$Message" | mail -s "$Subject" $EmailRcpt
        fi

        # Add to log
        if [ "$Logging" -eq 1 ]; then
            echo "$(date +"%d/%m/%Y %H:%M.%S"):  $ServerDesc ${StatusDesc}." >> "$LogFile"
        fi
    fi

    # Remember the state for the next run.
    echo "$Status" > "$StatusFile"
done 3< "$HOSTPATH/hosts"

# With debugging switched on, report how long the whole run took, measured
# from the STARTTIME recorded before the hosts were tested.
if (( Debug == 1 )); then
   ENDTIME=$(date +%s)
   TIMEDIFF=$(( ENDTIME - STARTTIME ))
   printf '  Total time elapsed:  %dm %ds\n' "$(( TIMEDIFF / 60 ))" "$(( TIMEDIFF % 60 ))"
fi
