#!/bin/sh
#
# J.P.Boggis 06/10/2004: Script to loop through a list of hosts, determine
#                        the status of each host (by pinging it) and send an
#                        alert E-mail if necessary.
#
# Current status of all hosts can be shown using:
#
#   hostmon status
#
# This script should be called on a regular basis from cron (e.g. once every
# 5 minutes).  Example /etc/crontab entry:
#
#   0-55/5 * * * * root /etc/hostmon/hostmon
#
# Hosts to monitor should be added to a file named 'hosts' in the same
# directory as the hostmon script (/etc/hostmon recommended).
#
# This file contains host entries in the format below.  Each column must be
# TAB separated (several consecutive TABs count as one separator).  Comments
# may be added by beginning a line with #.  Empty lines are skipped.
#
#   Hostname:    Location:    Type:   O/S:   Description:
#   ~~~~~~~~~    ~~~~~~~~~    ~~~~~   ~~~~   ~~~~~~~~~~~~
#   linux.local  Server Room  Server  Linux  File Server
#   adsl.local   Server Room  Device  ADSL   Internet Connection
#   pc1.local    Office       PC      Linux  Fred's PC
#   pc2.local    Office       PC      Win2K  Joe's PC
#   ps.local     Office       Server  Print  Office Print Server
#
# NOTE: Hostnames must be resolvable via DNS or /etc/hosts, otherwise use
#       IP addresses instead.
#
# The status of each host (UP, DOWN, TIMEOUT or LOSS) is stored in a file
# named after the host in the 'status' sub-directory.  An alert is sent only
# when the status differs from the one recorded by the previous run.

# Number of pings to send to each host (Average time will be used for
# monitoring/alerting purposes.)
PingCount=10

# Send warning notification if average response time (ms) exceeds this value.
WarnTime=400

# Send warning if packet loss (%) exceeds this value.
WarnLoss=25

# Alert E-mail subject prefix
EmailSubject="ALERT: "

# Alert E-mail sender (setting a sender requires the script to be run as a
# privileged user; otherwise leave EmailSender blank)
EmailSenderName="Host Monitor"
EmailSender="alert@jcdigita.com"

# Alert E-mail recipient(s), separated by spaces
EmailRcpt="email@jcdigita.com"

# Keep log of alerts?
Logging=1
LogFile="/var/log/hostmon"

# Show debugging output
Debug=0

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

# trim STRING
# Print STRING with leading and trailing spaces removed.
trim() {
  _trim=$1
  _trim=${_trim#"${_trim%%[! ]*}"}
  _trim=${_trim%"${_trim##*[! ]}"}
  printf '%s' "$_trim"
}

# is_uint STRING
# Succeed only if STRING is a non-empty run of decimal digits, i.e. something
# that is safe to hand to the numeric operators of test(1) and $(( )).
is_uint() {
  case "$1" in
    '' | *[!0-9]*) return 1 ;;
    *) return 0 ;;
  esac
}

# ---------------------------------------------------------------------------
# Start-up checks
# ---------------------------------------------------------------------------

# The hosts file and status directory live next to the script itself.
HOSTPATH=$(dirname -- "$0")

# Check file of hosts to monitor exists
if [ ! -e "$HOSTPATH/hosts" ]; then
  echo "Error: Host monitor file '$HOSTPATH/hosts' not found."
  exit 1
fi

# Check directory for status files exists (Attempt to create if not)
if [ ! -d "$HOSTPATH/status" ]; then
  mkdir "$HOSTPATH/status"
  if [ ! -d "$HOSTPATH/status" ]; then
    echo "Error: Unable to create host status directory '$HOSTPATH/status'."
    exit 1
  fi
fi

# Show current host status: one "<status><TAB><host>" line per status file.
case "${1:-}" in
  status | STATUS)
    for StatFile in "$HOSTPATH"/status/*; do
      # An empty directory leaves the glob pattern unexpanded; skip it.
      [ -f "$StatFile" ] || continue
      printf '%s\t%s\n' "$(cat "$StatFile")" "${StatFile##*/}"
    done
    exit 0
    ;;
esac

# ---------------------------------------------------------------------------
# Loop through each host and test
# ---------------------------------------------------------------------------

STARTTIME=$(date +%s)
TAB=$(printf '\t')
OldIFS=$IFS

# The hosts file is read on descriptor 3 so that commands run inside the loop
# cannot consume it through their standard input.
while IFS= read -r Line <&3 || [ -n "$Line" ]; do
  # Skip empty lines and comments
  case "$Line" in
    '' | '#'*) continue ;;
  esac

  # Split the line into TAB separated fields.  Globbing is switched off so
  # that characters such as '*' in a description are kept literally.
  set -f
  IFS=$TAB
  # shellcheck disable=SC2086 # word splitting on TABs is the point here
  set -- $Line
  IFS=$OldIFS
  set +f

  # Get host data (fields beyond the fifth are ignored)
  HostName=
  if [ "$#" -ge 5 ]; then
    HostName=$(printf '%s' "$1" | tr -d '[:space:]')
    Location=$(trim "$2")
    Type=$(trim "$3")
    OS=$(trim "$4")
    Desc=$(trim "$5")
  fi

  if [ -z "$HostName" ]; then
    Fields=
    for Field in "$@"; do
      Fields="${Fields:+$Fields, }$(trim "$Field")"
    done
    echo "Invalid host data: $Fields"
    continue
  fi

  # Ping host
  if [ "$Debug" -eq 1 ]; then
    echo "Testing $HostName ($Location $Desc $OS $Type)..."
  fi
  Time=$(date)
  PINGDATA=$(ping -c "$PingCount" "$HostName" 2>&1)
  if [ "$Debug" -eq 1 ]; then
    printf '%s\n' "$PINGDATA"
  fi

  # Get packet stats.  These defaults describe a host that did not answer and
  # remain in force for any value that cannot be parsed from the ping output.
  TXPackets=0
  RXPackets=0
  MinTime=9999
  AvgTime=9999
  MaxTime=9999
  case "$PINGDATA" in
    *transmitted*)
      # Summary line: "<tx> packets transmitted, <rx> received, ..."
      TXPackets=$(printf '%s\n' "$PINGDATA" | awk '/transmitted/{print $1}')
      RXPackets=$(printf '%s\n' "$PINGDATA" | awk '/transmitted/{print $4}')
      # Timing line: "rtt|round-trip min/avg/max/... = <min>/<avg>/<max>/..."
      RoundTrip=$(printf '%s\n' "$PINGDATA" \
        | awk '/(round-trip|rtt)/{print $4}' | tr -d '%')
      if [ -n "$RoundTrip" ]; then
        MinTime=$(printf '%s\n' "$RoundTrip" | cut -f1 -d'/')
        AvgTime=$(printf '%s\n' "$RoundTrip" | cut -f2 -d'/')
        MaxTime=$(printf '%s\n' "$RoundTrip" | cut -f3 -d'/')
      fi
      is_uint "$TXPackets" || TXPackets=0
      is_uint "$RXPackets" || RXPackets=0
      [ -n "$MinTime" ] || MinTime=9999
      [ -n "$AvgTime" ] || AvgTime=9999
      [ -n "$MaxTime" ] || MaxTime=9999
      ;;
  esac

  # Percentage of packets lost.  Nothing transmitted counts as total loss.
  if [ "$TXPackets" -gt 0 ]; then
    PacketLoss=$((100 - RXPackets * 100 / TXPackets))
    # Duplicate replies can make RX exceed TX; never report negative loss.
    if [ "$PacketLoss" -lt 0 ]; then PacketLoss=0; fi
  else
    PacketLoss=100
  fi

  # Whole-millisecond part of the AVERAGE round trip time, for the integer
  # comparison against WarnTime.
  RoundAvg=${AvgTime%%.*}
  is_uint "$RoundAvg" || RoundAvg=9999

  if [ "$Debug" -eq 1 ]; then
    echo " Results: $TXPackets TX, $RXPackets RX, ${PacketLoss}% loss ($MinTime/$AvgTime/$MaxTime)"
  fi

  # Get last status of host (a host seen for the first time is assumed UP)
  StatusFile="$HOSTPATH/status/$HostName"
  if [ -e "$StatusFile" ]; then
    LastStatus=$(cat "$StatusFile")
  else
    LastStatus="UP"
  fi

  # Determine current status of host
  if [ "$PacketLoss" -ge 100 ]; then
    Status="DOWN"
  elif [ "$RoundAvg" -gt "$WarnTime" ]; then
    Status="TIMEOUT"
  elif [ "$PacketLoss" -gt "$WarnLoss" ]; then
    Status="LOSS"
  else
    Status="UP"
  fi

  # Send warning?  Only when the status changed since the previous run.
  if [ "$Status" != "$LastStatus" ]; then
    case "$Status" in
      DOWN)
        StatusType="DOWN"
        StatusDesc="has gone down (No response)"
        ;;
      TIMEOUT)
        StatusType="FAILING"
        StatusDesc="is experiencing high average response time (${AvgTime}ms)"
        ;;
      LOSS)
        StatusType="FAILING"
        StatusDesc="is experiencing high packet loss (${PacketLoss}%)"
        ;;
      UP)
        StatusType="UP"
        StatusDesc="is back up again"
        ;;
      *)
        StatusType="UNKNOWN"
        StatusDesc="has generated an unknown alert condition"
        ;;
    esac

    ServerDesc="$Type $HostName ($OS, $Desc) in $Location"
    if [ "$Debug" -eq 1 ]; then
      echo " ${EmailSubject}[$StatusType] $ServerDesc ${StatusDesc}."
    fi

    # Generate E-mail message
    Subject="${EmailSubject}[$StatusType] $ServerDesc ${StatusDesc} at $Time"

    # Send E-mail.  The body is written with printf so that backslashes in
    # the ping output are passed through untouched.  $EmailRcpt is left
    # unquoted on purpose: it may hold several space separated addresses.
    # NOTE(review): as before, the sender address is also passed to mail as
    # a recipient when EmailSender is set - confirm this is intended.
    if [ -n "$EmailSender" ]; then
      # shellcheck disable=SC2086
      printf '%s %s at %s:\n\n%s\n' "$ServerDesc" "$StatusDesc" "$Time" "$PINGDATA" \
        | mail -a "From: \"$EmailSenderName\" <$EmailSender>" \
          -a "Reply-to: \"$EmailSenderName\" <$EmailSender>" \
          "$EmailSender" -s "$Subject" $EmailRcpt
    else
      # shellcheck disable=SC2086
      printf '%s %s at %s:\n\n%s\n' "$ServerDesc" "$StatusDesc" "$Time" "$PINGDATA" \
        | mail -s "$Subject" $EmailRcpt
    fi

    # Add to log
    if [ "$Logging" -eq 1 ]; then
      printf '%s: %s %s.\n' "$(date '+%d/%m/%Y %H:%M.%S')" \
        "$ServerDesc" "$StatusDesc" >> "$LogFile"
    fi
  fi

  # Remember the status for the next run
  printf '%s\n' "$Status" > "$StatusFile"
done 3< "$HOSTPATH/hosts"

if [ "$Debug" -eq 1 ]; then
  ENDTIME=$(date +%s)
  TIMEDIFF=$((ENDTIME - STARTTIME))
  echo " Total time elapsed: $((TIMEDIFF / 60))m $((TIMEDIFF % 60))s"
fi