#!/bin/sh
#
#	Basic tests of sanity for a newly-built version of
#	linux-HA software (heartbeat)
#
#	Conditions for running:
#
#	Heartbeat must be installed.
#
#	Must be root.
#
#	CANNOT have a real heartbeat configuration.
#
#	Must have networking configured with one working
#		network interface.
#
#	should not have $TESTIP below used for anything ;-)
#
#	should have multicast address $MCASTIP port 694
#		available
#		(you don't need a multicast capable router).
#
#
# Note: you might have to change TESTIP and MCASTIP
#
# Addresses written into the generated test configuration:
# TESTIP goes into haresources, MCASTIP into the mcast line of ha.cf.
TESTIP=10.253.252.251
MCASTIP=225.224.223.222
#
# Programs and directories of the installed heartbeat under test.
# IFCONFIG and APPHBD are always expanded unquoted, so that they split
# into a command word plus its arguments.
IFCONFIG="/sbin/ifconfig "
HADIR=/etc/ha.d
HBSCRIPT=/etc/init.d/heartbeat
STONITH=/usr/sbin/stonith
# LIBDIR ends in a slash, so HBLIB holds a doubled slash (/usr/lib//heartbeat).
LIBDIR=/usr/lib/
HBLIB=$LIBDIR/heartbeat
APPHBD="$HBLIB/apphbd -d"
CLSTATUS=/usr/bin/cl_status
APPHBTEST=$HBLIB/apphbtest
IPCTEST=$HBLIB/ipctest
SNMPAGENTTEST=$HBLIB/SNMPAgentSanityCheck
# Exported so every program started from here inherits it.
# NOTE(review): presumably meant for glibc, where MALLOC_CHECK_=2 makes
# malloc abort on a detected heap error - confirm for the C library in use.
MALLOC_CHECK_=2; export MALLOC_CHECK_
#
# Used only when no interface name can be derived from $IFCONFIG output.
DEFAULTINTERFACE=eth0	# But we really guess it...
#
# Marker written as the first line of every generated config file;
# CheckConfigFile looks for it to tell test files from real ones.
IDENTSTRING="Linux-HA TEST configuration file - REMOVEME!!"
# Second cluster node named in the generated files; the tests expect
# heartbeat to declare it dead and STONITH it.
DUMMYNODE=ImAlwaysDead.com
LOCALNODE=`uname -n`
# Log and debug file named in the generated ha.cf and apphbd config;
# the checks below grep it for expected messages.
LOGFILE=/var/run/linux-ha.testlog
RSCDIR=$HADIR/resource.d
# Running total of detected failures; it becomes the script's exit status.
errcount=0

# NOTE(review): the result of this cd is not checked; if $HADIR is
# missing the script simply carries on in the caller's directory.
cd $HADIR
# Allow core dumps; the end of the script reports a $HADIR/core file.
ulimit -c unlimited

# Print the name of every interface listed by $IFCONFIG, one per line.
# Only lines that begin with a letter are considered; on those, the
# first blank (together with any colons right before it) and everything
# after it is cut off.
GetAllIFNames() {
  $IFCONFIG | sed -n '/^[a-zA-Z]/{
    s%:* .*%%
    p
  }'
}

# Print the first interface name from GetAllIFNames that does not start
# with "lo"; prints nothing when there is no such interface.
GuessIFname() {
  GetAllIFNames | awk '!/^lo/ { print; exit }'
}

# Interface named on the mcast line of the generated ha.cf: the guessed
# one, or $DEFAULTINTERFACE when the guess came back empty.
INTERFACE=$(GuessIFname)

if [ -z "$INTERFACE" ]; then
  INTERFACE=$DEFAULTINTERFACE
fi

#
#	Is it safe to overwrite (or remove) this config file?
#
#	$1 - path of the configuration file to inspect
#
#	Returns 0 when $1 is not an existing regular file, or when the
#	file contains $IDENTSTRING (i.e. it was generated by this script).
#	Returns 1 for any other file - it may be a real configuration.
#
CheckConfigFile() {
  # Nothing there (this includes an empty path): nothing to clobber.
  # Quoting keeps paths containing blanks intact.
  if [ ! -f "$1" ]; then
    return 0
  fi
  sync
  # -F: the marker is literal text, not a regular expression.
  if grep -F -- "$IDENTSTRING" "$1" >/dev/null 2>&1; then
    return 0
  fi
  return 1
}

# Write the test authkeys file.
#
#   $1 - path of the file to create (an existing file is overwritten)
#
# The file starts with the $IDENTSTRING marker, selects key 1 and
# defines keys 1 (sha1), 2 (md5) and 3 (crc).  It ends up with mode 600.
# Returns non-zero when the file cannot be written or chmod fails.
GenerateAuthKeys() {
  # Create the file under a restrictive umask (in a subshell, so the
  # caller's umask is untouched): the keys are then never readable by
  # others, not even between creation and the chmod below.
  (
    umask 077
    {
      printf '#\t%s\n' "$IDENTSTRING"
      printf '%s\n' \
        '#' \
        'auth 1' \
        '1 sha1 SuperSecretKey--SHHH!!!' \
        '2 md5  Encript-string-for-md5!!' \
        '3 crc'
    } >"$1"
  ) || return 1
  chmod 600 "$1"
}

# Write the test ha.cf.
#
#   $1 - path of the file to create (an existing file is overwritten)
#
# The file starts with the $IDENTSTRING marker, sends both log and debug
# output to $LOGFILE, declares the nodes $LOCALNODE and $DUMMYNODE,
# uses multicast on $INTERFACE/$MCASTIP port 694 and the "null" STONITH
# device.  It ends up with mode 644.
# Returns non-zero when the file cannot be written or chmod fails.
GenerateHaCf() {
  {
    printf '#\t%s\n' "$IDENTSTRING"
    # Double-quoted lines only expand variables; the "*" stays literal.
    printf '%s\n' \
      "logfile   $LOGFILE" \
      "debugfile $LOGFILE" \
      'keepalive 10ms' \
      'debug 1' \
      'deadtime 10' \
      'initdead 10' \
      'auto_failback on' \
      "stonith_host * null $LOCALNODE $DUMMYNODE" \
      "mcast $INTERFACE $MCASTIP 694 0 0" \
      "#bcast $INTERFACE" \
      "node $LOCALNODE $DUMMYNODE" \
      'apiauth lha-snmpagent uid=root'
  } >"$1" || return 1
  chmod 644 "$1"
}

# Write the test haresources file.
#
#   $1 - path of the file to create (an existing file is overwritten)
#
# The file holds the $IDENTSTRING marker and a single resource line that
# assigns the address $TESTIP/30 to $DUMMYNODE.  It ends up with mode 644.
# Returns non-zero when the file cannot be written or chmod fails.
GenerateHaResources() {
  printf '#\t%s\n%s\n' \
    "$IDENTSTRING" \
    "$DUMMYNODE IPaddr::$TESTIP/30" >"$1" || return 1
  chmod 644 "$1"
}
# Names, relative to $HADIR, of the configuration files this script
# saves, generates and removes.  Expanded unquoted so that it splits
# into one word per file.
CONFIGFILES="ha.cf authkeys haresources"

# Install the test configuration in $HADIR.
#
# Warns (but carries on) when heartbeat is already running, moves any
# existing config files aside with SaveConfigFiles, and then refuses to
# go on - exiting the script with status 1 - if a file that
# CheckConfigFile does not accept is still in the way.  Otherwise the
# three test files are generated and a stale $HADIR/core is removed.
SetUpConfigFiles() {
  HBStatus &&
    echo "Should not run tests with heartbeat already running."

  SaveConfigFiles
  for j in $CONFIGFILES; do
    CheckConfigFile $HADIR/$j && continue
    printf '%s\n' \
      "OOPS! $HADIR/$j already exists!" \
      "Real configuration already set up." \
      "Sorry..."
    exit 1
  done

  GenerateAuthKeys $HADIR/authkeys
  GenerateHaCf $HADIR/ha.cf
  GenerateHaResources $HADIR/haresources
  rm -f $HADIR/core
}

# Undo SetUpConfigFiles: delete each config file in $HADIR that
# CheckConfigFile accepts, complain about any it rejects (those are
# left alone), then put the saved originals back with RestoreConfigFiles.
RemoveConfigFiles() {
  for j in $CONFIGFILES; do
    CheckConfigFile $HADIR/$j || {
      echo "OOPS! Cannot remove real config file $HADIR/$j!"
      continue
    }
    rm -f $HADIR/$j
  done
  RestoreConfigFiles
}

# Move the existing config files out of the way, into $HADIR/.cfsave.
#
# Changes the current directory to $HADIR.  Files named in $CONFIGFILES
# that do not exist are ignored (mv's complaints are discarded), so the
# return status is mv's and is non-zero whenever one was missing.
# Returns 1 without moving anything when $HADIR cannot be entered or
# the save directory cannot be created.
SaveConfigFiles() {
  # Never fall through to the mv below from some other directory: it
  # would move same-named files out of wherever the caller happens to be.
  cd "$HADIR" || return 1
  if [ ! -d .cfsave ]; then
    mkdir .cfsave || return 1
  fi
  # $CONFIGFILES is deliberately unquoted: it is a list of names.
  mv $CONFIGFILES .cfsave >/dev/null 2>&1
}

# Move everything saved in $HADIR/.cfsave back into $HADIR.
# Returns 0 when there is nothing to restore, otherwise mv's status.
RestoreConfigFiles() {
  # Collect the saved files in the function's own positional parameters.
  set -- "$HADIR"/.cfsave/*
  # With no saved files the pattern stays as a literal, non-existent
  # name; handing that to mv would only produce an error message.
  if [ ! -e "$1" ] && [ ! -L "$1" ]; then
    return 0
  fi
  mv "$@" "$HADIR"
}

# Announce and run "$HBSCRIPT start"; returns the init script's status.
HBStart() {
  printf '%s\n' "Starting heartbeat"
  $HBSCRIPT start
}

# Announce and run "$HBSCRIPT stop"; returns the init script's status.
HBStop() {
  printf '%s\n' "Stopping heartbeat"
  $HBSCRIPT stop
}

# Announce and run "$HBSCRIPT reload" with its output discarded, wait
# five seconds, then return the status the reload gave (also left in rc).
HBReload() {
  printf '%s\n' "Reloading heartbeat"
  rc=0
  $HBSCRIPT reload >/dev/null 2>&1 || rc=$?
  sleep 5
  return $rc
}

# Succeed when the combined stdout/stderr of "$HBSCRIPT status" contains
# the word "running"; fail otherwise.  Prints nothing.
HBStatus() {
  $HBSCRIPT status 2>&1 | grep running >/dev/null
}

#
#	Wait for a line matching the grep pattern $1 (case-insensitive)
#	to show up in $LOGFILE.
#
#	The log is checked once a second, at most 60 times.  Matching
#	lines are printed and 0 is returned as soon as there is a match;
#	1 is returned when the last attempt still finds nothing.
#
LookForString() {
  count=1
  until grep -i "$1" $LOGFILE
  do
    if [ $count -ge 60 ]; then
      return 1
    fi
    count=`expr $count + 1`
    sleep 1
  done
  return 0
}

# Switch $HADIR/authkeys over to a different authentication method.
#
# awk is handed the authkeys file twice.  While reading the first copy
# (pass 1) it prints nothing: it skips lines whose first field is "#",
# takes the active key number from the "auth" line, and stops at the
# first other line whose first field differs from that number.  That
# field becomes the new method and "nextfile" jumps to the second copy.
# The second copy (pass 2) is printed unchanged, except that its "auth"
# line is replaced by "auth <new method>".
# The output goes to /var/run/tmpfile, which then replaces
# $HADIR/authkeys and is given mode 600.
#
# NOTE(review): "nextfile" is not available in every awk - confirm the
# awk in use supports it.
# NOTE(review): when no line with a different first field exists,
# pass 2 is never entered, nothing is printed, and authkeys ends up empty.
changeAuthkeys(){
 awk 'BEGIN{method = 1; done =0 ; pass=1} \
{ if(pass == 2){ if ($1 == "auth")print "auth " method; \
else print $0 ; next } \
if (done ==1 || $1 == "#" || $1 == " ") next; \
if ($1 == "auth") {method = $2; next} \
if ($1 !=  method) {done=1; method =$1;  pass=2; nextfile}}' \
$HADIR/authkeys $HADIR/authkeys > /var/run/tmpfile

mv /var/run/tmpfile  $HADIR/authkeys
chmod 600 $HADIR/authkeys
}
#	Check that $LOGFILE holds an acceptable number of matching lines.
#
#	$1 - extended regular expression, matched case-insensitively
#	$2 - minimum number of matching lines
#	$3 - maximum number of matching lines (defaults to $2)
#
#	When the count is out of range, an "ERROR:" report is printed and
#	appended to $LOGFILE, and errcount is incremented.  A log file
#	that cannot be read counts as zero matches.
#
#	Always returns 0: a mismatch is signalled through errcount only.
#	Callers that also test the return value would otherwise count the
#	same failure twice.
CheckPat()
{
  count=`grep -E -i -c "$1" "$LOGFILE"`
  # grep prints no count at all for an unreadable file.
  count=${count:-0}
  min=$2
  max=${3-$2}
  if [ "$count" -lt "$min" ] || [ "$count" -gt "$max" ]; then
    # Report the range actually enforced, also when $3 was omitted.
    echo "ERROR: Did not find [$min:$max] occurances of $1 in $LOGFILE `date`" | tee -a "$LOGFILE"
    echo "ERROR: Found $count instead." | tee -a "$LOGFILE"
    errcount=`expr $errcount + 1`
  fi
  return 0
}

# Record one failed check: print the message in $1, append it to
# $LOGFILE and add one to errcount.
NoteFailure() {
  echo "$1" | tee -a $LOGFILE
  errcount=`expr $errcount + 1`
}

# End-to-end test of heartbeat with the generated test configuration.
#
# Sequence: make sure nothing is running, start heartbeat, then check
# (through $CLSTATUS and messages in $LOGFILE) that the local node is
# up, that $DUMMYNODE was declared dead and STONITHed, and that the
# $TESTIP resource was taken over, ARPed and passes status/monitor.
# After that, reload once with a touched ha.cf and once with changed
# authkeys, and finally stop heartbeat and verify it released everything.
#
# Every failed check is reported and added to errcount.  The script
# exits with status 1 if heartbeat reports a successful start but is
# not running afterwards, since nothing else can be tested then.
TestHeartbeat() {
  #
  #	Precondition: heartbeat must not be running yet
  #
  if HBStatus; then
    echo "That's weird.  Heartbeat seems to be running..."
    HBStop
  fi
  if $CLSTATUS hbstatus >/dev/null 2>&1; then
    NoteFailure "$CLSTATUS shows heartbeat running"
  fi

  #
  #	Start heartbeat
  #
  if HBStart; then
    if HBStatus; then
      : COOL!
    else
      echo "Heartbeat did not start." | tee -a $LOGFILE
      exit 1
    fi
  fi
  # Give heartbeat up to 20 seconds to answer status queries.
  for j in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
  do
    if $CLSTATUS hbstatus >/dev/null 2>&1; then
      break
    fi
    sleep 1
  done

  $CLSTATUS hbstatus >/dev/null 2>&1 ||
    NoteFailure "$CLSTATUS shows heartbeat not running ($?)"
  $CLSTATUS nodestatus $LOCALNODE >/dev/null 2>&1 ||
    NoteFailure "$CLSTATUS shows local status as dead ($?)"

  #
  # Heartbeat seems to be running...
  # The dummy node must be noticed as dead and then STONITHed.
  #
  LookForString "node $DUMMYNODE.*is dead" >/dev/null ||
    NoteFailure "Does not look like we noticed $DUMMYNODE was dead"
  if $CLSTATUS nodestatus $DUMMYNODE >/dev/null 2>&1; then
    NoteFailure "$CLSTATUS shows $DUMMYNODE status as alive(!)"
  fi

  if
    LookForString "Resetting node $DUMMYNODE with" >/dev/null &&
    LookForString "node $DUMMYNODE now reset" >/dev/null
  then
    : OK
  else
    NoteFailure "Does not look like we STONITHed $DUMMYNODE"
  fi

  #
  #	With the dummy node dead, its resource (the test IP) must
  #	have been taken over by us.
  #
  LookForString "IPaddr.*$TESTIP" >/dev/null ||
    NoteFailure "Does not look like we took over the IP address"

  #	Wait until heartbeat thinks things are stable
  #	that is, not in "transition"
  for j in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
  do
    if STAT=`$CLSTATUS rscstatus 2>/dev/null`; then
      case "$STAT" in
        transition)
          sleep 1
          ;;
        all)
          break
          ;;
        *)
          echo "$CLSTATUS shows resource status as $STAT" | tee -a $LOGFILE
          break
          ;;
      esac
    else
      echo "$CLSTATUS rscstatus failed [$?]" | tee -a $LOGFILE
      break
    fi
  done
  LookForString "[Aa][Rr][Pp]" >/dev/null ||
    NoteFailure "Does not look like we ARPed the address"
  if
    $RSCDIR/IPaddr $TESTIP status >/dev/null 2>&1 &&
    $RSCDIR/IPaddr $TESTIP monitor >/dev/null 2>&1
  then
    : COOL!
  else
    NoteFailure "Looks like monitor operation failed"
  fi

  #
  #	Reload test -- ha.cf changed
  #
  echo "Performing ha.cf reload test" >> $LOGFILE
  touch $HADIR/ha.cf
  HBReload ||
    NoteFailure "Heartbeat reload operation returned $?"
  LookForString "restart exec" >/dev/null ||
    NoteFailure "Does not look like we did a restart exec."
  # The reload must not have killed heartbeat.
  HBStatus ||
    NoteFailure "Heartbeat is not running after the ha.cf reload"

  #
  #	Reload test -- authkeys changed
  #
  echo "Performing authkeys reload test" >> $LOGFILE
  changeAuthkeys
  HBReload ||
    NoteFailure "Heartbeat reload operation returned $?"
  # One reread for each of the two reloads done so far.
  CheckPat "Signalling.* to reread config files" 2 >/dev/null ||
    NoteFailure "Heartbeat did not reread config files exactly twice"
  sleep 2
  # Still just the one exec from the ha.cf reload: changed authkeys
  # must not cause another one.
  CheckPat "restart exec" 1 >/dev/null ||
    NoteFailure "Looks like we did an extra exec"
  HBStatus ||
    NoteFailure "Heartbeat is not running after the authkeys reload"

  #
  #	Stop heartbeat; it must really be gone and must have
  #	given up the test IP address.
  #
  echo "Stopping heartbeat." >> $LOGFILE
  HBStop ||
    NoteFailure "Heartbeat stop operation returned $?"
  if HBStatus; then
    echo "Looks like heartbeat did not really stop." | tee -a $LOGFILE
    echo "You'll probably need to kill some processes yourself."
    errcount=`expr $errcount + 1`
  fi

  if
    $RSCDIR/IPaddr $TESTIP status >/dev/null 2>&1 &&
    $RSCDIR/IPaddr $TESTIP monitor >/dev/null 2>&1
  then
    NoteFailure "Looks like the test IP address is still live..."
  fi

}

# Basic sanity test of the $STONITH command, using its "null" device.
#
# Checks that "-h" succeeds and prints at least 100 lines, that listing
# the hosts of a null device configured with "foo bar" succeeds and
# names both hosts, and that resetting host foo succeeds with the
# expected "Host foo null-reset" message.
# Failures are reported on stdout and in $LOGFILE, counted in serrcount
# and finally added to errcount.
StonithCheck() {

  serrcount=0

  echo "Checking STONITH basic sanity." | tee -a $LOGFILE
  if $STONITH -h >/dev/null; then
    : OK
  else
    echo "$STONITH -h failed" | tee -a $LOGFILE
    serrcount=`expr $serrcount + 1`
  fi
  wc=`$STONITH -h | wc -l`
  # $wc stays unquoted: some wc implementations pad the number with blanks.
  if [ $wc -lt 100 ]; then
    echo "$STONITH -h help message is too short" | tee -a $LOGFILE
    serrcount=`expr $serrcount + 1`
  fi

  if FOOBARLIST=`$STONITH -t null -p "foo bar" -l`; then
    : FOOBARLIST OK
  else
    echo "$STONITH -t null list option failed" | tee -a $LOGFILE
    serrcount=`expr $serrcount + 1`
  fi
  if
    echo $FOOBARLIST | grep foo >/dev/null &&
    echo $FOOBARLIST | grep bar >/dev/null
  then
    : OK null list
  else
    echo "$STONITH -t null list option incorrect" | tee -a $LOGFILE
    serrcount=`expr $serrcount + 1`
  fi

  if RESETOUT=`$STONITH -t null -p "foo bar" foo 2>&1`; then
    case $RESETOUT in
      *"Host foo null-reset"*)
        : NULL Stonith output OK
        ;;
      *)
        echo "NULL reset failed." | tee -a $LOGFILE
        serrcount=`expr $serrcount + 1`
        ;;
    esac
  else
    # A reset command that fails outright is an error like any other.
    echo "$STONITH -t null reset failed." | tee -a $LOGFILE
    serrcount=`expr $serrcount + 1`
  fi
  errcount=`expr $errcount + $serrcount`
}


# Test apphbd with the apphbtest client.
#
# Writes a small apphbd configuration to /var/run/<pid>.apphbd.cf,
# starts $APPHBD with it (stopping an already running instance first),
# runs a success case with $clientcount clients and a failure case with
# one client, and uses CheckPat to verify the expected messages in
# $LOGFILE.  Finally apphbd is stopped again and must be gone.
#
# NOTE(review): the failure-case run appends its output to $CFFILE
# rather than $LOGFILE - confirm that this is intended.
# NOTE(review): $CFFILE is not removed afterwards.
AppHBCheck() {
  CFFILE=/var/run/$$.apphbd.cf
  clientcount=5
  printf 'realtime yes\ndebug_level\t1\ndebugfile\t%s\nlogfile\t\t%s\n' \
    "$LOGFILE" "$LOGFILE" >$CFFILE

  echo "Performing apphbd success case tests" | tee -a $LOGFILE
  # One "stopped" message is expected per apphbd instance shut down.
  if $APPHBD -s >/dev/null 2>&1; then
    echo "That's odd, $APPHBD is already running."
    killcount=2
    $APPHBD -k >/dev/null 2>&1
  else
    killcount=1
  fi
  $APPHBD -c $CFFILE
  sleep 5
  $APPHBTEST -R -i 1000 -p $clientcount -n 5 >> $LOGFILE 2>&1
  for pat in \
    'apphb_client_register:' \
    'type=setint' \
    'debug:.*apphb_client_remove:'
  do
    CheckPat "$pat" $clientcount
  done
  CheckPat "failed to heartbeat|resumed heartbeats" 0

  echo "Performing apphbd failure case tests" | tee -a $LOGFILE
  $APPHBTEST -R -F -i 1000 -p 1 -n 5 >>$CFFILE 2>&1
  for pat in \
    "'failtest'.* failed to heartbeat" \
    "'failtest'.* resumed heartbeats"
  do
    CheckPat "$pat" 1 2
  done
  sleep 5
  CheckPat "WARN:.*hangup" 1
  $APPHBD -k $CFFILE
  CheckPat "info:.*apphbd.*stopped" $killcount
  if $APPHBD -s >/dev/null 2>&1; then
    echo "ERROR: $APPHBD is still running!" | tee -a $LOGFILE
  fi
}

# Run the IPC test program with all of its output appended to $LOGFILE;
# its exit status is added to errcount.
IPCtest() {
  printf '%s\n' "Starting IPC tests" | tee -a $LOGFILE
  $IPCTEST >>$LOGFILE 2>&1
  # $? is still the status of $IPCTEST when it is expanded here.
  errcount=$(expr $errcount + $?)
}

# Run the SNMP agent sanity check, if it is installed.
#
# Does nothing (returning 0) when $SNMPAGENTTEST is not a regular file
# or when heartbeat cannot be started.  Otherwise the check runs with
# its output appended to $LOGFILE, its exit status is added to errcount,
# a pass/fail line is printed and heartbeat is stopped again.
SNMPAgentTest() {
  if [ ! -f $SNMPAGENTTEST ]; then
    return 0
  fi
  HBStart || return 0

  sleep 1
  echo "starting SNMP Agent tests" | tee -a $LOGFILE
  $SNMPAGENTTEST >> $LOGFILE 2>&1
  ret=$?
  errcount=`expr $errcount + $ret`
  case $ret in
    0) echo "SNMP Agent tests pass." ;;
    *) echo "SNMP Agent tests failed." ;;
  esac
  HBStop
}

#
#	Main program:
#
#	Check our identity (must be root).
#	Set Up Config Files (existing ones are moved aside first).
#	Run Tests.
#	Remove Config Files and put the saved ones back; this is done
#		by the exit trap, so it happens on every exit path.
#	Report.  The exit status is the error count: 0 means success.
#

ID=`/usr/bin/whoami`
case $ID in
  root)	: OK;;
  *)	echo "Must be root to run this.  Sorry."
	exit 1;;
esac

trap 'RemoveConfigFiles' 0
# The exit trap is not guaranteed to run when the shell is killed by a
# signal.  Turn the usual ones into an ordinary exit, so that the real
# configuration is always restored.
trap 'exit 1' HUP INT TERM
SetUpConfigFiles
# Start every run with an empty log: the tests count lines in it.
> $LOGFILE

TestHeartbeat
StonithCheck
AppHBCheck
IPCtest
SNMPAgentTest


# SetUpConfigFiles removed any old core file, so one found now is ours.
if
  [ -f $HADIR/core ]
then
  errcount=`expr $errcount + 1`
  echo "OOPS! We generated a core file!"
  ls -l $HADIR/core
  file $HADIR/core
fi

# Anything logged as CRIT or ERROR counts as one more error; the
# offending lines are shown.
if
  sync
  grep -E 'CRIT|ERROR' $LOGFILE
then
  echo "OOPS! Looks like we had some errors come up."
  errcount=`expr $errcount + 1`
fi

echo "$errcount errors. Log file is stored in $LOGFILE"
# Only the low 8 bits of an exit status reach the caller; without the
# cap, 256 errors would be reported as success.
if
  [ $errcount -gt 255 ]
then
  exit 255
fi
exit $errcount
