📄 basicsanitycheck.in
字号:
#!/bin/sh
#
#	Basic tests of sanity for a newly-built version of
#	linux-HA software (heartbeat)
#
#	Conditions for running:
#
#		Heartbeat must be installed.
#
#		Must be root.
#
#		CANNOT have a real heartbeat configuration.
#
#		Must have networking configured with one working
#			network interface.
#
#		should not have $TESTIP below used for anything ;-)
#
#		should have multicast address $MCASTIP port 694
#			available
#		(you don't need a multicast capable router).
#
#
#	Note:  you might have to change TESTIP and MCASTIP
#
#	The @...@ tokens are placeholders that are substituted when this
#	template is turned into the runnable script.
#
#	The script deliberately sticks to backticks and expr so that it keeps
#	working with very old Bourne shells.
#
TESTIP=10.253.252.251
MCASTIP=225.224.223.222
#
IFCONFIG="@IFCONFIG@ @IFCONFIG_A_OPT@"
HADIR=@sysconfdir@/ha.d
HBSCRIPT=@INITDIR@/heartbeat@INIT_EXT@
STONITH=@sbindir@/stonith
LIBDIR=@libdir@/
HBLIB=$LIBDIR/heartbeat
APPHBD=$HBLIB/apphbd
CLSTATUS=@bindir@/cl_status
APPHBTEST=$HBLIB/apphbtest
IPCTEST=$HBLIB/ipctest
LRMTEST=$HBLIB/lrmtest/LRMBasicSanityCheck
SNMPAGENTTEST=$HBLIB/SNMPAgentSanityCheck
MALLOC_CHECK_=2; export MALLOC_CHECK_
#
DEFAULTINTERFACE=eth0	# But we really guess it...
#
# Marker written into every generated config file; CheckConfigFile uses it
# to tell test files apart from a real configuration.
IDENTSTRING="Linux-HA TEST configuration file - REMOVEME!!"
DUMMYNODE=ImAlwaysDead.com
LOCALNODE=`uname -n`
LOGFILE=/tmp/linux-ha.testlog
RSCDIR=$HADIR/resource.d
errcount=0

# Everything below works on files in $HADIR; do not carry on from some
# arbitrary directory if it is missing.
cd "$HADIR" || {
  echo "Cannot cd to $HADIR" >&2
  exit 1
}
ulimit -c unlimited

#
#	Print the names of all network interfaces reported by $IFCONFIG,
#	one per line.
#
GetAllIFNames() {
  # $IFCONFIG is intentionally unquoted: it holds a command plus an option.
  if [ "apple" = "@build_vendor@" ]; then
    # "flags=8863" signifies and active interface that supports tcp
    $IFCONFIG | grep '^[a-zA-Z]' | grep "flags=8863" | sed 's%:* .*%%'
  else
    $IFCONFIG | grep '^[a-zA-Z]' | sed 's%:* .*%%'
  fi
}

#
#	Print the first interface whose name does not start with "lo".
#
GuessIFname() {
  GetAllIFNames | grep -v '^lo' | head -n 1
}

INTERFACE=`GuessIFname`
case $INTERFACE in
  "")	INTERFACE=$DEFAULTINTERFACE;;
esac

#
#	Is it safe to overwrite this config file?
#
#	$1 - path of the config file
#	Returns 0 if the file does not exist or contains $IDENTSTRING,
#	1 otherwise.
#
CheckConfigFile() {
  if [ ! -f "$1" ]; then
    return 0
  fi
  if
    sync
    grep "$IDENTSTRING" "$1" >/dev/null 2>&1
  then
    return 0
  else
    return 1
  fi
}

#
#	Write a test authkeys file to $1 and make it mode 600.
#	Method 1 is the active one; changeAuthkeys later switches to the next.
#
GenerateAuthKeys() {
  cat <<-! >"$1"
# $IDENTSTRING
#
auth 1
1 sha1 SuperSecretKey--SHHH!!!
2 md5 Encript-string-for-md5!!
3 crc
!
  chmod 600 "$1"
}

#
#	Write a test ha.cf to $1 (mode 644): two nodes, the local one and
#	$DUMMYNODE, a null STONITH device and multicast on $INTERFACE.
#
GenerateHaCf() {
  cat <<-! >"$1"
# $IDENTSTRING
logfile /dev/null
debugfile $LOGFILE
keepalive 10ms
debug 1
deadtime 5
initdead 5
auto_failback on
stonith_host * null $LOCALNODE $DUMMYNODE
mcast $INTERFACE $MCASTIP 694 0 0
#bcast $INTERFACE
node $LOCALNODE $DUMMYNODE
respawn @HA_CCMUSER@ $HBLIB/ccm
respawn @HA_CCMUSER@ $HBLIB/ipfail
# Eventually I'd like to be able to enable this and
# have things work right...
#respawn root $HBLIB/hbagent
!
  chmod 644 "$1"
}

#
#	Write a test haresources file to $1 (mode 644) that assigns
#	$TESTIP to $DUMMYNODE.
#
GenerateHaResources() {
  cat <<-! >"$1"
# $IDENTSTRING
$DUMMYNODE IPaddr::$TESTIP/30
!
  chmod 644 "$1"
}

CONFIGFILES="ha.cf authkeys haresources"

#
#	Move any existing config files aside, refuse to continue if a file
#	without the test marker is still present, then generate the three
#	test config files and remove an old $HADIR/core.
#	A running heartbeat only produces a warning here.
#
SetUpConfigFiles() {
  if HBStatus; then
    echo "Should not run tests with heartbeat already running."
  fi
  SaveConfigFiles
  for j in $CONFIGFILES
  do
    if CheckConfigFile "$HADIR/$j"; then
      : OK
    else
      echo "OOPS! $HADIR/$j already exists!"
      echo "Real configuration already set up."
      echo "Sorry..."
      exit 1
    fi
  done
  GenerateAuthKeys "$HADIR/authkeys"
  GenerateHaCf "$HADIR/ha.cf"
  GenerateHaResources "$HADIR/haresources"
  rm -f "$HADIR/core"
}

#
#	Delete the generated test config files (never a file that lacks the
#	test marker) and put back whatever SaveConfigFiles moved aside.
#	Installed as the exit trap.
#
RemoveConfigFiles() {
  for j in $CONFIGFILES
  do
    if CheckConfigFile "$HADIR/$j"; then
      rm -f "$HADIR/$j"
    else
      echo "OOPS! Cannot remove real config file $HADIR/$j!"
    fi
  done
  RestoreConfigFiles
}

#
#	Move existing config files into $HADIR/.cfsave.
#	Errors from mv (e.g. a file that does not exist) are ignored.
#
SaveConfigFiles() {
  cd "$HADIR" || return 1
  if [ ! -d .cfsave ]; then
    mkdir .cfsave
  fi
  # $CONFIGFILES is intentionally unquoted: it is a list of file names.
  mv $CONFIGFILES .cfsave >/dev/null 2>&1
}

#
#	Move everything in $HADIR/.cfsave back into $HADIR.
#
RestoreConfigFiles() {
  mv "$HADIR"/.cfsave/* "$HADIR" >/dev/null 2>&1
}

# Start heartbeat through its init script; returns the script's status.
HBStart() {
  echo "Starting heartbeat"
  $HBSCRIPT start
}

# Stop heartbeat through its init script; returns the script's status.
HBStop() {
  echo "Stopping heartbeat"
  $HBSCRIPT stop
}

#
#	Ask heartbeat to reload, wait 5 seconds, and return the status of
#	the reload command itself.
#
HBReload() {
  echo "Reloading heartbeat"
  $HBSCRIPT reload >/dev/null 2>&1
  rc=$?
  sleep 5
  return $rc
}

#
#	Succeeds when the output of "$HBSCRIPT status" contains the word
#	"running" anywhere.
#
HBStatus() {
  case `$HBSCRIPT status 2>&1` in
    *running*)	true;;
    *)		false;;
  esac
}

#
#	Search the log file for the given grep pattern
#
#	$1 - pattern (matched case-insensitively)
#	Polls once per second, 60 attempts in total. Matching lines go to
#	stdout. Returns 0 as soon as the pattern is found, 1 on timeout.
#	Uses the global variable "count".
#
LookForString() {
  count=1
  while
    if grep -i "$1" "$LOGFILE"; then
      return 0
    fi
    [ $count -lt 60 ]
  do
    count=`expr $count + 1`
    sleep 1
  done
  return 1
}

#
#	Rewrite $HADIR/authkeys so that its "auth" line selects the first key
#	method that differs from the currently active one.
#
#	awk reads the file twice: pass 1 finds the active method and the
#	first different one, pass 2 prints the file with the "auth" line
#	replaced.
#
changeAuthkeys() {
  # Build the new file privately next to the real one rather than in a
  # predictable, world-readable /tmp file: it contains the auth keys.
  newkeys=$HADIR/authkeys.new.$$
  if
    (
      umask 077
      awk '
        BEGIN { method = 1; done = 0; pass = 1 }
        {
          if (pass == 2) {
            if ($1 == "auth") {
              print "auth " method
            } else {
              print $0
            }
            next
          }
          if (done == 1 || $1 == "#" || $1 == " ") next
          if ($1 == "auth") { method = $2; next }
          if ($1 != method) { done = 1; method = $1; pass = 2; nextfile }
        }
      ' "$HADIR/authkeys" "$HADIR/authkeys" >"$newkeys"
    )
  then
    mv "$newkeys" "$HADIR/authkeys"
  else
    # Keep the existing authkeys if awk failed.
    rm -f "$newkeys"
  fi
  chmod 600 "$HADIR/authkeys"
}

#
#	Check for the given count of the given string
#	Complain unless the right number are there.
#
#	$1 - extended regular expression (matched case-insensitively)
#	$2 - minimum number of matching lines
#	$3 - maximum number of matching lines (defaults to $2)
#
#	On a mismatch the complaint is logged and errcount is incremented.
#	The function itself always returns 0.
#
CheckPat() {
  count=`grep -E -ic "$1" "$LOGFILE"`
  min=$2
  if [ $# -gt 2 ]; then
    max=$3
  else
    max=$2
  fi
  if [ "$count" -lt "$min" ] || [ "$count" -gt "$max" ]; then
    # Report the effective range; $3 is empty when no maximum was given.
    echo "ERROR: Did not find [$min:$max] occurances of $1 in $LOGFILE `date`" | tee -a "$LOGFILE"
    echo "ERROR: Found $count instead." | tee -a "$LOGFILE"
    errcount=`expr $errcount + 1`
  fi
}

#
#	Main heartbeat test: start heartbeat, check node status, STONITH of
#	the dummy node, IP takeover, the two reload cases and shutdown.
#	Each failed check is logged and adds one to errcount.
#
TestHeartbeat() {
  if HBStatus; then
    echo "That's weird.  Heartbeat seems to be running..."
    HBStop
  fi
  if $CLSTATUS hbstatus >/dev/null 2>&1; then
    echo "$CLSTATUS shows heartbeat running" | tee -a "$LOGFILE"
    errcount=`expr $errcount + 1`
  fi
  if HBStart; then
    if HBStatus; then
      : COOL!
    else
      echo "Heartbeat did not start." | tee -a "$LOGFILE"
      exit 1
    fi
  fi
  sleep 5
  # Give cl_status up to 30 more seconds to see heartbeat.
  j=0
  while [ "$j" -lt 30 ]
  do
    if $CLSTATUS hbstatus >/dev/null 2>&1; then
      break
    else
      sleep 1
    fi
    j=`expr "$j" + 1`
  done
  if $CLSTATUS hbstatus >/dev/null 2>&1; then
    : OK
  else
    echo "$CLSTATUS shows heartbeat not running ($?)" | tee -a "$LOGFILE"
    errcount=`expr $errcount + 1`
  fi
  if $CLSTATUS nodestatus $LOCALNODE >/dev/null 2>&1; then
    : OK
  else
    echo "$CLSTATUS shows local status as dead ($?)" | tee -a "$LOGFILE"
    errcount=`expr $errcount + 1`
  fi
  #
  #	Heartbeat seems to be running...
  #
  if LookForString "node $DUMMYNODE.*is dead" >/dev/null; then
    : OK
  else
    echo "Does not look like we noticed $DUMMYNODE was dead" | tee -a "$LOGFILE"
    errcount=`expr $errcount + 1`
  fi
  if $CLSTATUS nodestatus $DUMMYNODE >/dev/null 2>&1; then
    echo "$CLSTATUS shows $DUMMYNODE status as alive(!)" | tee -a "$LOGFILE"
    errcount=`expr $errcount + 1`
  fi
  if
    LookForString "Resetting node $DUMMYNODE with" >/dev/null &&
    LookForString "node $DUMMYNODE now reset" >/dev/null
  then
    : OK
  else
    echo "Does not look like we STONITHed $DUMMYNODE" | tee -a "$LOGFILE"
    errcount=`expr $errcount + 1`
  fi
  if LookForString "IPaddr.*$TESTIP" >/dev/null; then
    : Looks good
  else
    echo "Does not look like we took over the IP address" | tee -a "$LOGFILE"
    errcount=`expr $errcount + 1`
  fi
  #	Wait until heartbeat thinks things are stable
  #	that is, not in "transition"
  for j in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
  do
    if STAT=`$CLSTATUS rscstatus 2>/dev/null`; then
      case "$STAT" in
        transition)
          sleep 1
          ;;
        all)
          break
          ;;
        *)
          echo "$CLSTATUS shows resource status as $STAT" | tee -a "$LOGFILE"
          break
          ;;
      esac
    else
      echo "$CLSTATUS rscstatus failed [$?]" | tee -a "$LOGFILE"
      break
    fi
  done
  if LookForString "[Aa][Rr][Pp]" >/dev/null; then
    : Looks good
  else
    echo "Does not look like we ARPed the address" | tee -a "$LOGFILE"
    errcount=`expr $errcount + 1`
  fi
  if
    $RSCDIR/IPaddr $TESTIP status >/dev/null 2>&1 &&
    $RSCDIR/IPaddr $TESTIP monitor >/dev/null 2>&1
  then
    : COOL!
  else
    echo "Looks like monitor operation failed" | tee -a "$LOGFILE"
    errcount=`expr $errcount + 1`
  fi
  #
  #	Reload test -- ha.cf changed
  #
  echo "Performing ha.cf reload test" >> "$LOGFILE"
  touch "$HADIR/ha.cf"
  if HBReload; then
    : OK! reload after touching ha.cf worked!
  else
    echo "Heartbeat reload operation returned $?" | tee -a "$LOGFILE"
    errcount=`expr $errcount + 1`
  fi
  if LookForString "restart exec" >/dev/null; then
    : Looks good
  else
    echo "Does not look like we did a restart exec." | tee -a "$LOGFILE"
    errcount=`expr $errcount + 1`
  fi
  if HBStatus; then
    : "OK - reload didn't kill anything ;-)"
  fi
  #
  #	Reload test -- authkeys changed
  #
  echo "Performing authkeys reload test" >> "$LOGFILE"
  changeAuthkeys
  if HBReload; then
    : OK! reload after changing authkeys worked!
  else
    echo "Heartbeat reload operation returned $?" | tee -a "$LOGFILE"
    errcount=`expr $errcount + 1`
  fi
  # CheckPat does its own error accounting and always returns 0, so the
  # else branches of the next two checks are never taken.
  if CheckPat "Signalling.* to reread config files" 2 >/dev/null; then
    : OK
  else
    echo "Heartbeat did not reread config files exactly twice" | tee -a "$LOGFILE"
    errcount=`expr $errcount + 1`
  fi
  sleep 2
  if CheckPat "restart exec" 1 >/dev/null; then
    : Looks good -- did not do another exec
  else
    echo "Looks like we did an extra exec" | tee -a "$LOGFILE"
    errcount=`expr $errcount + 1`
  fi
  if HBStatus; then
    : "OK - reload didn't kill anything ;-)"
  fi

  echo "Stopping heartbeat." >> "$LOGFILE"
  if HBStop; then
    : OK!
  else
    echo "Heartbeat stop operation returned $?" | tee -a "$LOGFILE"
    errcount=`expr $errcount + 1`
  fi
  if HBStatus; then
    echo "Looks like heartbeat did not really stop." | tee -a "$LOGFILE"
    echo "You'll probably need to kill some processes yourself."
    errcount=`expr $errcount + 1`
  fi
  if
    $RSCDIR/IPaddr $TESTIP status >/dev/null 2>&1 &&
    $RSCDIR/IPaddr $TESTIP monitor >/dev/null 2>&1
  then
    echo "Looks like the test IP address is still live..."
    errcount=`expr $errcount + 1`
  fi
}

#
#	Basic checks of the stonith command with the "null" device:
#	help output, host list and a reset of host "foo".
#	Failures are collected in serrcount and added to errcount.
#
StonithCheck() {
  serrcount=0
  echo "Checking STONITH basic sanity." | tee -a "$LOGFILE"
  if $STONITH -h >/dev/null; then
    : OK
  else
    echo "$STONITH -h failed" | tee -a "$LOGFILE"
    serrcount=`expr $serrcount + 1`
  fi
  wc=`$STONITH -h | wc -l`
  # $wc stays unquoted: some wc implementations pad the number with blanks.
  if [ $wc -lt 100 ]; then
    echo "$STONITH -h help message is too short ($wc lines)" | tee -a "$LOGFILE"
    serrcount=`expr $serrcount + 1`
  fi
  if FOOBARLIST=`$STONITH -t null -p "foo bar" -l`; then
    : FOOBARLIST OK
  else
    echo "$STONITH -t null list option failed" | tee -a "$LOGFILE"
    serrcount=`expr $serrcount + 1`
  fi
  if
    echo $FOOBARLIST | grep foo >/dev/null &&
    echo $FOOBARLIST | grep bar >/dev/null
  then
    : OK null list
  else
    echo "$STONITH -t null list option incorrect" | tee -a "$LOGFILE"
    serrcount=`expr $serrcount + 1`
  fi
  if RESETOUT=`$STONITH -t null -p "foo bar" foo 2>&1`; then
    case $RESETOUT in
      *"Host null-reset: foo"*)
        : NULL Stonith output OK
        ;;
      *)
        echo "NULL reset failed." | tee -a "$LOGFILE"
        serrcount=`expr $serrcount + 1`
        ;;
    esac
  else
    echo "$STONITH -t null reset failed." | tee -a "$LOGFILE"
    # A failing reset command is an error too.
    serrcount=`expr $serrcount + 1`
  fi
  errcount=`expr $errcount + $serrcount`
}

#
#	Run apphbd with a temporary config file and exercise it with
#	apphbtest: first with well-behaved clients, then with a client that
#	fails to heartbeat. Results are verified by counting log messages.
#
AppHBCheck() {
  CFFILE=/tmp/$$.apphbd.cf
  clientcount=5
  cat <<-! >"$CFFILE"
realtime yes
debug_level 1
debugfile $LOGFILE
logfile /dev/null
!
  echo "Performing apphbd success case tests" | tee -a "$LOGFILE"
  if $APPHBD -s >/dev/null 2>&1; then
    echo "That's odd, $APPHBD is already running."
    # Two daemons get stopped in this case, so expect two "stopped" messages.
    killcount=2
    $APPHBD -k >/dev/null 2>&1
  else
    killcount=1
  fi
  $APPHBD -c "$CFFILE"
  sleep 5
  $APPHBTEST -i 1000 -p $clientcount -n 5 >> "$LOGFILE" 2>&1
  for pat in "apphb_client_register:" "type=setint" "info:.*apphb_client_remove:"
  do
    CheckPat "$pat" $clientcount
  done
  CheckPat "failed to heartbeat|resumed heartbeats" 0

  echo "Performing apphbd failure case tests" | tee -a "$LOGFILE"
  # Test output belongs in the log, as in the success case above, and not
  # in the apphbd configuration file.
  $APPHBTEST -F -i 1000 -p 1 -n 5 >> "$LOGFILE" 2>&1
  for pat in "'failtest'.* failed to heartbeat" "'failtest'.* resumed heartbeats"
  do
    CheckPat "$pat" 1 2
  done
  sleep 5
  CheckPat "WARN:.*hangup" 1

  $APPHBD -k "$CFFILE"
  CheckPat "info:.*apphbd.*stopped" $killcount
  if $APPHBD -s >/dev/null 2>&1; then
    echo "ERROR: $APPHBD is still running!" | tee -a "$LOGFILE"
  fi
  rm -f "$CFFILE"
}

#
#	Run the IPC test program; its exit status is added to errcount.
#
IPCtest() {
  echo "Starting IPC tests" | tee -a "$LOGFILE"
  $IPCTEST >> "$LOGFILE" 2>&1
  errcount=`expr $errcount + $?`
}

#
#	Run the LRM test program if it is installed; its exit status is
#	added to errcount.
#
LRMTest() {
  if [ ! -f "$LRMTEST" ]; then
    return 0
  fi
  echo "Starting LRM tests" | tee -a "$LOGFILE"
  $LRMTEST $HBLIB >> "$LOGFILE" 2>&1
  ret=$?
  errcount=`expr $errcount + $ret`
  if [ $ret != 0 ]; then
    echo "LRM tests failed."
  fi
}

#
#	Run the SNMP agent test program if it is installed. Heartbeat is
#	started for the test and stopped afterwards; the test's exit status
#	is added to errcount.
#
SNMPAgentTest() {
  if [ ! -f "$SNMPAGENTTEST" ]; then
    return 0
  fi
  if HBStart; then
    sleep 6
    echo "starting SNMP Agent tests" | tee -a "$LOGFILE"
    $SNMPAGENTTEST >> "$LOGFILE" 2>&1
    ret=$?
    errcount=`expr $errcount + $ret`
    if [ $ret != 0 ]; then
      echo "SNMP Agent tests failed."
    else
      echo "SNMP Agent tests pass."
    fi
    HBStop
  fi
}

#
#	Check our identity.
#	Set Up Config Files.
#	Run Tests.
#	Remove Config Files.
#
ID=`@WHOAMI@`
case $ID in
  root)
    : OK
    ;;
  *)
    echo "Must be root to run this. Sorry."
    exit 1
    ;;
esac

# Always clean up the test configuration, whatever the exit path.
trap 'RemoveConfigFiles' 0

SetUpConfigFiles

# Start with an empty log file.
> "$LOGFILE"
COREPID=/proc/sys/kernel/core_uses_pid
if [ -f "$COREPID" ]; then
  echo 1 > $COREPID
fi
TestHeartbeat
StonithCheck
AppHBCheck
IPCtest
LRMTest
SNMPAgentTest

if [ -f "$HADIR/core" ]; then
  errcount=`expr $errcount + 1`
  echo "OOPS! We generated a core file!"
  ls -l "$HADIR/core"
  file "$HADIR/core"
fi

# Any CRIT or ERROR line in the log counts as one more error; the matching
# lines are shown on stdout.
if
  sync
  grep -E 'CRIT|ERROR' "$LOGFILE"
then
  echo "OOPS! Looks like we had some errors come up."
  errcount=`expr $errcount + 1`
fi
echo "$errcount errors. Log file is stored in $LOGFILE"
exit $errcount
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -