📄 test-framework.sh
字号:
#!/bin/bash
# vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4:
#
# Shared helpers for the Lustre test scripts: environment setup, kernel
# module loading/unloading, starting/stopping facets, mounting clients and
# post-run cleanup checks.
#
# Several functions call helpers that are not defined in this part of the
# file (absolute_path, do_facet, do_node, facet_active_host, check_network,
# error); they must be provided elsewhere before those functions are used.
#
# NOTE: the script runs under "set -e".  Many statements are written so that
# an expected non-zero status does not abort the run; keep that in mind when
# editing.

# On any unhandled command failure, print the per-suite summary.
trap 'print_summary && echo "test-framework exiting on error"' ERR
set -e
#set -x

export REFORMAT=${REFORMAT:-""}
export VERBOSE=false
export GMNALNID=${GMNALNID:-/usr/sbin/gmlndnid}
export CATASTROPHE=${CATASTROPHE:-/proc/sys/lnet/catastrophe}
#export PDSH="pdsh -S -Rssh -w"

# Exit with status 1 if any of the named variables is unset or empty.
# A message is printed for every missing variable before exiting.
# eg, assert_env LUSTRE MDSNODES OSTNODES CLIENTS
assert_env() {
    local failed=""
    for name in "$@"; do
        if [ -z "${!name}" ]; then
            echo "$0: $name must be set"
            failed=1
        fi
    done
    if [ -n "$failed" ]; then
        exit 1
    fi
    return 0
}

# Print the command-line synopsis and exit.
usage() {
    echo "usage: $0 [-r] [-f cfgfile]"
    echo " -r: reformat"
    exit
}

# Print a table with one row per suite named in $TESTSUITE_LIST.
# The state of a suite is read from the variable carrying the suite's name:
# "done" -> Done (plus excluded/slow tests grepped from ${TMP}/<suite>.log),
# "no" -> Skipped, anything else -> UNFINISHED.
# Prints nothing when $TESTSUITE is "lfscktest".  Always returns 0.
print_summary () {
    [ "$TESTSUITE" == "lfscktest" ] && return 0
    [ -n "$ONLY" ] && echo "WARNING: ONLY is set to ${ONLY}."

    local form="%-13s %-17s %s\n"
    local skipped
    local slow
    local o
    local log

    printf "$form" "status" "script" "skipped tests E(xcluded) S(low)"
    echo "------------------------------------------------------------------------------------"

    for O in $TESTSUITE_LIST; do
        skipped=""
        slow=""
        # Log file name: lower-cased suite name, '_' -> '-', "tyn" -> "tyN".
        o=$(echo $O | tr "[:upper:]" "[:lower:]")
        o=${o//_/-}
        o=${o//tyn/tyN}
        log=${TMP}/${o}.log
        if [ -f "$log" ]; then
            skipped=$(grep excluded "$log" | awk '{ printf " %s", $3 }' | sed 's/test_//g')
            slow=$(grep SLOW "$log" | awk '{ printf " %s", $3 }' | sed 's/test_//g')
        fi
        if [ "${!O}" = "done" ]; then
            printf "$form" "Done" "$O" "E=$skipped"
            if [ -n "$slow" ]; then
                printf "$form" "-" "-" "S=$slow"
            fi
        fi
    done

    for O in $TESTSUITE_LIST; do
        if [ "${!O}" = "no" ]; then
            printf "$form" "Skipped" "$O" ""
        fi
    done

    for O in $TESTSUITE_LIST; do
        if [ "${!O}" != "done" ] && [ "${!O}" != "no" ]; then
            printf "$form" "UNFINISHED" "$O" ""
        fi
    done
    return 0
}

# Export the environment used by the test scripts (tool paths, log file,
# defaults) and parse the command line:
#   -f cfgfile  sets CONFIG
#   -r          sets REFORMAT=--reformat
#   -v          sets VERBOSE=true
# Remaining arguments become $ONLY unless ONLY is already set.
# Requires absolute_path, which is defined outside this part of the file.
init_test_env() {
    export LUSTRE=$(absolute_path $LUSTRE)
    export TESTSUITE=$(basename $0 .sh)
    export LTESTDIR=${LTESTDIR:-$LUSTRE/../ltest}

    if [ -d /r ]; then
        export ROOT=${ROOT:-/r}
    fi
    export TMP=${TMP:-$ROOT/tmp}
    export TESTSUITELOG=${TMP}/${TESTSUITE}.log
    export HOSTNAME=${HOSTNAME:-$(hostname)}

    # The leading ':' is an empty PATH entry (the current directory).
    export PATH=:$PATH:$LUSTRE/utils:$LUSTRE/tests

    # For each tool prefer the build tree, else whatever is on PATH.
    # "export VAR=$(which ...)" is kept on one line on purpose: the export
    # masks a failing "which", so a missing tool does not trip "set -e".
    export LCTL=${LCTL:-"$LUSTRE/utils/lctl"}
    if [ ! -f "$LCTL" ]; then
        export LCTL=$(which lctl)
    fi
    export LFS=${LFS:-"$LUSTRE/utils/lfs"}
    if [ ! -f "$LFS" ]; then
        export LFS=$(which lfs)
    fi
    export MKFS=${MKFS:-"$LUSTRE/utils/mkfs.lustre"}
    if [ ! -f "$MKFS" ]; then
        export MKFS=$(which mkfs.lustre)
    fi
    export TUNEFS=${TUNEFS:-"$LUSTRE/utils/tunefs.lustre"}
    if [ ! -f "$TUNEFS" ]; then
        export TUNEFS=$(which tunefs.lustre)
    fi
    export CHECKSTAT="${CHECKSTAT:-"checkstat -v"} "

    # This used to export the misspelled FSYTPE, leaving FSTYPE (which
    # load_modules reads) without its default.
    export FSTYPE=${FSTYPE:-"ldiskfs"}
    # Deprecated alias kept for anything that still reads the old name.
    export FSYTPE=$FSTYPE
    export NAME=${NAME:-local}
    export LPROC=/proc/fs/lustre
    export DIR2
    export AT_MAX_PATH

    if [ "$ACCEPTOR_PORT" ]; then
        export PORT_OPT="--port $ACCEPTOR_PORT"
    fi

    # Paths on remote nodes, if different
    export RLUSTRE=${RLUSTRE:-$LUSTRE}
    export RPWD=${RPWD:-$PWD}
    export I_MOUNTED=${I_MOUNTED:-"no"}

    # command line
    while getopts "rvf:" opt "$@"; do
        case $opt in
            f) CONFIG=$OPTARG;;
            r) REFORMAT=--reformat;;
            v) VERBOSE=true;;
            \?) usage;;
        esac
    done
    shift $((OPTIND - 1))
    ONLY=${ONLY:-$*}

    if [ -n "$TESTSUITELOG" ]; then
        rm -f $TESTSUITELOG || true
    fi
    return 0
}

# Module file extension and quota support depend on the kernel series.
case $(uname -r) in
2.4.*)
    EXT=".o"
    USE_QUOTA=no
    [ ! "$CLIENTONLY" ] && FSTYPE=ext3
    ;;
*)
    EXT=".ko"
    USE_QUOTA=yes
    ;;
esac

# load_module <path-relative-to-$LUSTRE-without-extension> [module options]
# Does nothing if a module with that name is already listed by lsmod.
# Otherwise insmod the file from the build tree if it exists, else modprobe
# the module by name.
load_module() {
    module=$1
    shift
    BASE=$(basename $module $EXT)
    # Match the exact name in lsmod's first column; a plain substring match
    # would treat e.g. "ost" as loaded whenever "vhost" is.
    if lsmod | grep -q "^${BASE} "; then
        return 0
    fi
    if [ -f ${LUSTRE}/${module}${EXT} ]; then
        insmod ${LUSTRE}/${module}${EXT} $@
    else
        # must be testing a "make install" or "rpm" installation
        modprobe $BASE $@
    fi
}

# Load the LNET and Lustre modules in dependency order, once per run
# (guarded by HAVE_MODULES).  Returns immediately when $MODPROBE is set.
# Server-side modules are skipped when CLIENTONLY or CLIENTMODSONLY is set.
load_modules() {
    if [ -n "$MODPROBE" ]; then
        # use modprobe
        return 0
    fi
    if [ "$HAVE_MODULES" = true ]; then
        # we already loaded
        return 0
    fi
    HAVE_MODULES=true

    echo Loading modules from $LUSTRE
    load_module ../lnet/libcfs/libcfs
    if [ "$PTLDEBUG" ]; then
        sysctl -w lnet.debug=$PTLDEBUG
    fi
    if [ "$SUBSYSTEM" ]; then
        sysctl -w lnet.subsystem_debug=${SUBSYSTEM# }
    fi

    # Pick up "options lnet ..." from the modprobe configuration unless the
    # caller already supplied LNETOPTS.
    [ -f /etc/modprobe.conf ] && MODPROBECONF=/etc/modprobe.conf
    [ -f /etc/modprobe.d/Lustre ] && MODPROBECONF=/etc/modprobe.d/Lustre
    if [ -z "$LNETOPTS" ] && [ -n "$MODPROBECONF" ]; then
        LNETOPTS=$(awk '/^options lnet/ { print $0}' $MODPROBECONF | sed 's/^options lnet //g')
    fi
    echo "lnet options: '$LNETOPTS'"
    # note that insmod will ignore anything in modprobe.conf
    # ($LNETOPTS is intentionally unquoted: it may hold several options)
    load_module ../lnet/lnet/lnet $LNETOPTS
    LNETLND=${LNETLND:-"socklnd/ksocklnd"}
    load_module ../lnet/klnds/$LNETLND
    load_module lvfs/lvfs
    load_module obdclass/obdclass
    load_module ptlrpc/ptlrpc
    if [ "$USE_QUOTA" = "yes" ]; then
        load_module quota/lquota
    fi
    load_module mdc/mdc
    load_module osc/osc
    load_module lov/lov
    load_module mgc/mgc
    if [ -z "$CLIENTONLY" ] && [ -z "$CLIENTMODSONLY" ]; then
        load_module mgs/mgs
        load_module mds/mds
        grep -q crc16 /proc/kallsyms || { modprobe crc16 2>/dev/null || true; }
        if [ "$FSTYPE" = "ldiskfs" ]; then
            load_module ../ldiskfs/ldiskfs/ldiskfs
        fi
        load_module lvfs/fsfilt_$FSTYPE
        load_module ost/ost
        load_module obdfilter/obdfilter
    fi

    load_module llite/lustre
    load_module llite/llite_lloop

    # Record the module list reported by "lctl modules".
    rm -f $TMP/ogdb-$HOSTNAME
    OGDB=$TMP
    [ -d /r ] && OGDB="/r/tmp"
    $LCTL modules > $OGDB/ogdb-$HOSTNAME

    # 'mount' doesn't look in $PATH, just sbin
    if [ -f $LUSTRE/utils/mount.lustre ]; then
        cp $LUSTRE/utils/mount.lustre /sbin/. || true
    fi
    return 0
}

# Command used to remove modules.  2.4 kernels use "modprobe -r".
# (The kernel series is matched with the same pattern as the EXT selection
# above; testing only the third character of "uname -r" also matched
# releases such as 5.4.x.)
RMMOD=rmmod
case $(uname -r) in
2.4.*) RMMOD="modprobe -r";;
esac

# Retry removing the modules listed by "lctl modules" every 5 seconds.
# Returns 0 once the list is empty, 3 after 60 seconds.
# MODULES is not declared local here: when called from unload_modules it
# updates that function's local variable.
wait_for_lnet() {
    local WAIT=0
    local MAX=60
    MODULES=$($LCTL modules | awk '{ print $2 }')
    while [ -n "$MODULES" ]; do
        sleep 5
        $RMMOD $MODULES >/dev/null 2>&1 || true
        MODULES=$($LCTL modules | awk '{ print $2 }')
        if [ -z "$MODULES" ]; then
            return 0
        fi
        WAIT=$((WAIT + 5))
        echo "waiting, $((MAX - WAIT)) secs left"
        if [ $WAIT -ge $MAX ]; then
            echo "LNET modules $MODULES will not unload"
            lsmod
            return 3
        fi
    done
}

# Unload all Lustre/LNET modules and check dmesg for reported memory leaks.
# Returns: 0 on success (or when leaks are found and IGNORE_LEAK is set),
#          2 if "lctl dl" still lists devices,
#          3 if LNET modules will not unload,
#          254 if a memory leak message was found.
unload_modules() {
    wait_exit_ST client # bug 12845

    lsmod | grep libcfs > /dev/null && $LCTL dl
    local MODULES=$($LCTL modules | awk '{ print $2 }' | grep -v libcfs) || true
    $RMMOD $MODULES > /dev/null 2>&1 || true
    # do it again, in case we tried to unload ksocklnd too early
    MODULES=$($LCTL modules | awk '{ print $2 }' | grep -v libcfs) || true
    if [ -n "$MODULES" ]; then
        $RMMOD $MODULES > /dev/null 2>&1 || true
    fi

    # Dump the debug log to $TMP/debug before libcfs goes away.
    lsmod | grep libcfs > /dev/null && $LCTL dk $TMP/debug
    $RMMOD libcfs

    MODULES=$($LCTL modules | awk '{ print $2 }')
    if [ -n "$MODULES" ]; then
        echo "Modules still loaded: "
        echo $MODULES
        if [ "$(lctl dl)" ]; then
            echo "Lustre still loaded"
            lctl dl || true
            lsmod
            return 2
        else
            echo "Lustre stopped but LNET is still loaded, waiting..."
            wait_for_lnet || return 3
        fi
    fi
    HAVE_MODULES=false

    LEAK_LUSTRE=$(dmesg | tail -n 30 | grep "obd mem.*leaked" || true)
    LEAK_PORTALS=$(dmesg | tail -n 20 | grep "Portals memory leaked" || true)
    if [ -n "$LEAK_LUSTRE" ] || [ -n "$LEAK_PORTALS" ]; then
        echo "$LEAK_LUSTRE" 1>&2
        echo "$LEAK_PORTALS" 1>&2
        mv $TMP/debug $TMP/debug-leak.$(date +%s) || true
        echo "Memory leaks detected"
        [ -n "$IGNORE_LEAK" ] && echo "ignoring leaks" && return 0
        return 254
    fi
    echo "modules unloaded."
    return 0
}

# Facet functions

# mount_facet <facet> [extra mount args]
# Mount the device recorded in <facet>_dev with the options in <facet>_opt
# on ${MOUNT%/*}/<facet>, via do_facet.  On success the lnet debug sysctls
# are applied on the facet.  Sets the global RC and returns it.
mount_facet() {
    local facet=$1
    shift
    local dev=${facet}_dev
    local opt=${facet}_opt
    echo "Starting ${facet}: ${!opt} $@ ${!dev} ${MOUNT%/*}/${facet}"
    # ${!opt} and $@ are intentionally unquoted so that option strings such
    # as "-o loop" are split into separate arguments.
    do_facet ${facet} mount -t lustre ${!opt} $@ ${!dev} ${MOUNT%/*}/${facet}
    RC=${PIPESTATUS[0]}
    if [ $RC -ne 0 ]; then
        # Report the device actually used for this facet (${!dev}) instead of
        # the global "device", which is only set by start().
        echo "mount -t lustre $@ ${!dev} ${MOUNT%/*}/${facet}"
        echo "Start of ${!dev} on ${facet} failed ${RC}"
    else
        do_facet ${facet} "sysctl -w lnet.debug=$PTLDEBUG; \
            sysctl -w lnet.subsystem_debug=${SUBSYSTEM# }; \
            sysctl -w lnet.debug_mb=${DEBUG_SIZE}; \
            sync"
    fi
    return $RC
}

# start facet device options
# Records device/options in <facet>_dev / <facet>_opt, creates the mount
# point, mounts the facet and, on success, exports <facet>_svc with the
# label reported by e2label.  Exits 1 if the device has no label.
# NOTE(review): the eval lines re-evaluate their arguments; left unchanged
# because callers may depend on that expansion.
start() {
    facet=$1
    shift
    device=$1
    shift
    eval export ${facet}_dev=${device}
    eval export ${facet}_opt=\"$@\"
    do_facet ${facet} mkdir -p ${MOUNT%/*}/${facet}
    mount_facet ${facet}
    RC=$?
    if [ $RC -eq 0 ]; then
        label=$(do_facet ${facet} "e2label ${device}")
        [ -z "$label" ] && echo no label for ${device} && exit 1
        eval export ${facet}_svc=${label}
        echo Started ${label}
    fi
    return $RC
}

# stop <facet> [umount options]
# Unmount ${MOUNT%/*}/<facet> if /proc/mounts on the facet lists it, then
# wait for stopping obd devices to go away.  Returns 0 immediately when the
# facet has no active host.
stop() {
    local running
    facet=$1
    shift
    HOST=$(facet_active_host $facet)
    [ -z "$HOST" ] && echo stop: no host for $facet && return 0

    running=$(do_facet ${facet} "grep -c ${MOUNT%/*}/${facet}' ' /proc/mounts") || true
    # An empty count (do_facet produced no output) is treated as 0.
    if [ "${running:-0}" -ne 0 ]; then
        echo "Stopping ${MOUNT%/*}/${facet} (opts:$@)"
        do_facet ${facet} umount -d $@ ${MOUNT%/*}/${facet}
    fi

    # umount should block, but we should wait for unrelated obd's
    # like the MGS or MGC to also stop.
    wait_exit_ST ${facet}
}

# zconf_mount <client> <mountpoint>
# Mount $MGSNID:/$FSNAME on <mountpoint> on <client> (with "-o $MOUNTOPT"
# when MOUNTOPT is set) and apply the lnet debug sysctls there.
# Exits 1 on a bad invocation; returns 1 if the mount fails, else 0.
zconf_mount() {
    local OPTIONS
    local client=$1
    local mnt=$2
    # Only supply -o to mount if we have options
    if [ -n "$MOUNTOPT" ]; then
        OPTIONS="-o $MOUNTOPT"
    fi
    local device=$MGSNID:/$FSNAME
    if [ -z "$mnt" ] || [ -z "$FSNAME" ]; then
        echo Bad zconf mount command: opt=$OPTIONS dev=$device mnt=$mnt
        exit 1
    fi

    echo "Starting client: $OPTIONS $device $mnt"
    do_node $client mkdir -p $mnt
    do_node $client mount -t lustre $OPTIONS $device $mnt || return 1

    do_node $client "sysctl -w lnet.debug=$PTLDEBUG; sysctl -w lnet.subsystem_debug=${SUBSYSTEM# }; sysctl -w lnet.debug_mb=${DEBUG_SIZE}"

    if [ -d /r ]; then
        $LCTL modules > /r/tmp/ogdb-$HOSTNAME
    fi
    return 0
}

# zconf_umount <client> <mountpoint> [force]
# Unmount <mountpoint> on <client> if /proc/mounts there lists it.  Any
# non-empty third argument adds -f to umount.
zconf_umount() {
    client=$1
    mnt=$2
    # "force" must be local: as a global it stayed set after one forced
    # unmount and silently forced every later one.
    local force=""
    [ "$3" ] && force=-f
    local running
    running=$(do_node $client "grep -c $mnt' ' /proc/mounts") || true
    if [ "${running:-0}" -ne 0 ]; then
        echo "Stopping client $mnt (opts:$force)"
        # NOTE(review): lsof runs on the local node, not via do_node.
        lsof | grep "$mnt" || true
        do_node $client umount $force $mnt
    fi
}

# shutdown_facet <facet>
# FAILURE_MODE=HARD: run $POWER_DOWN on the facet's active host.
# FAILURE_MODE=SOFT: stop the facet.  Any other value: do nothing.
shutdown_facet() {
    facet=$1
    case "$FAILURE_MODE" in
    HARD)
        $POWER_DOWN $(facet_active_host $facet)
        sleep 2
        ;;
    SOFT)
        stop $facet
        ;;
    esac
}

# reboot_facet <facet>
# FAILURE_MODE=HARD: run $POWER_UP on the facet's active host; otherwise
# just sleep 10 seconds.
reboot_facet() {
    facet=$1
    if [ "$FAILURE_MODE" = HARD ]; then
        $POWER_UP $(facet_active_host $facet)
    else
        sleep 10
    fi
}

# verify that lustre actually cleaned up properly
# Calls error on a non-zero $CATASTROPHE value.
# Exits 205 if dmesg mentions "destruct", 204 on a memory leak message.
# Returns 202 if "lctl dl" still lists devices, 203 if lnet/libcfs are still
# loaded, 0 otherwise.
cleanup_check() {
    if [ -f $CATASTROPHE ] && [ $(cat $CATASTROPHE) -ne 0 ]; then
        error "LBUG/LASSERT detected"
    fi

    BUSY=$(dmesg | grep -i destruct || true)
    if [ "$BUSY" ]; then
        echo "$BUSY" 1>&2
        [ -e $TMP/debug ] && mv $TMP/debug $TMP/debug-busy.$(date +%s)
        exit 205
    fi

    LEAK_LUSTRE=$(dmesg | tail -n 30 | grep "obd mem.*leaked" || true)
    LEAK_PORTALS=$(dmesg | tail -n 20 | grep "Portals memory leaked" || true)
    if [ -n "$LEAK_LUSTRE" ] || [ -n "$LEAK_PORTALS" ]; then
        echo "$0: $LEAK_LUSTRE" 1>&2
        echo "$0: $LEAK_PORTALS" 1>&2
        echo "$0: Memory leak(s) detected..." 1>&2
        # Best effort: a missing debug file must not abort under "set -e"
        # before the dedicated exit status below is reached.
        mv $TMP/debug $TMP/debug-leak.$(date +%s) || true
        exit 204
    fi

    if [ "$(lctl dl 2> /dev/null | wc -l)" -gt 0 ] && lctl dl; then
        echo "$0: lustre didn't clean up..." 1>&2
        return 202
    fi

    if [ "$(/sbin/lsmod 2>&1 | grep -E 'lnet|libcfs')" ]; then
        echo "$0: modules still loaded..." 1>&2
        /sbin/lsmod 1>&2
        return 203
    fi
    return 0
}

# Poll the sum of $LPROC/osc/*/kbytesavail once a second until two
# consecutive readings are equal, or 20 polls have been made.
wait_delete_completed () {
    local TOTALPREV=$(awk 'BEGIN{total=0}; {total+=$1}; END{print total}' \
        $LPROC/osc/*/kbytesavail)

    local WAIT=0
    local MAX_WAIT=20
    while [ "$WAIT" -ne "$MAX_WAIT" ]; do
        sleep 1
        TOTAL=$(awk 'BEGIN{total=0}; {total+=$1}; END{print total}' \
            $LPROC/osc/*/kbytesavail)
        [ "$TOTAL" -eq "$TOTALPREV" ] && break
        echo "Waiting delete completed ... prev: $TOTALPREV current: $TOTAL "
        TOTALPREV=$TOTAL
        WAIT=$(( WAIT + 1))
    done
    echo "Delete completed."
}

# wait_for_host <host>
# Run check_network for the host, then retry every 5 seconds until
# "ls -d $LUSTRE" succeeds on it.
wait_for_host() {
    HOST=$1
    check_network "$HOST" 900
    while ! do_node $HOST "ls -d $LUSTRE " > /dev/null; do
        sleep 5
    done
}

# wait_for <facet>
# wait_for_host on the facet's active host.
wait_for() {
    facet=$1
    HOST=$(facet_active_host $facet)
    wait_for_host $HOST
}

# Poll the MDS recovery_status every 5 seconds until it reports COMPLETE.
# Returns 0 on completion, 1 after 4 * lustre.timeout seconds.
wait_mds_recovery_done () {
    local timeout=$(do_facet mds sysctl -n lustre.timeout)
    #define OBD_RECOVERY_TIMEOUT (obd_timeout * 5 / 2)
    # as we are in process of changing obd_timeout in different ways
    # let's set MAX longer than that
    MAX=$(( timeout * 4 ))
    WAIT=0
    while [ $WAIT -lt $MAX ]; do
        STATUS=$(do_facet mds grep status /proc/fs/lustre/mds/*-MDT*/recovery_status)
        echo $STATUS | grep COMPLETE && return 0
        sleep 5
        WAIT=$((WAIT + 5))
        echo "Waiting $(($MAX - $WAIT)) secs for MDS recovery done"
    done
    echo "MDS recovery not done in $MAX sec"
    return 1
}

# wait_exit_ST <facet>
# Wait until "lctl dl" on the facet shows no device in state ST.  The sleep
# interval doubles each round until it reaches 64 seconds.
# Returns 0 when none remain, 1 once 300 seconds have been waited.
wait_exit_ST () {
    local facet=$1

    local WAIT=0
    local INTERVAL=1
    # conf-sanity 31 takes a long time cleanup
    while [ $WAIT -lt 300 ]; do
        running=$(do_facet ${facet} "lsmod | grep lnet > /dev/null && lctl dl | grep ' ST '") || true
        [ -z "${running}" ] && return 0
        echo "waited $WAIT for${running}"
        [ $INTERVAL -lt 64 ] && INTERVAL=$((INTERVAL + INTERVAL))
        sleep $INTERVAL
        WAIT=$((WAIT + INTERVAL))
    done
    echo "service didn't stop after $WAIT seconds. Still running:"
    echo ${running}
    return 1
}

# Run "df $MOUNT" on every client in $CLIENTS (via $PDSH), or locally when
# CLIENTS is empty.  Output is discarded.
client_df() {
    # not every config has many clients
    if [ -n "$CLIENTS" ]; then
        $PDSH $CLIENTS "df $MOUNT" > /dev/null
    else
        df $MOUNT > /dev/null
    fi
}

# NOTE(review): the definition below is cut off at this point in the
# available source; only its opening line is present.  It is kept as a
# comment so that the file parses.  Restore the complete function from the
# original test-framework.sh.
# client_reconnect() {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -