📄 conf-sanity.sh
字号:
stop_ost2 stop_mds}run_test 21c "start mds between two osts, stop mds last"test_22() { #reformat to remove all logs reformat start_mds echo Client mount before any osts are in the logs mount_client $MOUNT check_mount && return 41 pass echo Client mount with ost in logs, but none running start_ost stop_ost mount_client $MOUNT # check_mount will block trying to contact ost umount_client $MOUNT pass echo Client mount with a running ost start_ost mount_client $MOUNT check_mount || return 41 pass cleanup}run_test 22 "start a client before osts (should return errs)"test_23a() { # was test_23 setup # fail mds stop mds # force down client so that recovering mds waits for reconnect local running=$(grep -c $MOUNT /proc/mounts) || true if [ $running -ne 0 ]; then echo "Stopping client $MOUNT (opts: -f)" umount -f $MOUNT fi # enter recovery on mds start_mds # try to start a new client mount_client $MOUNT & sleep 5 MOUNT_PID=$(ps -ef | grep "t lustre" | grep -v grep | awk '{print $2}') MOUNT_LUSTRE_PID=`ps -ef | grep mount.lustre | grep -v grep | awk '{print $2}'` echo mount pid is ${MOUNT_PID}, mount.lustre pid is ${MOUNT_LUSTRE_PID} ps --ppid $MOUNT_PID ps --ppid $MOUNT_LUSTRE_PID # FIXME why o why can't I kill these? Manual "ctrl-c" works... kill -TERM $MOUNT_LUSTRE_PID echo "waiting for mount to finish" ps -ef | grep mount # we can not wait $MOUNT_PID because it is not a child of this shell local PID1 local PID2 local WAIT=0 local MAX_WAIT=20 local sleep=1 while [ "$WAIT" -lt "$MAX_WAIT" ]; do sleep $sleep PID1=$(ps -ef | awk '{print $2}' | grep -w $MOUNT_PID) PID2=$(ps -ef | awk '{print $2}' | grep -w $MOUNT_LUSTRE_PID) echo PID1=$PID1 echo PID2=$PID2 [ -z "$PID1" -a -z "$PID2" ] && break echo "waiting for mount to finish ... " WAIT=$(( WAIT + sleep)) done [ "$WAIT" -eq "$MAX_WAIT" ] && error "MOUNT_PID $MOUNT_PID and \ MOUNT__LUSTRE_PID $MOUNT__LUSTRE_PID still not killed in $WAIT secs" ps -ef | grep mount stop_mds || error stop_ost || error}run_test 23a "interrupt client during recovery mount delay"umount_client $MOUNTcleanup_noclitest_23b() { # was test_23 start_ost start_mds # Simulate -EINTR during mount OBD_FAIL_LDLM_CLOSE_THREAD sysctl -w lustre.fail_loc=0x80000313 mount_client $MOUNT cleanup}run_test 23b "Simulate -EINTR during mount"fs2mds_HOST=$mds_HOSTfs2ost_HOST=$ost_HOSTcleanup_24a() { trap 0 echo "umount $MOUNT2 ..." umount $MOUNT2 || true echo "stopping fs2mds ..." stop fs2mds -f || true echo "stopping fs2ost ..." stop fs2ost -f || true}test_24a() { [ -n "$ost1_HOST" ] && fs2ost_HOST=$ost1_HOST if [ -z "$fs2ost_DEV" -o -z "$fs2mds_DEV" ]; then do_facet mds [ -b "$MDSDEV" ] && \ skip "mixed loopback and real device not working" && return fi local fs2mdsdev=${fs2mds_DEV:-${MDSDEV}_2} local fs2ostdev=${fs2ost_DEV:-$(ostdevname 1)_2} # test 8-char fsname as well local FSNAME2=test1234 add fs2mds $MDS_MKFS_OPTS --fsname=${FSNAME2} --nomgs --mgsnode=$MGSNID --reformat $fs2mdsdev || exit 10 add fs2ost $OST_MKFS_OPTS --fsname=${FSNAME2} --reformat $fs2ostdev || exit 10 setup start fs2mds $fs2mdsdev $MDS_MOUNT_OPTS && trap cleanup_24a EXIT INT start fs2ost $fs2ostdev $OST_MOUNT_OPTS mkdir -p $MOUNT2 mount -t lustre $MGSNID:/${FSNAME2} $MOUNT2 || return 1 # 1 still works check_mount || return 2 # files written on 1 should not show up on 2 cp /etc/passwd $DIR/$tfile sleep 10 [ -e $MOUNT2/$tfile ] && error "File bleed" && return 7 # 2 should work cp /etc/passwd $MOUNT2/b || return 3 rm $MOUNT2/b || return 4 # 2 is actually mounted grep $MOUNT2' ' /proc/mounts > /dev/null || return 5 # failover facet_failover fs2mds facet_failover fs2ost df umount_client $MOUNT # the MDS must remain up until last MDT stop_mds MDS=$(do_facet mds "lctl get_param -n devices" | awk '($3 ~ "mdt" && $4 ~ "MDS") { print $4 }') [ -z "$MDS" ] && error "No MDS" && return 8 cleanup_24a cleanup_nocli || return 6}run_test 24a "Multiple MDTs on a single node"test_24b() { if [ -z "$fs2mds_DEV" ]; then do_facet mds [ -b "$MDSDEV" ] && \ skip "mixed loopback and real device not working" && return fi local fs2mdsdev=${fs2mds_DEV:-${MDSDEV}_2} add fs2mds $MDS_MKFS_OPTS --fsname=${FSNAME}2 --mgs --reformat $fs2mdsdev || exit 10 setup start fs2mds $fs2mdsdev $MDS_MOUNT_OPTS && return 2 cleanup || return 6}run_test 24b "Multiple MGSs on a single node (should return err)"test_25() { setup check_mount || return 2 local MODULES=$($LCTL modules | awk '{ print $2 }') rmmod $MODULES 2>/dev/null || true cleanup || return 6}run_test 25 "Verify modules are referenced"test_26() { load_modules # we need modules before mount for sysctl, so make sure... do_facet mds "lsmod | grep -q lustre || modprobe lustre"#define OBD_FAIL_MDS_FS_SETUP 0x135 do_facet mds "sysctl -w lustre.fail_loc=0x80000135" start_mds && echo MDS started && return 1 lctl get_param -n devices DEVS=$(lctl get_param -n devices | wc -l) [ $DEVS -gt 0 ] && return 2 unload_modules || return 203}run_test 26 "MDT startup failure cleans LOV (should return errs)"set_and_check() { local myfacet=$1 local TEST=$2 local PARAM=$3 local ORIG=$(do_facet $myfacet "$TEST") if [ $# -gt 3 ]; then local FINAL=$4 else local -i FINAL FINAL=$(($ORIG + 5)) fi echo "Setting $PARAM from $ORIG to $FINAL" do_facet mds "$LCTL conf_param $PARAM=$FINAL" || error conf_param failed local RESULT local MAX=90 local WAIT=0 while [ 1 ]; do sleep 5 RESULT=$(do_facet $myfacet "$TEST") if [ $RESULT -eq $FINAL ]; then echo "Updated config after $WAIT sec (got $RESULT)" break fi WAIT=$((WAIT + 5)) if [ $WAIT -eq $MAX ]; then echo "Config update not seen: wanted $FINAL got $RESULT" return 3 fi echo "Waiting $(($MAX - $WAIT)) secs for config update" done}test_27a() { start_ost || return 1 start_mds || return 2 echo "Requeue thread should have started: " ps -e | grep ll_cfg_requeue set_and_check ost1 "lctl get_param -n obdfilter.$FSNAME-OST0000.client_cache_seconds" "$FSNAME-OST0000.ost.client_cache_seconds" || return 3 cleanup_nocli}run_test 27a "Reacquire MGS lock if OST started first"test_27b() { setup facet_failover mds set_and_check mds "lctl get_param -n mds.$FSNAME-MDT0000.group_acquire_expire" "$FSNAME-MDT0000.mdt.group_acquire_expire" || return 3 set_and_check client "lctl get_param -n mdc.$FSNAME-MDT0000-mdc-*.max_rpcs_in_flight" "$FSNAME-MDT0000.mdc.max_rpcs_in_flight" || return 4 cleanup}run_test 27b "Reacquire MGS lock after failover"test_28() { setup TEST="lctl get_param -n llite.$FSNAME-*.max_read_ahead_whole_mb" ORIG=$($TEST) declare -i FINAL FINAL=$(($ORIG + 10)) set_and_check client "$TEST" "$FSNAME.llite.max_read_ahead_whole_mb" || return 3 set_and_check client "$TEST" "$FSNAME.llite.max_read_ahead_whole_mb" || return 3 umount_client $MOUNT || return 200 mount_client $MOUNT RESULT=$($TEST) if [ $RESULT -ne $FINAL ]; then echo "New config not seen: wanted $FINAL got $RESULT" return 4 else echo "New config success: got $RESULT" fi cleanup}run_test 28 "permanent parameter setting"test_29() { [ "$OSTCOUNT" -lt "2" ] && skip "$OSTCOUNT < 2, skipping" && return setup > /dev/null 2>&1 start_ost2 sleep 10 local PARAM="$FSNAME-OST0001.osc.active" local PROC_ACT="osc.$FSNAME-OST0001-osc-*.active" local PROC_UUID="osc.$FSNAME-OST0001-osc-*.ost_server_uuid" ACTV=$(lctl get_param -n $PROC_ACT) DEAC=$((1 - $ACTV)) set_and_check client "lctl get_param -n $PROC_ACT" "$PARAM" $DEAC || return 2 # also check ost_server_uuid status RESULT=$(lctl get_param -n $PROC_UUID | grep DEACTIV) if [ -z "$RESULT" ]; then echo "Live client not deactivated: $(lctl get_param -n $PROC_UUID)" return 3 else echo "Live client success: got $RESULT" fi # check MDT too local MPROC="osc.$FSNAME-OST0001-osc.active" local MAX=30 local WAIT=0 while [ 1 ]; do sleep 5 RESULT=`do_facet mds " lctl get_param -n $MPROC"` [ ${PIPESTATUS[0]} = 0 ] || error "Can't read $MPROC" if [ $RESULT -eq $DEAC ]; then echo "MDT deactivated also after $WAIT sec (got $RESULT)" break fi WAIT=$((WAIT + 5)) if [ $WAIT -eq $MAX ]; then echo "MDT not deactivated: wanted $DEAC got $RESULT" return 4 fi echo "Waiting $(($MAX - $WAIT)) secs for MDT deactivated" done # test new client starts deactivated umount_client $MOUNT || return 200 mount_client $MOUNT RESULT=$(lctl get_param -n $PROC_UUID | grep DEACTIV | grep NEW) if [ -z "$RESULT" ]; then echo "New client not deactivated from start: $(lctl get_param -n $PROC_UUID)" return 5 else echo "New client success: got $RESULT" fi # make sure it reactivates set_and_check client "lctl get_param -n $PROC_ACT" "$PARAM" $ACTV || return 6 umount_client $MOUNT stop_ost2 cleanup_nocli #writeconf to remove all ost2 traces for subsequent tests writeconf start_mds start_ost cleanup}run_test 29 "permanently remove an OST"test_30() { setup TEST="lctl get_param -n llite.$FSNAME-*.max_read_ahead_whole_mb" ORIG=$($TEST) for i in $(seq 1 20); do set_and_check client "$TEST" "$FSNAME.llite.max_read_ahead_whole_mb" $i || return 3 done # make sure client restart still works umount_client $MOUNT mount_client $MOUNT || return 4 [ "$($TEST)" -ne "$i" ] && return 5 set_and_check client "$TEST" "$FSNAME.llite.max_read_ahead_whole_mb" $ORIG || return 6 cleanup}run_test 30 "Big config llog"test_31() { # bug 10734 # ipaddr must not exist mount -t lustre 4.3.2.1@tcp:/lustre $MOUNT || true cleanup}run_test 31 "Connect to non-existent node (returns errors, should not crash)"test_32a() { # XXX - make this run on client-only systems with real hardware on # the OST and MDT # there appears to be a lot of assumption here about loopback # devices # or maybe this test is just totally useless on a client-only system [ "$mds_HOST" = "`hostname`" ] || { skip "remote MDS" && return 0; } [ "$ost_HOST" = "`hostname`" -o "$ost1_HOST" = "`hostname`" ] || \ { skip "remote OST" && return 0; } [ -z "$TUNEFS" ] && skip "No tunefs" && return local DISK1_4=$LUSTRE/tests/disk1_4.zip [ ! -r $DISK1_4 ] && skip "Cant find $DISK1_4, skipping" && return mkdir -p $TMP/$tdir unzip -o -j -d $TMP/$tdir $DISK1_4 || { skip "Cant unzip $DISK1_4, skipping" && return ; } load_modules sysctl lnet.debug=$PTLDEBUG $TUNEFS $TMP/$tdir/mds || error "tunefs failed" # nids are wrong, so client wont work, but server should start start mds $TMP/$tdir/mds "-o loop,exclude=lustre-OST0000" || return 3 local UUID=$(lctl get_param -n mds.lustre-MDT0000.uuid) echo MDS uuid $UUID [ "$UUID" == "mdsA_UUID" ] || error "UUID is wrong: $UUID" $TUNEFS --mgsnode=`hostname` $TMP/$tdir/ost1 || error "tunefs failed" start ost1 $TMP/$tdir/ost1 "-o loop" || return 5 UUID=$(lctl get_param -n obdfilter.lustre-OST0000.uuid) echo OST uuid $UUID [ "$UUID" == "ost1_UUID" ] || error "UUID is wrong: $UUID" local NID=$($LCTL list_nids | head -1) echo "OSC changes should return err:" $LCTL conf_param lustre-OST0000.osc.max_dirty_mb=15 && return 7 $LCTL conf_param lustre-OST0000.failover.node=$NID && return 8 echo "ok." echo "MDC changes should succeed:" $LCTL conf_param lustre-MDT0000.mdc.max_rpcs_in_flight=9 || return 9 $LCTL conf_param lustre-MDT0000.failover.node=$NID || return 10 echo "ok."
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -