📄 conf-sanity.sh
字号:
# With a new good MDT failover nid, we should be able to mount a client # (but it cant talk to OST) local OLDMOUNTOPT=$MOUNTOPT MOUNTOPT="exclude=lustre-OST0000" mount_client $MOUNT MOUNTOPT=$OLDMOUNTOPT set_and_check client "lctl get_param -n mdc.*.max_rpcs_in_flight" "lustre-MDT0000.mdc.max_rpcs_in_flight" || return 11 zconf_umount `hostname` $MOUNT -f cleanup_nocli load_modules # mount a second time to make sure we didnt leave upgrade flag on load_modules $TUNEFS --dryrun $TMP/$tdir/mds || error "tunefs failed" load_modules start mds $TMP/$tdir/mds "-o loop,exclude=lustre-OST0000" || return 12 cleanup_nocli [ -d $TMP/$tdir ] && { rm -rf $TMP/$tdir || true; } # true is only for TMP on NFS}run_test 32a "Upgrade from 1.4 (not live)"test_32b() { # XXX - make this run on client-only systems with real hardware on # the OST and MDT # there appears to be a lot of assumption here about loopback # devices # or maybe this test is just totally useless on a client-only system [ "$mds_HOST" = "`hostname`" ] || { skip "remote MDS" && return 0; } [ "$ost_HOST" = "`hostname`" -o "$ost1_HOST" = "`hostname`" ] || \ { skip "remote OST" && return 0; } [ -z "$TUNEFS" ] && skip "No tunefs" && return local DISK1_4=$LUSTRE/tests/disk1_4.zip [ ! -r $DISK1_4 ] && skip "Cant find $DISK1_4, skipping" && return mkdir -p $TMP/$tdir unzip -o -j -d $TMP/$tdir $DISK1_4 || { skip "Cant unzip $DISK1_4, skipping" && return ; } load_modules sysctl lnet.debug=$PTLDEBUG NEWNAME=sofia # writeconf will cause servers to register with their current nids $TUNEFS --writeconf --fsname=$NEWNAME $TMP/$tdir/mds || error "tunefs failed" start mds $TMP/$tdir/mds "-o loop" || return 3 local UUID=$(lctl get_param -n mds.${NEWNAME}-MDT0000.uuid) echo MDS uuid $UUID [ "$UUID" == "mdsA_UUID" ] || error "UUID is wrong: $UUID" $TUNEFS --mgsnode=`hostname` --fsname=$NEWNAME --writeconf $TMP/$tdir/ost1 || error "tunefs failed" start ost1 $TMP/$tdir/ost1 "-o loop" || return 5 UUID=$(lctl get_param -n obdfilter.${NEWNAME}-OST0000.uuid) echo OST uuid $UUID [ "$UUID" == "ost1_UUID" ] || error "UUID is wrong: $UUID" echo "OSC changes should succeed:" $LCTL conf_param ${NEWNAME}-OST0000.osc.max_dirty_mb=15 || return 7 $LCTL conf_param ${NEWNAME}-OST0000.failover.node=$NID || return 8 echo "ok." echo "MDC changes should succeed:" $LCTL conf_param ${NEWNAME}-MDT0000.mdc.max_rpcs_in_flight=9 || return 9 echo "ok." # MDT and OST should have registered with new nids, so we should have # a fully-functioning client echo "Check client and old fs contents" OLDFS=$FSNAME FSNAME=$NEWNAME mount_client $MOUNT FSNAME=$OLDFS set_and_check client "lctl get_param -n mdc.*.max_rpcs_in_flight" "${NEWNAME}-MDT0000.mdc.max_rpcs_in_flight" || return 11 [ "$(cksum $MOUNT/passwd | cut -d' ' -f 1,2)" == "2479747619 779" ] || return 12 echo "ok." cleanup [ -d $TMP/$tdir ] && { rm -rf $TMP/$tdir || true; } # true is only for TMP on NFS}run_test 32b "Upgrade from 1.4 with writeconf"test_33a() { # bug 12333, was test_33 local rc=0 local FSNAME2=test-123 [ -n "$ost1_HOST" ] && fs2ost_HOST=$ost1_HOST if [ -z "$fs2ost_DEV" -o -z "$fs2mds_DEV" ]; then do_facet mds [ -b "$MDSDEV" ] && \ skip "mixed loopback and real device not working" && return fi local fs2mdsdev=${fs2mds_DEV:-${MDSDEV}_2} local fs2ostdev=${fs2ost_DEV:-$(ostdevname 1)_2} add fs2mds $MDS_MKFS_OPTS --fsname=${FSNAME2} --reformat $fs2mdsdev || exit 10 add fs2ost $OST_MKFS_OPTS --fsname=${FSNAME2} --index=8191 --mgsnode=$MGSNID --reformat $fs2ostdev || exit 10 start fs2mds $fs2mdsdev $MDS_MOUNT_OPTS && trap cleanup_24a EXIT INT start fs2ost $fs2ostdev $OST_MOUNT_OPTS do_facet mds "$LCTL conf_param $FSNAME2.sys.timeout=200" || rc=1 mkdir -p $MOUNT2 mount -t lustre $MGSNID:/${FSNAME2} $MOUNT2 || rc=2 echo "ok." umount -d $MOUNT2 stop fs2ost -f stop fs2mds -f rm -rf $MOUNT2 $fs2mdsdev $fs2ostdev cleanup_nocli || rc=6 return $rc}run_test 33a "Mount ost with a large index number"test_33b() { # was test_33a setup do_facet client dd if=/dev/zero of=$MOUNT/24 bs=1024k count=1 # Drop lock cancelation reply during umount #define OBD_FAIL_LDLM_CANCEL 0x304 do_facet client sysctl -w lustre.fail_loc=0x80000304 #sysctl -w lnet.debug=-1 umount_client $MOUNT cleanup}run_test 33b "Drop cancel during umount"test_34a() { setup do_facet client "sh runmultiop_bg_pause $DIR/file O_c" manual_umount_client rc=$? do_facet client killall -USR1 multiop if [ $rc -eq 0 ]; then error "umount not fail!" fi sleep 1 cleanup}run_test 34a "umount with opened file should be fail"test_34b() { setup touch $DIR/$tfile || return 1 stop_mds --force || return 2 manual_umount_client --force rc=$? if [ $rc -ne 0 ]; then error "mtab after failed umount - rc $rc" fi cleanup return 0 }run_test 34b "force umount with failed mds should be normal"test_34c() { setup touch $DIR/$tfile || return 1 stop_ost --force || return 2 manual_umount_client --force rc=$? if [ $rc -ne 0 ]; then error "mtab after failed umount - rc $rc" fi cleanup return 0 }run_test 34c "force umount with failed ost should be normal"test_35() { # bug 12459 setup debugsave sysctl -w lnet.debug="ha" log "Set up a fake failnode for the MDS" FAKENID="127.0.0.2" do_facet mds $LCTL conf_param ${FSNAME}-MDT0000.failover.node=$FAKENID || return 4 log "Wait for RECONNECT_INTERVAL seconds (10s)" sleep 10 MSG="conf-sanity.sh test_35 `date +%F%kh%Mm%Ss`" $LCTL clear log "$MSG" log "Stopping the MDT:" stop_mds || return 5 df $MOUNT > /dev/null 2>&1 & DFPID=$! log "Restarting the MDT:" start_mds || return 6 log "Wait for df ($DFPID) ... " wait $DFPID log "done" debugrestore # retrieve from the log the first server that the client tried to # contact after the connection loss $LCTL dk $TMP/lustre-log-$TESTNAME.log NEXTCONN=`awk "/${MSG}/ {start = 1;} /import_select_connection.*${FSNAME}-MDT0000-mdc.* using connection/ { if (start) { if (\\\$NF ~ /$FAKENID/) print \\\$NF; else print 0; exit; } }" $TMP/lustre-log-$TESTNAME.log` [ "$NEXTCONN" != "0" ] && log "The client didn't try to reconnect to the last active server (tried ${NEXTCONN} instead)" && return 7 cleanup}run_test 35 "Reconnect to the last active server first"test_36() { # 12743 local rc local FSNAME2=test1234 local fs3ost_HOST=$ost_HOST [ -n "$ost1_HOST" ] && fs2ost_HOST=$ost1_HOST && fs3ost_HOST=$ost1_HOST rc=0 if [ -z "$fs2ost_DEV" -o -z "$fs2mds_DEV" -o -z "$fs3ost_DEV" ]; then do_facet mds [ -b "$MDSDEV" ] && \ skip "mixed loopback and real device not working" && return fi [ $OSTCOUNT -lt 2 ] && skip "skipping test for single OST" && return [ "$ost_HOST" = "`hostname`" -o "$ost1_HOST" = "`hostname`" ] || \ { skip "remote OST" && return 0; } local fs2mdsdev=${fs2mds_DEV:-${MDSDEV}_2} local fs2ostdev=${fs2ost_DEV:-$(ostdevname 1)_2} local fs3ostdev=${fs3ost_DEV:-$(ostdevname 2)_2} add fs2mds $MDS_MKFS_OPTS --fsname=${FSNAME2} --reformat $fs2mdsdev || exit 10 # XXX after we support non 4K disk blocksize, change following --mkfsoptions with # other argument add fs2ost $OST_MKFS_OPTS --mkfsoptions='-b4096' --fsname=${FSNAME2} --mgsnode=$MGSNID --reformat $fs2ostdev || exit 10 add fs3ost $OST_MKFS_OPTS --mkfsoptions='-b4096' --fsname=${FSNAME2} --mgsnode=$MGSNID --reformat $fs3ostdev || exit 10 start fs2mds $fs2mdsdev $MDS_MOUNT_OPTS start fs2ost $fs2ostdev $OST_MOUNT_OPTS start fs3ost $fs3ostdev $OST_MOUNT_OPTS mkdir -p $MOUNT2 mount -t lustre $MGSNID:/${FSNAME2} $MOUNT2 || return 1 sleep 5 # until 11778 fixed dd if=/dev/zero of=$MOUNT2/$tfile bs=1M count=7 || return 2 BKTOTAL=`lctl get_param -n obdfilter.*.kbytestotal | awk 'BEGIN{total=0}; {total+=$1}; END{print total}'` BKFREE=`lctl get_param -n obdfilter.*.kbytesfree | awk 'BEGIN{free=0}; {free+=$1}; END{print free}'` BKAVAIL=`lctl get_param -n obdfilter.*.kbytesavail | awk 'BEGIN{avail=0}; {avail+=$1}; END{print avail}'` STRING=`df -P $MOUNT2 | tail -n 1 | awk '{print $2","$3","$4}'` DFTOTAL=`echo $STRING | cut -d, -f1` DFUSED=`echo $STRING | cut -d, -f2` DFAVAIL=`echo $STRING | cut -d, -f3` DFFREE=$(($DFTOTAL - $DFUSED)) ALLOWANCE=$((64 * $OSTCOUNT)) if [ $DFTOTAL -lt $(($BKTOTAL - $ALLOWANCE)) ] || [ $DFTOTAL -gt $(($BKTOTAL + $ALLOWANCE)) ] ; then echo "**** FAIL: df total($DFTOTAL) mismatch OST total($BKTOTAL)" rc=1 fi if [ $DFFREE -lt $(($BKFREE - $ALLOWANCE)) ] || [ $DFFREE -gt $(($BKFREE + $ALLOWANCE)) ] ; then echo "**** FAIL: df free($DFFREE) mismatch OST free($BKFREE)" rc=2 fi if [ $DFAVAIL -lt $(($BKAVAIL - $ALLOWANCE)) ] || [ $DFAVAIL -gt $(($BKAVAIL + $ALLOWANCE)) ] ; then echo "**** FAIL: df avail($DFAVAIL) mismatch OST avail($BKAVAIL)" rc=3 fi umount -d $MOUNT2 stop fs3ost -f || return 200 stop fs2ost -f || return 201 stop fs2mds -f || return 202 rm -rf $MOUNT2 $fs2mdsdev $fs2ostdev $fs3ostdev unload_modules || return 203 return $rc}run_test 36 "df report consistency on OSTs with different block size"test_37() { [ -n "$CLIENTONLY" -o -n "$CLIENTMODSONLY" ] && skip "client only testing" && return 0 LOCAL_MDSDEV="$TMP/mdt.img" SYM_MDSDEV="$TMP/sym_mdt.img" echo "MDS : $LOCAL_MDSDEV" echo "SYMLINK : $SYM_MDSDEV" rm -f $LOCAL_MDSDEV touch $LOCAL_MDSDEV mkfs.lustre --reformat --fsname=lustre --mdt --mgs --device-size=9000 $LOCAL_MDSDEV || error "mkfs.lustre $LOCAL_MDSDEV failed" ln -s $LOCAL_MDSDEV $SYM_MDSDEV echo "mount symlink device - $SYM_MDSDEV" mount_op=`mount -v -t lustre -o loop $SYM_MDSDEV ${MOUNT%/*}/mds 2>&1 | grep "unable to set tunable"` umount -d ${MOUNT%/*}/mds rm -f $LOCAL_MDSDEV $SYM_MDSDEV if [ -n "$mount_op" ]; then error "**** FAIL: set tunables failed for symlink device" fi return 0}run_test 37 "verify set tunables works for symlink device"test_38() { # bug 14222 setup # like runtests COUNT=10 SRC="/etc /bin" FILES=`find $SRC -type f -mtime +1 | head -n $COUNT` log "copying $(echo $FILES | wc -w) files to $DIR/$tdir" mkdir -p $DIR/$tdir tar cf - $FILES | tar xf - -C $DIR/$tdir || \ error "copying $SRC to $DIR/$tdir" sync umount_client $MOUNT stop_mds log "rename lov_objid file on MDS" rm -f $TMP/lov_objid.orig do_facet mds "debugfs -c -R \\\"dump lov_objid $TMP/lov_objid.orig\\\" $MDSDEV" do_facet mds "debugfs -w -R \\\"rm lov_objid\\\" $MDSDEV" do_facet mds "od -Ax -td8 $TMP/lov_objid.orig" # check create in mds_lov_connect start_mds mount_client $MOUNT for f in $FILES; do [ $V ] && log "verifying $DIR/$tdir/$f" diff -q $f $DIR/$tdir/$f || ERROR=y done do_facet mds "debugfs -c -R \\\"dump lov_objid $TMP/lov_objid.new\\\" $MDSDEV" do_facet mds "od -Ax -td8 $TMP/lov_objid.new" [ "$ERROR" = "y" ] && error "old and new files are different after connect" || true # check it's updates in sync umount_client $MOUNT stop_mds do_facet mds dd if=/dev/zero of=$TMP/lov_objid.clear bs=4096 count=1 do_facet mds "debugfs -w -R \\\"rm lov_objid\\\" $MDSDEV" do_facet mds "debugfs -w -R \\\"write $TMP/lov_objid.clear lov_objid\\\" $MDSDEV " start_mds mount_client $MOUNT for f in $FILES; do [ $V ] && log "verifying $DIR/$tdir/$f" diff -q $f $DIR/$tdir/$f || ERROR=y done do_facet mds "debugfs -c -R \\\"dump lov_objid $TMP/lov_objid.new1\\\" $MDSDEV" do_facet mds "od -Ax -td8 $TMP/lov_objid.new1" umount_client $MOUNT stop_mds [ "$ERROR" = "y" ] && error "old and new files are different after sync" || true log "files compared the same" cleanup}run_test 38 "MDS recreates missing lov_objid file from OST data"test_39() { #bug 14413 PTLDEBUG=+malloc setup cleanup perl $SRCDIR/leak_finder.pl $TMP/debug 2>&1 | egrep '*** Leak:' && error "memory leak detected" || true}run_test 39 "leak_finder recognizes both LUSTRE and LNET malloc messages"test_40() { # bug 15759 start_ost #define OBD_FAIL_TGT_TOOMANY_THREADS 0x706 do_facet mds "sysctl -w lustre.fail_loc=0x80000706" start_mds cleanup}run_test 40 "race during service thread startup"equals_msg `basename $0`: test complete[ -f "$TESTSUITELOG" ] && cat $TESTSUITELOG || true
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -