⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 recovery-small.sh

📁 lustre 1.6.5 source code
💻 SH
📖 第 1 页 / 共 2 页
字号:
#!/bin/bashset -e#         bug  5493ALWAYS_EXCEPT="52 $RECOVERY_SMALL_EXCEPT"#PTLDEBUG=${PTLDEBUG:--1}LUSTRE=${LUSTRE:-`dirname $0`/..}. $LUSTRE/tests/test-framework.shinit_test_env $@. ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}# also long tests: 19, 21a, 21e, 21f, 23, 27#                                   1  2.5  2.5    4    4          (min)"[ "$SLOW" = "no" ] && EXCEPT_SLOW="17  26a  26b    50   51     57"build_test_filter# Allow us to override the setup if we already have a mounted system by# setting SETUP=" " and CLEANUP=" "SETUP=${SETUP:-""}CLEANUP=${CLEANUP:-""}cleanup_and_setup_lustrerm -rf $DIR/[df][0-9]*test_1() {    drop_request "mcreate $MOUNT/1"  || return 1    drop_reint_reply "mcreate $MOUNT/2"    || return 2}run_test 1 "mcreate: drop req, drop rep"test_2() {    drop_request "tchmod 111 $MOUNT/2"  || return 1    drop_reint_reply "tchmod 666 $MOUNT/2"    || return 2}run_test 2 "chmod: drop req, drop rep"test_3() {    drop_request "statone $MOUNT/2" || return 1    drop_reply "statone $MOUNT/2"   || return 2}run_test 3 "stat: drop req, drop rep"SAMPLE_NAME=recovery-small.junkSAMPLE_FILE=$TMP/$SAMPLE_NAME# make this big, else test 9 doesn't wait for bulk -- bz 5595dd if=/dev/urandom of=$SAMPLE_FILE bs=1M count=4test_4() {    do_facet client "cp $SAMPLE_FILE $MOUNT/$SAMPLE_NAME" || return 1    drop_request "cat $MOUNT/$SAMPLE_NAME > /dev/null"   || return 2    drop_reply "cat $MOUNT/$SAMPLE_NAME > /dev/null"     || return 3}run_test 4 "open: drop req, drop rep"test_5() {    drop_request "mv $MOUNT/$SAMPLE_NAME $MOUNT/renamed" || return 1    drop_reint_reply "mv $MOUNT/renamed $MOUNT/renamed-again" || return 2    do_facet client "checkstat -v $MOUNT/renamed-again"  || return 3}run_test 5 "rename: drop req, drop rep"[ ! -e $MOUNT/renamed-again ] && cp $SAMPLE_FILE $MOUNT/renamed-againtest_6() {    drop_request "mlink $MOUNT/renamed-again $MOUNT/link1" || return 1    drop_reint_reply "mlink $MOUNT/renamed-again $MOUNT/link2"   || return 2}run_test 6 "link: drop req, drop rep"[ ! -e $MOUNT/link1 ] && mlink $MOUNT/renamed-again $MOUNT/link1[ ! -e $MOUNT/link2 ] && mlink $MOUNT/renamed-again $MOUNT/link2test_7() {    drop_request "munlink $MOUNT/link1"   || return 1    drop_reint_reply "munlink $MOUNT/link2"     || return 2}run_test 7 "unlink: drop req, drop rep"#bug 1423test_8() {    drop_reint_reply "touch $MOUNT/$tfile"    || return 1}run_test 8 "touch: drop rep (bug 1423)"SAMPLE_FILE=$TMP/recovery-small.junkdd if=/dev/urandom of=$SAMPLE_FILE bs=1M count=4#bug 1420test_9() {    pause_bulk "cp /etc/profile $MOUNT/$tfile"       || return 1    do_facet client "cp $SAMPLE_FILE $MOUNT/${tfile}.2"  || return 2    do_facet client "sync"    do_facet client "rm $MOUNT/$tfile $MOUNT/${tfile}.2" || return 3}run_test 9 "pause bulk on OST (bug 1420)"#bug 1521test_10() {    do_facet client mcreate $MOUNT/$tfile        || return 1    drop_bl_callback "chmod 0777 $MOUNT/$tfile"  || echo "evicted as expected"    # wait for the mds to evict the client    #echo "sleep $(($TIMEOUT*2))"    #sleep $(($TIMEOUT*2))    do_facet client touch $MOUNT/$tfile || echo "touch failed, evicted"    do_facet client checkstat -v -p 0777 $MOUNT/$tfile  || return 3    do_facet client "munlink $MOUNT/$tfile"}run_test 10 "finish request on server after client eviction (bug 1521)"#bug 2460# wake up a thread waiting for completion after evictiontest_11(){    do_facet client multiop $MOUNT/$tfile Ow  || return 1    do_facet client multiop $MOUNT/$tfile or  || return 2    cancel_lru_locks osc    do_facet client multiop $MOUNT/$tfile or  || return 3    drop_bl_callback multiop $MOUNT/$tfile Ow || echo "evicted as expected"    do_facet client munlink $MOUNT/$tfile  || return 4}run_test 11 "wake up a thread waiting for completion after eviction (b=2460)"#b=2494test_12(){    $LCTL mark multiop $MOUNT/$tfile OS_c     do_facet mds "sysctl -w lustre.fail_loc=0x115"    clear_failloc mds $((TIMEOUT * 2)) &    multiop_bg_pause $MOUNT/$tfile OS_c || return 1    PID=$!#define OBD_FAIL_MDS_CLOSE_NET           0x115    kill -USR1 $PID    echo "waiting for multiop $PID"    wait $PID || return 2    do_facet client munlink $MOUNT/$tfile  || return 3}run_test 12 "recover from timed out resend in ptlrpcd (b=2494)"# Bug 113, check that readdir lost recv timeout works.test_13() {    mkdir $MOUNT/readdir || return 1    touch $MOUNT/readdir/newentry || return# OBD_FAIL_MDS_READPAGE_NET|OBD_FAIL_ONCE    do_facet mds "sysctl -w lustre.fail_loc=0x80000104"    ls $MOUNT/readdir || return 3    do_facet mds "sysctl -w lustre.fail_loc=0"    rm -rf $MOUNT/readdir || return 4}run_test 13 "mdc_readpage restart test (bug 1138)"# Bug 113, check that readdir lost send timeout works.test_14() {    mkdir $MOUNT/readdir    touch $MOUNT/readdir/newentry# OBD_FAIL_MDS_SENDPAGE|OBD_FAIL_ONCE    do_facet mds "sysctl -w lustre.fail_loc=0x80000106"    ls $MOUNT/readdir || return 1    do_facet mds "sysctl -w lustre.fail_loc=0"}run_test 14 "mdc_readpage resend test (bug 1138)"test_15() {    do_facet mds "sysctl -w lustre.fail_loc=0x80000128"    touch $DIR/$tfile && return 1    return 0}run_test 15 "failed open (-ENOMEM)"READ_AHEAD=`lctl get_param -n llite.*.max_read_ahead_mb | head -n 1`stop_read_ahead() {   lctl set_param -n llite.*.max_read_ahead_mb 0}start_read_ahead() {   lctl set_param -n llite.*.max_read_ahead_mb $READ_AHEAD}test_16() {    do_facet client cp $SAMPLE_FILE $MOUNT    sync    stop_read_ahead#define OBD_FAIL_PTLRPC_BULK_PUT_NET 0x504 | OBD_FAIL_ONCE    do_facet ost1 sysctl -w lustre.fail_loc=0x80000504    cancel_lru_locks osc    # OST bulk will time out here, client resends    do_facet client "cmp $SAMPLE_FILE $MOUNT/${SAMPLE_FILE##*/}" || return 1    do_facet ost1 sysctl -w lustre.fail_loc=0    # give recovery a chance to finish (shouldn't take long)    sleep $TIMEOUT    do_facet client "cmp $SAMPLE_FILE $MOUNT/${SAMPLE_FILE##*/}" || return 2    start_read_ahead}run_test 16 "timeout bulk put, don't evict client (2732)"test_17() {    local at_max_saved=0    # With adaptive timeouts, bulk_get won't expire until adaptive_timeout_max    if at_is_valid && at_is_enabled; then        at_max_saved=$(at_max_get ost1)        at_max_set $TIMEOUT ost1    fi    # OBD_FAIL_PTLRPC_BULK_GET_NET 0x0503 | OBD_FAIL_ONCE    # OST bulk will time out here, client retries    do_facet ost1 sysctl -w lustre.fail_loc=0x80000503    # need to ensure we send an RPC    do_facet client cp $SAMPLE_FILE $DIR/$tfile    sync    # with AT, client will wait adaptive_max*factor+net_latency before    # expiring the req, hopefully timeout*2 is enough    sleep $(($TIMEOUT*2))    do_facet ost1 sysctl -w lustre.fail_loc=0    do_facet client "df $DIR"    # expect cmp to succeed, client resent bulk    do_facet client "cmp $SAMPLE_FILE $DIR/$tfile" || return 3    do_facet client "rm $DIR/$tfile" || return 4    [ $at_max_saved -ne 0 ] && $(at_max_set $at_max_saved ost1)    return 0}run_test 17 "timeout bulk get, don't evict client (2732)"test_18a() {    [ -z ${ost2_svc} ] && skip "needs 2 osts" && return 0    do_facet client mkdir -p $MOUNT/$tdir    f=$MOUNT/$tdir/$tfile    cancel_lru_locks osc    pgcache_empty || return 1    # 1 stripe on ost2    lfs setstripe $f -s $((128 * 1024)) -i 1 -c 1    do_facet client cp $SAMPLE_FILE $f    sync    local osc2dev=`lctl get_param -n devices | grep ${ost2_svc}-osc- | awk '{print $1}'`    $LCTL --device $osc2dev deactivate || return 3    # my understanding is that there should be nothing in the page    # cache after the client reconnects?         rc=0    pgcache_empty || rc=2    $LCTL --device $osc2dev activate    rm -f $f    return $rc}run_test 18a "manual ost invalidate clears page cache immediately"test_18b() {    do_facet client mkdir -p $MOUNT/$tdir    f=$MOUNT/$tdir/$tfile    f2=$MOUNT/$tdir/${tfile}-2    cancel_lru_locks osc    pgcache_empty || return 1    # shouldn't have to set stripe size of count==1    lfs setstripe $f -s $((128 * 1024)) -i 0 -c 1    lfs setstripe $f2 -s $((128 * 1024)) -i 0 -c 1    do_facet client cp $SAMPLE_FILE $f    sync    ost_evict_client    # force reconnect    df $MOUNT > /dev/null 2>&1    sleep 2    # my understanding is that there should be nothing in the page    # cache after the client reconnects?         rc=0    pgcache_empty || rc=2    rm -f $f $f2    return $rc}run_test 18b "eviction and reconnect clears page cache (2766)"test_18c() {    do_facet client mkdir -p $MOUNT/$tdir    f=$MOUNT/$tdir/$tfile    f2=$MOUNT/$tdir/${tfile}-2    cancel_lru_locks osc    pgcache_empty || return 1    # shouldn't have to set stripe size of count==1    lfs setstripe $f -s $((128 * 1024)) -i 0 -c 1    lfs setstripe $f2 -s $((128 * 1024)) -i 0 -c 1    do_facet client cp $SAMPLE_FILE $f    sync    ost_evict_client    # OBD_FAIL_OST_CONNECT_NET2    # lost reply to connect request    do_facet ost1 sysctl -w lustre.fail_loc=0x80000225    # force reconnect    df $MOUNT > /dev/null 2>&1    sleep 2    # my understanding is that there should be nothing in the page    # cache after the client reconnects?         rc=0    pgcache_empty || rc=2    rm -f $f $f2    return $rc}run_test 18c "Dropped connect reply after eviction handing (14755)"test_19a() {    f=$MOUNT/$tfile    do_facet client mcreate $f        || return 1    drop_ldlm_cancel "chmod 0777 $f"  || echo "evicted as expected"    do_facet client checkstat -v -p 0777 $f  || echo evicted    # let the client reconnect    sleep 5    do_facet client "munlink $f"}run_test 19a "test expired_lock_main on mds (2867)"test_19b() {    f=$MOUNT/$tfile    do_facet client multiop $f Ow  || return 1    do_facet client multiop $f or  || return 2    cancel_lru_locks osc    do_facet client multiop $f or  || return 3    drop_ldlm_cancel multiop $f Ow  || echo "client evicted, as expected"    do_facet client munlink $f  || return 4}run_test 19b "test expired_lock_main on ost (2867)"test_20a() {	# bug 2983 - ldlm_handle_enqueue cleanup	mkdir -p $DIR/$tdir	lfs setstripe $DIR/$tdir/${tfile} -i 0 -c 1	multiop_bg_pause $DIR/$tdir/${tfile} O_wc || return 1	MULTI_PID=$!	cancel_lru_locks osc#define OBD_FAIL_LDLM_ENQUEUE_EXTENT_ERR 0x308	do_facet ost1 sysctl -w lustre.fail_loc=0x80000308	kill -USR1 $MULTI_PID	wait $MULTI_PID	rc=$?	[ $rc -eq 0 ] && error "multiop didn't fail enqueue: rc $rc" || true}run_test 20a "ldlm_handle_enqueue error (should return error)" test_20b() {	# bug 2986 - ldlm_handle_enqueue error during open	mkdir -p $DIR/$tdir	lfs setstripe $DIR/$tdir/${tfile} -i 0 -c 1	cancel_lru_locks osc#define OBD_FAIL_LDLM_ENQUEUE_EXTENT_ERR 0x308	do_facet ost1 sysctl -w lustre.fail_loc=0x80000308	dd if=/etc/hosts of=$DIR/$tdir/$tfile && \		error "didn't fail open enqueue" || true}run_test 20b "ldlm_handle_enqueue error (should return error)"test_21a() {       mkdir -p $DIR/$tdir-1       mkdir -p $DIR/$tdir-2       multiop_bg_pause $DIR/$tdir-1/f O_c || return 1       close_pid=$!       do_facet mds "sysctl -w lustre.fail_loc=0x80000129"       multiop $DIR/$tdir-2/f Oc &       open_pid=$!       sleep 1       do_facet mds "sysctl -w lustre.fail_loc=0"       do_facet mds "sysctl -w lustre.fail_loc=0x80000115"       kill -USR1 $close_pid       cancel_lru_locks mdc       wait $close_pid || return 1       wait $open_pid || return 2       do_facet mds "sysctl -w lustre.fail_loc=0"       $CHECKSTAT -t file $DIR/$tdir-1/f || return 3       $CHECKSTAT -t file $DIR/$tdir-2/f || return 4       rm -rf $DIR/$tdir-*}run_test 21a "drop close request while close and open are both in flight"test_21b() {       mkdir -p $DIR/$tdir-1       mkdir -p $DIR/$tdir-2       multiop_bg_pause $DIR/$tdir-1/f O_c || return 1       close_pid=$!       do_facet mds "sysctl -w lustre.fail_loc=0x80000107"       mcreate $DIR/$tdir-2/f &       open_pid=$!       sleep 1       do_facet mds "sysctl -w lustre.fail_loc=0"       kill -USR1 $close_pid       cancel_lru_locks mdc       wait $close_pid || return 1       wait $open_pid || return 3       $CHECKSTAT -t file $DIR/$tdir-1/f || return 4       $CHECKSTAT -t file $DIR/$tdir-2/f || return 5       rm -rf $DIR/$tdir-*}run_test 21b "drop open request while close and open are both in flight"test_21c() {       mkdir -p $DIR/$tdir-1       mkdir -p $DIR/$tdir-2       multiop_bg_pause $DIR/$tdir-1/f O_c || return 1       close_pid=$!       do_facet mds "sysctl -w lustre.fail_loc=0x80000107"       mcreate $DIR/$tdir-2/f &       open_pid=$!       sleep 3       do_facet mds "sysctl -w lustre.fail_loc=0"       do_facet mds "sysctl -w lustre.fail_loc=0x80000115"       kill -USR1 $close_pid       cancel_lru_locks mdc       wait $close_pid || return 1       wait $open_pid || return 2       do_facet mds "sysctl -w lustre.fail_loc=0"       $CHECKSTAT -t file $DIR/$tdir-1/f || return 2       $CHECKSTAT -t file $DIR/$tdir-2/f || return 3       rm -rf $DIR/$tdir-*}run_test 21c "drop both request while close and open are both in flight"test_21d() {       mkdir -p $DIR/$tdir-1       mkdir -p $DIR/$tdir-2       multiop_bg_pause $DIR/$tdir-1/f O_c || return 1       pid=$!       do_facet mds "sysctl -w lustre.fail_loc=0x80000129"       multiop $DIR/$tdir-2/f Oc &       sleep 1       do_facet mds "sysctl -w lustre.fail_loc=0"       do_facet mds "sysctl -w lustre.fail_loc=0x80000122"       kill -USR1 $pid       cancel_lru_locks mdc       wait $pid || return 1       do_facet mds "sysctl -w lustre.fail_loc=0"       $CHECKSTAT -t file $DIR/$tdir-1/f || return 2       $CHECKSTAT -t file $DIR/$tdir-2/f || return 3       rm -rf $DIR/$tdir-*}run_test 21d "drop close reply while close and open are both in flight"test_21e() {       mkdir -p $DIR/$tdir-1       mkdir -p $DIR/$tdir-2       multiop_bg_pause $DIR/$tdir-1/f O_c || return 1       pid=$!

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -