⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 replay-single.sh

📁 lustre 1.6.5 source code
💻 SH
📖 第 1 页 / 共 3 页
字号:
# ---------------------------------------------------------------------------
# Tests 53b-53h: an open (mcreate) and a close (multiop O_c) are in flight on
# the MDC at the same time while the MDS is told, through lustre.fail_loc, to
# misbehave for one of them; the MDS is then failed over and both files are
# checked with $CHECKSTAT.
#
# Conventions used by every 53x test:
#   close_pid - background multiop that holds $DIR/${tdir}-1/f open and
#               closes it when it receives SIGUSR1
#   open_pid  - background mcreate of $DIR/${tdir}-2/f
#   A distinct non-zero return code identifies the step that failed.
# NOTE(review): the 0x80000000 bit in fail_loc values is presumably
# OBD_FAIL_ONCE (test_55 documents 0x8000012b as
# "OBD_FAIL_MDS_OPEN_CREATE | OBD_FAIL_ONCE") - confirm against obd_support.h.
# ---------------------------------------------------------------------------

# 53b: the open request is hit by the failure, the close completes normally.
test_53b() {
    mkdir -p $DIR/$tdir-1
    mkdir -p $DIR/$tdir-2
    multiop $DIR/$tdir-1/f O_c &
    close_pid=$!

    #define OBD_FAIL_MDS_REINT_NET 0x107
    do_facet mds "sysctl -w lustre.fail_loc=0x80000107"
    mcreate $DIR/${tdir}-2/f &
    open_pid=$!
    sleep 1

    do_facet mds "sysctl -w lustre.fail_loc=0"
    kill -USR1 $close_pid
    cancel_lru_locks MDC # force the close
    wait $close_pid || return 1
    # open should still be here
    [ -d /proc/$open_pid ] || return 2

    replay_barrier_nodf mds
    fail mds
    wait $open_pid || return 3

    $CHECKSTAT -t file $DIR/${tdir}-1/f || return 4
    $CHECKSTAT -t file $DIR/${tdir}-2/f || return 5
    rm -rf $DIR/${tdir}-*
}
run_test 53b "|X| open request while two MDC requests in flight"

# 53c: fail_loc 0x107 is armed for the open, then 0x115 for the close.
# NOTE(review): 0x115 is presumably the "close request" failure point implied
# by the test title - confirm the constant name.
test_53c() {
    mkdir -p $DIR/${tdir}-1
    mkdir -p $DIR/${tdir}-2
    multiop $DIR/${tdir}-1/f O_c &
    close_pid=$!

    do_facet mds "sysctl -w lustre.fail_loc=0x80000107"
    mcreate $DIR/${tdir}-2/f &
    open_pid=$!
    sleep 1

    do_facet mds "sysctl -w lustre.fail_loc=0x80000115"
    kill -USR1 $close_pid
    cancel_lru_locks MDC  # force the close

    replay_barrier_nodf mds
    fail_nodf mds
    wait $open_pid || return 1
    sleep 2
    # close should be gone
    [ -d /proc/$close_pid ] && return 2
    do_facet mds "sysctl -w lustre.fail_loc=0"

    $CHECKSTAT -t file $DIR/${tdir}-1/f || return 3
    $CHECKSTAT -t file $DIR/${tdir}-2/f || return 4
    rm -rf $DIR/${tdir}-*
}
run_test 53c "|X| open request and close request while two MDC requests in flight"

# 53d: the close reply is hit by the failure; the create runs in the
# foreground and must succeed while the close is still pending.
test_53d() {
    mkdir -p $DIR/${tdir}-1
    mkdir -p $DIR/${tdir}-2
    multiop $DIR/${tdir}-1/f O_c &
    close_pid=$!
    # give multiop a chance to open
    sleep 1

    # define OBD_FAIL_MDS_CLOSE_NET_REP 0X138
    # NOTE(review): the comment above says 0X138 but the value written below
    # is 0x13b (as in test_53f/53h) - confirm which one is correct.
    do_facet mds "sysctl -w lustre.fail_loc=0x8000013b"
    kill -USR1 $close_pid
    cancel_lru_locks MDC  # force the close
    do_facet mds "sysctl -w lustre.fail_loc=0"
    mcreate $DIR/${tdir}-2/f || return 1

    # close should still be here
    [ -d /proc/$close_pid ] || return 2

    replay_barrier_nodf mds
    fail mds
    wait $close_pid || return 3

    $CHECKSTAT -t file $DIR/${tdir}-1/f || return 4
    $CHECKSTAT -t file $DIR/${tdir}-2/f || return 5
    rm -rf $DIR/${tdir}-*
}
run_test 53d "|X| close reply while two MDC requests in flight"

# 53e: the open reply is hit by the failure, the close completes normally.
test_53e() {
    mkdir -p $DIR/$tdir-1
    mkdir -p $DIR/$tdir-2
    multiop $DIR/$tdir-1/f O_c &
    close_pid=$!

    #define OBD_FAIL_MDS_REINT_NET_REP       0x119
    do_facet mds "sysctl -w lustre.fail_loc=0x80000119"
    mcreate $DIR/${tdir}-2/f &
    open_pid=$!
    sleep 1

    do_facet mds "sysctl -w lustre.fail_loc=0"
    kill -USR1 $close_pid
    cancel_lru_locks MDC  # force the close
    wait $close_pid || return 1
    # open should still be here
    [ -d /proc/$open_pid ] || return 2

    replay_barrier_nodf mds
    fail mds
    wait $open_pid || return 3

    $CHECKSTAT -t file $DIR/${tdir}-1/f || return 4
    $CHECKSTAT -t file $DIR/${tdir}-2/f || return 5
    rm -rf $DIR/${tdir}-*
}
run_test 53e "|X| open reply while two MDC requests in flight"

# 53f: fail_loc 0x119 (open reply) is armed for the open, then 0x13b for the
# close.
test_53f() {
    mkdir -p $DIR/${tdir}-1
    mkdir -p $DIR/${tdir}-2
    multiop $DIR/${tdir}-1/f O_c &
    close_pid=$!

    do_facet mds "sysctl -w lustre.fail_loc=0x80000119"
    mcreate $DIR/${tdir}-2/f &
    open_pid=$!
    sleep 1

    do_facet mds "sysctl -w lustre.fail_loc=0x8000013b"
    kill -USR1 $close_pid
    cancel_lru_locks MDC

    replay_barrier_nodf mds
    fail_nodf mds
    wait $open_pid || return 1
    sleep 2
    #close should be gone
    [ -d /proc/$close_pid ] && return 2
    do_facet mds "sysctl -w lustre.fail_loc=0"

    $CHECKSTAT -t file $DIR/${tdir}-1/f || return 3
    $CHECKSTAT -t file $DIR/${tdir}-2/f || return 4
    rm -rf $DIR/${tdir}-*
}
run_test 53f "|X| open reply and close reply while two MDC requests in flight"

# 53g: fail_loc 0x119 (open reply) is armed for the open, then 0x115 for the
# close; fail_loc is cleared before the MDS is failed over.
test_53g() {
    mkdir -p $DIR/${tdir}-1
    mkdir -p $DIR/${tdir}-2
    multiop $DIR/${tdir}-1/f O_c &
    close_pid=$!

    do_facet mds "sysctl -w lustre.fail_loc=0x80000119"
    mcreate $DIR/${tdir}-2/f &
    open_pid=$!
    sleep 1

    do_facet mds "sysctl -w lustre.fail_loc=0x80000115"
    kill -USR1 $close_pid
    cancel_lru_locks MDC # force the close
    do_facet mds "sysctl -w lustre.fail_loc=0"

    replay_barrier_nodf mds
    fail_nodf mds
    wait $open_pid || return 1
    sleep 2
    # close should be gone
    [ -d /proc/$close_pid ] && return 2

    $CHECKSTAT -t file $DIR/${tdir}-1/f || return 3
    $CHECKSTAT -t file $DIR/${tdir}-2/f || return 4
    rm -rf $DIR/${tdir}-*
}
run_test 53g "|X| drop open reply and close request while close and open are both in flight"

# 53h: fail_loc 0x107 (open request) is armed for the open, then 0x13b for
# the close.
test_53h() {
    mkdir -p $DIR/${tdir}-1
    mkdir -p $DIR/${tdir}-2
    multiop $DIR/${tdir}-1/f O_c &
    close_pid=$!

    do_facet mds "sysctl -w lustre.fail_loc=0x80000107"
    mcreate $DIR/${tdir}-2/f &
    open_pid=$!
    sleep 1

    do_facet mds "sysctl -w lustre.fail_loc=0x8000013b"
    kill -USR1 $close_pid
    cancel_lru_locks MDC  # force the close
    sleep 1

    replay_barrier_nodf mds
    fail_nodf mds
    wait $open_pid || return 1
    sleep 2
    # close should be gone
    [ -d /proc/$close_pid ] && return 2
    do_facet mds "sysctl -w lustre.fail_loc=0"

    $CHECKSTAT -t file $DIR/${tdir}-1/f || return 3
    $CHECKSTAT -t file $DIR/${tdir}-2/f || return 4
    rm -rf $DIR/${tdir}-*
}
run_test 53h "|X| open request and close reply while two MDC requests in flight"

#b3761 ASSERTION(hash != 0) failed
# Arms the open-create failure once, runs a background touch, then clears the
# failure and removes the file. Always returns 0.
test_55() {
# OBD_FAIL_MDS_OPEN_CREATE | OBD_FAIL_ONCE
    do_facet mds "sysctl -w lustre.fail_loc=0x8000012b"
    touch $DIR/$tfile &
    # give touch a chance to run
    sleep 5
    do_facet mds "sysctl -w lustre.fail_loc=0x0"
    rm $DIR/$tfile
    return 0
}
run_test 55 "let MDS_CHECK_RESENT return the original return code instead of 0"

#b3440 ASSERTION(rec->ur_fid2->id) failed
# Creates a dangling symlink, then fails the MDS after a replay barrier.
test_56() {
    ln -s foo $DIR/$tfile
    replay_barrier mds
    #drop_reply "cat $DIR/$tfile"
    fail mds
    sleep 10
}
run_test 56 "don't replay a symlink open request (3440)"

#recovery one mds-ost setattr from llog
# Returns 1 if the file is missing after the MDS failover.
test_57() {
#define OBD_FAIL_MDS_OST_SETATTR       0x12c
    do_facet mds "sysctl -w lustre.fail_loc=0x8000012c"
    touch $DIR/$tfile
    replay_barrier mds
    fail mds
    sleep 1
    $CHECKSTAT -t file $DIR/$tfile || return 1
    do_facet mds "sysctl -w lustre.fail_loc=0x0"
    rm $DIR/$tfile
}
run_test 57 "test recovery from llog for setattr op"

#recovery many mds-ost setattr from llog
# Same as test_57 but with 2500 files under $DIR/$tdir.
test_58() {
    # This test removes $DIR/$tdir on exit, so do not rely on it existing.
    mkdir -p $DIR/$tdir
#define OBD_FAIL_MDS_OST_SETATTR       0x12c
    do_facet mds "sysctl -w lustre.fail_loc=0x8000012c"
    createmany -o $DIR/$tdir/$tfile-%d 2500
    replay_barrier mds
    fail mds
    sleep 2
    $CHECKSTAT -t file $DIR/$tdir/$tfile-* >/dev/null || return 1
    do_facet mds "sysctl -w lustre.fail_loc=0x0"
    unlinkmany $DIR/$tdir/$tfile-%d 2500
    rmdir $DIR/$tdir
}
run_test 58 "test recovery from llog for setattr op (test llog_gen_rec)"

# log_commit_thread vs filter_destroy race used to lead to import use after free
# bug 11658
test_59() {
    # test_58 (and this test) rmdir $DIR/$tdir; make sure it exists.
    mkdir -p $DIR/$tdir
    createmany -o $DIR/$tdir/$tfile-%d 200
    sync
    unlinkmany $DIR/$tdir/$tfile-%d 200
#define OBD_FAIL_PTLRPC_DELAY_RECOV       0x507
    do_facet ost1 "sysctl -w lustre.fail_loc=0x507"
    fail ost1
    fail mds
    do_facet ost1 "sysctl -w lustre.fail_loc=0x0"
    sleep 20
    rmdir $DIR/$tdir
}
run_test 59 "test log_commit_thread vs filter_destroy race"

# race between add unlink llog vs cat log init in post_recovery (only for b1_6)
# bug 12086: should no oops and No ctxt error for this test
test_60() {
    # test_59 removes $DIR/$tdir; make sure it exists.
    mkdir -p $DIR/$tdir
    createmany -o $DIR/$tdir/$tfile-%d 200
    replay_barrier mds
    unlinkmany $DIR/$tdir/$tfile-%d 0 100
    fail mds
    unlinkmany $DIR/$tdir/$tfile-%d 100 100
    # Any "No ctxt" line in the kernel log fails the test.
    local no_ctxt=$(dmesg | grep "No ctxt")
    [ -z "$no_ctxt" ] || error "ctxt is not initialized in recovery"
}
run_test 60 "test llog post recovery init vs llog unlink"

#test race  llog recovery thread vs llog cleanup
# Returns 1 if any of the unlinked files is still visible afterwards.
test_61a() {
    # Earlier tests (and this one) rmdir $DIR/$tdir; make sure it exists.
    mkdir -p $DIR/$tdir
    createmany -o $DIR/$tdir/$tfile-%d 800
    replay_barrier ost1
#   OBD_FAIL_OST_LLOG_RECOVERY_TIMEOUT 0x221
    unlinkmany $DIR/$tdir/$tfile-%d 800
    # NOTE(review): fail_loc is set on facet "ost" while the failover below
    # targets "ost1" - confirm both names resolve to the same node.
    do_facet ost "sysctl -w lustre.fail_loc=0x80000221"
    facet_failover ost1
    sleep 10
    fail ost1
    sleep 30
    do_facet ost "sysctl -w lustre.fail_loc=0x0"
    $CHECKSTAT -t file $DIR/$tdir/$tfile-* && return 1
    rmdir $DIR/$tdir
}
run_test 61a "test race llog recovery vs llog cleanup"

#test race  mds llog sync vs llog cleanup
# Returns 1 if a 4k write from the client fails after the MDS failovers.
test_61b() {
#   OBD_FAIL_MDS_LLOG_SYNC_TIMEOUT 0x13a
    do_facet mds "sysctl -w lustre.fail_loc=0x8000013a"
    facet_failover mds
    sleep 10
    fail mds
    do_facet client dd if=/dev/zero of=$DIR/$tfile bs=4k count=1 || return 1
}
run_test 61b "test race mds llog sync vs llog cleanup"

#test race  cancel cookie cb vs llog cleanup
test_61c() {
#   OBD_FAIL_OST_CANCEL_COOKIE_TIMEOUT 0x222
    touch $DIR/$tfile
    do_facet ost "sysctl -w lustre.fail_loc=0x80000222"
    rm $DIR/$tfile
    sleep 10
    fail ost1
}
# The description used to be a copy of 61b's; it now matches what 61c tests.
run_test 61c "test race cancel cookie cb vs llog cleanup"

#Adaptive Timeouts (bug 3055)
# Set to 1 by at_start when it had to force-enable AT, so that the cleanup at
# the end of this section knows to disable it again.
AT_MAX_SET=0

# Prepare the environment for the Adaptive Timeout (AT) tests.
# Globals:
#   AT_MAX_SET (written) - set to 1 if AT had to be enabled here
#   ATOLDBASE  (read/written) - original at_history value, saved on first call
# Returns:
#   1 after calling skip when the AT environment is invalid or at_history
#   cannot be found (callers use "at_start || return 0"); otherwise the status
#   of the last command run.
at_start()
{
    # Stop here when AT is unusable instead of carrying on after the skip.
    if ! at_is_valid; then
        skip "AT env is invalid"
        return 1
    fi

    if ! at_is_enabled; then
        echo "AT is disabled, enable it by force temporarily"
        at_max_set 600 mds ost client
        AT_MAX_SET=1
    fi

    if [ -z "$ATOLDBASE" ]; then
        local at_history
        at_history=$(do_facet mds "find /sys/ -name at_history")
        # Return 1 regardless of what skip itself returns.
        if [ -z "$at_history" ]; then
            skip "missing /sys/.../at_history "
            return 1
        fi
        ATOLDBASE=$(do_facet mds "cat $at_history")
        # speed up the timebase so we can check decreasing AT
        do_facet mds "echo 8 >> $at_history"
        # NOTE(review): the path was looked up on the mds and is reused
        # verbatim on ost1 - assumes both nodes expose the same path.
        do_facet ost1 "echo 8 >> $at_history"
    fi
}

# 65a: pause one MDS request (fail_val=30000) and expect an "Early reply #"
# message in the debug log.
test_65a() #bug 3055
{
    at_start || return 0
    $LCTL dk > /dev/null
    debugsave
    sysctl -w lnet.debug="+other"
    # slow down a request
    do_facet mds sysctl -w lustre.fail_val=30000
#define OBD_FAIL_PTLRPC_PAUSE_REQ        0x50a
    do_facet mds sysctl -w lustre.fail_loc=0x8000050a
    createmany -o $DIR/$tfile 10 > /dev/null
    unlinkmany $DIR/$tfile 10 > /dev/null
    # check for log message
    $LCTL dk | grep "Early reply #" || error "No early reply"
    # undo the lnet.debug change made above (mirrors test_65b)
    debugrestore
    # client should show 30s estimates
    grep portal $LPROC/mdc/${FSNAME}-MDT0000-mdc-*/timeouts
    sleep 9
    grep portal $LPROC/mdc/${FSNAME}-MDT0000-mdc-*/timeouts
}
run_test 65a "AT: verify early replies"

# 65b: pause a bulk write on ost1 (fail_val=30) and expect an
# "Early reply #" message in the debug log.
test_65b() #bug 3055
{
    at_start || return 0
    # turn on D_ADAPTTO
    debugsave
    sysctl -w lnet.debug="+other"
    $LCTL dk > /dev/null
    # slow down bulk i/o
    do_facet ost1 sysctl -w lustre.fail_val=30
#define OBD_FAIL_OST_BRW_PAUSE_PACK      0x224
    do_facet ost1 sysctl -w lustre.fail_loc=0x224

    rm -f $DIR/$tfile
    lfs setstripe $DIR/$tfile --index=0 --count=1
    # force some real bulk transfer
    multiop $DIR/$tfile oO_CREAT:O_RDWR:O_SYNC:w4096c

    do_facet ost1 sysctl -w lustre.fail_loc=0
    # check for log message
    $LCTL dk | grep "Early reply #" || error "No early reply"
    debugrestore
    # client should show 30s estimates
    grep portal $LPROC/osc/${FSNAME}-OST0000-osc-*/timeouts
}
run_test 65b "AT: verify early replies on packed reply / bulk"

# 66a: slow MDS requests by 5s, then 10s, then not at all, and require the
# current "portal 12" estimate to end up below the recorded worst value.
test_66a() #bug 3055
{
    local CUR WORST

    at_start || return 0
    grep "portal 12" $LPROC/mdc/${FSNAME}-MDT0000-mdc-*/timeouts
    # adjust 5s at a time so no early reply is sent (within deadline)
    do_facet mds "sysctl -w lustre.fail_val=5000"
#define OBD_FAIL_PTLRPC_PAUSE_REQ        0x50a
    do_facet mds "sysctl -w lustre.fail_loc=0x8000050a"
    createmany -o $DIR/$tfile 20 > /dev/null
    unlinkmany $DIR/$tfile 20 > /dev/null
    grep "portal 12" $LPROC/mdc/${FSNAME}-MDT0000-mdc-*/timeouts
    do_facet mds "sysctl -w lustre.fail_val=10000"
    do_facet mds "sysctl -w lustre.fail_loc=0x8000050a"
    createmany -o $DIR/$tfile 20 > /dev/null
    unlinkmany $DIR/$tfile 20 > /dev/null
    grep "portal 12" $LPROC/mdc/${FSNAME}-MDT0000-mdc-*/timeouts
    do_facet mds "sysctl -w lustre.fail_loc=0"
    sleep 9
    createmany -o $DIR/$tfile 20 > /dev/null
    unlinkmany $DIR/$tfile 20 > /dev/null
    grep portal $LPROC/mdc/${FSNAME}-MDT0000-mdc-*/timeouts | grep "portal 12"
    CUR=$(awk '/portal 12/ {print $5}' $LPROC/mdc/${FSNAME}-MDT0000-mdc-*/timeouts)
    WORST=$(awk '/portal 12/ {print $7}' $LPROC/mdc/${FSNAME}-MDT0000-mdc-*/timeouts)
    echo "Current MDT timeout $CUR, worst $WORST"
    [ $CUR -lt $WORST ] || error "Current $CUR should be less than worst $WORST"
}
run_test 66a "AT: verify MDT service time adjusts with no early replies"

# 66b: pause a reply on the client for longer than the current network
# estimate and require the recorded worst value to exceed the original.
test_66b() #bug 3055
{
    local ORIG CUR WORST

    at_start || return 0
    ORIG=$(awk '/network/ {print $4}' $LPROC/mdc/${FSNAME}-*/timeouts)
    sysctl -w lustre.fail_val=$(($ORIG + 5))
#define OBD_FAIL_PTLRPC_PAUSE_REP      0x50c
    sysctl -w lustre.fail_loc=0x50c
    ls $DIR/$tfile > /dev/null 2>&1
    sysctl -w lustre.fail_loc=0
    CUR=$(awk '/network/ {print $4}' $LPROC/mdc/${FSNAME}-*/timeouts)
    WORST=$(awk '/network/ {print $6}' $LPROC/mdc/${FSNAME}-*/timeouts)
    echo "network timeout orig $ORIG, cur $CUR, worst $WORST"
    [ $WORST -gt $ORIG ] || error "Worst $WORST should be worse than orig $ORIG"
}
run_test 66b "AT: verify net latency adjusts"

# 67a: slow every ost1 request slightly and count OSC connect attempts before
# and after; a non-zero delta is reported through error_ignore (bug 13721).
# Always returns 0.
test_67a() #bug 3055
{
    local CONN1 CONN2 ATTEMPTS

    at_start || return 0
    CONN1=$(awk '/_connect/ {total+=$2} END {print total}' $LPROC/osc/*/stats)
    # sleeping threads may drive values above this
    do_facet ost1 "sysctl -w lustre.fail_val=400"
#define OBD_FAIL_PTLRPC_PAUSE_REQ    0x50a
    do_facet ost1 "sysctl -w lustre.fail_loc=0x50a"
    createmany -o $DIR/$tfile 20 > /dev/null
    unlinkmany $DIR/$tfile 20 > /dev/null
    do_facet ost1 "sysctl -w lustre.fail_loc=0"
    CONN2=$(awk '/_connect/ {total+=$2} END {print total}' $LPROC/osc/*/stats)
    ATTEMPTS=$(($CONN2 - $CONN1))
    echo "$ATTEMPTS osc reconnect attemps on gradual slow"
    [ $ATTEMPTS -gt 0 ] && error_ignore 13721 "AT should have prevented reconnect"
    return 0
}
run_test 67a "AT: verify slow request processing doesn't induce reconnects"

# 67b: pause an OST create twice; reconnects during the first pass are only
# reported, reconnects during the second pass are an error.
# Returns 0 unless error aborts the test.
test_67b() #bug 3055
{
    local CONN1 CONN2 CONN3 ATTEMPTS

    at_start || return 0
    CONN1=$(awk '/_connect/ {total+=$2} END {print total}' $LPROC/osc/*/stats)
#define OBD_FAIL_OST_PAUSE_CREATE        0x223
    do_facet ost1 "sysctl -w lustre.fail_val=20000"
    do_facet ost1 "sysctl -w lustre.fail_loc=0x80000223"
    cp /etc/profile $DIR/$tfile || error "cp failed"
    client_reconnect
    cat $LPROC/ost/OSS/ost_create/timeouts
    log "phase 2"
    CONN2=$(awk '/_connect/ {total+=$2} END {print total}' $LPROC/osc/*/stats)
    ATTEMPTS=$(($CONN2 - $CONN1))
    echo "$ATTEMPTS osc reconnect attemps on instant slow"
    # do it again; should not timeout
    do_facet ost1 "sysctl -w lustre.fail_loc=0x80000223"
    cp /etc/profile $DIR/$tfile || error "cp failed"
    do_facet ost1 "sysctl -w lustre.fail_loc=0"
    client_reconnect
    cat $LPROC/ost/OSS/ost_create/timeouts
    CONN3=$(awk '/_connect/ {total+=$2} END {print total}' $LPROC/osc/*/stats)
    ATTEMPTS=$(($CONN3 - $CONN2))
    echo "$ATTEMPTS osc reconnect attemps on 2nd slow"
    [ $ATTEMPTS -gt 0 ] && error "AT should have prevented reconnect"
    return 0
}
run_test 67b "AT: verify instant slowdown doesn't induce reconnects"

# 68: with ldlm_enqueue_min raised to $TIMEOUT, pause lock cancels for
# TIMEOUT-1 and then TIMEOUT*3/2 and require both copies to succeed. The
# original ldlm_enqueue_min value is written back at the end.
test_68 () #bug 13813
{
    at_start || return 0
    local ldlm_enqueue_min=$(find /sys -name ldlm_enqueue_min)
    [ -z "$ldlm_enqueue_min" ] && skip "missing /sys/.../ldlm_enqueue_min" && return 0
    local ENQ_MIN=$(cat $ldlm_enqueue_min)
    echo $TIMEOUT >> $ldlm_enqueue_min
    rm -f $DIR/${tfile}_[1-2]
    lfs setstripe $DIR/$tfile --index=0 --count=1
#define OBD_FAIL_LDLM_PAUSE_CANCEL       0x312
    sysctl -w lustre.fail_val=$(($TIMEOUT - 1))
    sysctl -w lustre.fail_loc=0x80000312
    cp /etc/profile $DIR/${tfile}_1 || error "1st cp failed $?"
    sysctl -w lustre.fail_val=$((TIMEOUT * 3 / 2))
    sysctl -w lustre.fail_loc=0x80000312
    cp /etc/profile $DIR/${tfile}_2 || error "2nd cp failed $?"
    sysctl -w lustre.fail_loc=0
    echo $ENQ_MIN >> $ldlm_enqueue_min
    return 0
}
run_test 68 "AT: verify slowing locks"

# Undo what at_start changed: write the saved at_history value back
# (best effort, failures are ignored on purpose) ...
if [ -n "$ATOLDBASE" ]; then
    at_history=$(do_facet mds "find /sys/ -name at_history")
    do_facet mds "echo $ATOLDBASE >> $at_history" || true
    do_facet ost1 "echo $ATOLDBASE >> $at_history" || true
fi

# ... and disable AT again if at_start had to force-enable it.
if [ $AT_MAX_SET -ne 0 ]; then
    echo "restore AT status to be disabled"
    at_max_set 0 mds ost client
fi

# end of AT tests includes above lines

equals_msg $(basename $0): test complete, cleaning up
check_and_cleanup_lustre
# Dump the suite log if there is one; never fail the script on this line.
[ -f "$TESTSUITELOG" ] && cat "$TESTSUITELOG" || true

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -