📄 ndbrestarts.cpp
字号:
"Could not restart node "<< nodeId); // Create random value, max 10 secs int max = 10; int seconds = (myRandom48(max)) + 1; g_info << "Waiting for " << seconds << "(" << max << ") secs " << endl; NdbSleep_SecSleep(seconds); nodeId = _restarter.getRandomNodeOtherNodeGroup(nodeId, rand()); g_info << _restart->m_name << ": node = "<< nodeId << endl; CHECK(_restarter.insertErrorInNode(nodeId, 9999) == 0, "Could not restart node "<< nodeId); return NDBT_OK;}int twoMasterNodeFailure(NdbRestarter& _restarter, const NdbRestarts::NdbRestart* _restart){ int nodeId = _restarter.getDbNodeId(0); g_info << _restart->m_name << ": node = "<< nodeId << endl; CHECK(_restarter.insertErrorInNode(nodeId, 39999) == 0, "Could not restart node "<< nodeId); // Create random value, max 10 secs int max = 10; int seconds = (myRandom48(max)) + 1; g_info << "Waiting for " << seconds << "(" << max << ") secs " << endl; NdbSleep_SecSleep(seconds); nodeId = _restarter.getDbNodeId(0); g_info << _restart->m_name << ": node = "<< nodeId << endl; CHECK(_restarter.insertErrorInNode(nodeId, 39999) == 0, "Could not restart node "<< nodeId); return NDBT_OK;}int get50PercentOfNodes(NdbRestarter& restarter, int * _nodes){ // For now simply return all nodes with even node id // TODO Check nodegroup and return one node from each int num50Percent = restarter.getNumDbNodes() / 2; assert(num50Percent <= MAX_NDB_NODES); // Calculate which nodes to stop, select all even nodes for (int i = 0; i < num50Percent; i++){ _nodes[i] = restarter.getDbNodeId(i*2); } return num50Percent;}int fiftyPercentFail(NdbRestarter& _restarter, const NdbRestarts::NdbRestart* _restart){ int nodes[MAX_NDB_NODES]; int numNodes = get50PercentOfNodes(_restarter, nodes); // Stop the nodes, with nostart and abort for (int i = 0; i < numNodes; i++){ g_info << "Stopping node "<< nodes[i] << endl; int res = _restarter.restartOneDbNode(nodes[i], false, true, true); CHECK(res == 0, "Could not stop node: "<< nodes[i]); } CHECK(_restarter.waitNodesNoStart(nodes, numNodes) == 0, "waitNodesNoStart"); // Order all nodes to start ndbout << "Starting all nodes" << endl; CHECK(_restarter.startAll() == 0, "Could not start all nodes"); return NDBT_OK;}int restartAllNodesGracfeul(NdbRestarter& _restarter, const NdbRestarts::NdbRestart* _restart){ g_info << _restart->m_name << endl; // Restart graceful CHECK(_restarter.restartAll() == 0, "Could not restart all nodes"); return NDBT_OK;}int restartAllNodesAbort(NdbRestarter& _restarter, const NdbRestarts::NdbRestart* _restart){ g_info << _restart->m_name << endl; // Restart abort CHECK(_restarter.restartAll(false, false, true) == 0, "Could not restart all nodes"); return NDBT_OK;}int restartAllNodesError9999(NdbRestarter& _restarter, const NdbRestarts::NdbRestart* _restart){ g_info << _restart->m_name << endl; // Restart with error insert CHECK(_restarter.insertErrorInAllNodes(9999) == 0, "Could not restart all nodes "); return NDBT_OK;}int fiftyPercentStopAndWait(NdbRestarter& _restarter, const NdbRestarts::NdbRestart* _restart){ int nodes[MAX_NDB_NODES]; int numNodes = get50PercentOfNodes(_restarter, nodes); // Stop the nodes, with nostart and abort for (int i = 0; i < numNodes; i++){ g_info << "Stopping node "<<nodes[i] << endl; int res = _restarter.restartOneDbNode(nodes[i], false, true, true); CHECK(res == 0, "Could not stop node: "<< nodes[i]); } CHECK(_restarter.waitNodesNoStart(nodes, numNodes) == 0, "waitNodesNoStart"); // Create random value, max 120 secs int max = 120; int seconds = (myRandom48(max)) + 1; g_info << "Waiting for " << seconds << "(" << max << ") secs " << endl; NdbSleep_SecSleep(seconds); // Restart graceful CHECK(_restarter.restartAll() == 0, "Could not restart all nodes"); g_info << _restart->m_name << endl; return NDBT_OK;}intNFDuringNR_codes[] = { 7121, 5027, 7172, 6000, 6001, 6002, 7171, 7130, 7133, 7138, 7154, 7144, 5026, 7139, 7132, //LCP 8000, 8001, 5010, 7022, 7024, 7016, 7017, 5002};int restartNFDuringNR(NdbRestarter& _restarter, const NdbRestarts::NdbRestart* _restart){ myRandom48Init(NdbTick_CurrentMillisecond()); int i; const int sz = sizeof(NFDuringNR_codes)/sizeof(NFDuringNR_codes[0]); for(i = 0; i<sz; i++){ int randomId = myRandom48(_restarter.getNumDbNodes()); int nodeId = _restarter.getDbNodeId(randomId); int error = NFDuringNR_codes[i]; g_info << _restart->m_name << ": node = " << nodeId << " error code = " << error << endl; CHECK(_restarter.restartOneDbNode(nodeId, false, true, true) == 0, "Could not restart node "<< nodeId); CHECK(_restarter.waitNodesNoStart(&nodeId, 1) == 0, "waitNodesNoStart failed"); int val[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 } ; CHECK(_restarter.dumpStateOneNode(nodeId, val, 2) == 0, "failed to set RestartOnErrorInsert"); CHECK(_restarter.insertErrorInNode(nodeId, error) == 0, "failed to set error insert"); CHECK(_restarter.startNodes(&nodeId, 1) == 0, "failed to start node"); NdbSleep_SecSleep(3); //CHECK(_restarter.waitNodesNoStart(&nodeId, 1) == 0, // "waitNodesNoStart failed"); _restarter.waitNodesNoStart(&nodeId, 1); CHECK(_restarter.startNodes(&nodeId, 1) == 0, "failed to start node"); CHECK(_restarter.waitNodesStarted(&nodeId, 1) == 0, "waitNodesStarted failed"); } return NDBT_OK; if(_restarter.getNumDbNodes() < 4) return NDBT_OK; char buf[256]; if(NdbEnv_GetEnv("USER", buf, 256) == 0 || strcmp(buf, "ejonore") != 0) return NDBT_OK; for(i = 0; i<sz; i++){ const int randomId = myRandom48(_restarter.getNumDbNodes()); int nodeId = _restarter.getDbNodeId(randomId); const int error = NFDuringNR_codes[i]; const int masterNodeId = _restarter.getMasterNodeId(); CHECK(masterNodeId > 0, "getMasterNodeId failed"); int crashNodeId = 0; do { int rand = myRandom48(1000); crashNodeId = _restarter.getRandomNodeOtherNodeGroup(nodeId, rand); } while(crashNodeId == masterNodeId); CHECK(crashNodeId > 0, "getMasterNodeId failed"); g_info << _restart->m_name << " restarting node = " << nodeId << " error code = " << error << " crash node = " << crashNodeId << endl; CHECK(_restarter.restartOneDbNode(nodeId, false, true, true) == 0, "Could not restart node "<< nodeId); CHECK(_restarter.waitNodesNoStart(&nodeId, 1) == 0, "waitNodesNoStart failed"); int val[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; CHECK(_restarter.dumpStateOneNode(crashNodeId, val, 2) == 0, "failed to set RestartOnErrorInsert"); CHECK(_restarter.insertErrorInNode(crashNodeId, error) == 0, "failed to set error insert"); CHECK(_restarter.startNodes(&nodeId, 1) == 0, "failed to start node"); CHECK(_restarter.waitClusterStarted() == 0, "waitClusterStarted failed"); } return NDBT_OK;}intNRDuringLCP_Master_codes[] = { 7009, // Insert system error in master when local checkpoint is idle. 7010, // Insert system error in master when local checkpoint is in the // state clcpStatus = CALCULATE_KEEP_GCI. 7013, // Insert system error in master when local checkpoint is in the // state clcpStatus = COPY_GCI before sending COPY_GCIREQ. 7014, // Insert system error in master when local checkpoint is in the // state clcpStatus = TC_CLOPSIZE before sending TC_CLOPSIZEREQ. 7015, // Insert system error in master when local checkpoint is in the // state clcpStatus = START_LCP_ROUND before sending START_LCP_ROUND. 7019, // Insert system error in master when local checkpoint is in the // state clcpStatus = IDLE before sending CONTINUEB(ZCHECK_TC_COUNTER). 7075, // Master. Don't send any LCP_FRAG_ORD(last=true) // And crash when all have "not" been sent 7021, // Crash in master when receiving START_LCP_REQ 7023, // Crash in master when sending START_LCP_CONF 7025, // Crash in master when receiving LCP_FRAG_REP 7026, // Crash in master when changing state to LCP_TAB_COMPLETED 7027 // Crash in master when changing state to LCP_TAB_SAVED};intNRDuringLCP_NonMaster_codes[] = { 7020, // Insert system error in local checkpoint participant at reception // of COPY_GCIREQ. 8000, // Crash particpant when receiving TCGETOPSIZEREQ 8001, // Crash particpant when receiving TC_CLOPSIZEREQ 5010, // Crash any when receiving LCP_FRAGORD 7022, // Crash in !master when receiving START_LCP_REQ 7024, // Crash in !master when sending START_LCP_CONF 7016, // Crash in !master when receiving LCP_FRAG_REP 7017, // Crash in !master when changing state to LCP_TAB_COMPLETED 7018 // Crash in !master when changing state to LCP_TAB_SAVED};int restartNodeDuringLCP(NdbRestarter& _restarter, const NdbRestarts::NdbRestart* _restart) { int i; // Master int val = DumpStateOrd::DihMinTimeBetweenLCP; CHECK(_restarter.dumpStateAllNodes(&val, 1) == 0, "Failed to set LCP to min value"); // Set LCP to min val int sz = sizeof(NRDuringLCP_Master_codes)/ sizeof(NRDuringLCP_Master_codes[0]); for(i = 0; i<sz; i++) { int error = NRDuringLCP_Master_codes[i]; int masterNodeId = _restarter.getMasterNodeId(); CHECK(masterNodeId > 0, "getMasterNodeId failed"); ndbout << _restart->m_name << " restarting master node = " << masterNodeId << " error code = " << error << endl; { int val[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; CHECK(_restarter.dumpStateAllNodes(val, 2) == 0, "failed to set RestartOnErrorInsert"); } CHECK(_restarter.insertErrorInNode(masterNodeId, error) == 0, "failed to set error insert"); CHECK(_restarter.waitNodesNoStart(&masterNodeId, 1, 300) == 0, "failed to wait no start"); CHECK(_restarter.startNodes(&masterNodeId, 1) == 0, "failed to start node"); CHECK(_restarter.waitClusterStarted(300) == 0, "waitClusterStarted failed"); { int val = DumpStateOrd::DihMinTimeBetweenLCP; CHECK(_restarter.dumpStateOneNode(masterNodeId, &val, 1) == 0, "failed to set error insert"); } } // NON-Master sz = sizeof(NRDuringLCP_NonMaster_codes)/ sizeof(NRDuringLCP_NonMaster_codes[0]); for(i = 0; i<sz; i++) { int error = NRDuringLCP_NonMaster_codes[i]; int nodeId = getRandomNodeId(_restarter); int masterNodeId = _restarter.getMasterNodeId(); CHECK(masterNodeId > 0, "getMasterNodeId failed"); while (nodeId == masterNodeId) { nodeId = getRandomNodeId(_restarter); } ndbout << _restart->m_name << " restarting non-master node = " << nodeId << " error code = " << error << endl; int val[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; CHECK(_restarter.dumpStateAllNodes(val, 2) == 0, "failed to set RestartOnErrorInsert"); CHECK(_restarter.insertErrorInNode(nodeId, error) == 0, "failed to set error insert"); CHECK(_restarter.waitNodesNoStart(&nodeId, 1, 300) == 0, "failed to wait no start"); CHECK(_restarter.startNodes(&nodeId, 1) == 0, "failed to start node"); CHECK(_restarter.waitClusterStarted(300) == 0, "waitClusterStarted failed"); { int val = DumpStateOrd::DihMinTimeBetweenLCP; CHECK(_restarter.dumpStateOneNode(nodeId, &val, 1) == 0, "failed to set error insert"); } } return NDBT_OK;}int stopOnError(NdbRestarter& _restarter, const NdbRestarts::NdbRestart* _restart){ myRandom48Init(NdbTick_CurrentMillisecond()); int randomId = myRandom48(_restarter.getNumDbNodes()); int nodeId = _restarter.getDbNodeId(randomId); do { g_info << _restart->m_name << ": node = " << nodeId << endl; CHECK(_restarter.waitClusterStarted(300) == 0, "waitClusterStarted failed"); int val = DumpStateOrd::NdbcntrTestStopOnError; CHECK(_restarter.dumpStateOneNode(nodeId, &val, 1) == 0, "failed to set NdbcntrTestStopOnError"); NdbSleep_SecSleep(3); CHECK(_restarter.waitClusterStarted(300) == 0, "waitClusterStarted failed"); } while (false); return NDBT_OK;}int getRandomNodeId(NdbRestarter& _restarter) { myRandom48Init(NdbTick_CurrentMillisecond()); int randomId = myRandom48(_restarter.getNumDbNodes()); int nodeId = _restarter.getDbNodeId(randomId); return nodeId;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -