📄 ctdb_recoverd.c
字号:
/*
   ctdb recovery daemon

   Copyright (C) Ronnie Sahlberg  2007

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.
*/

#include "includes.h"
#include "lib/events/events.h"
#include "system/filesys.h"
#include "system/time.h"
#include "popt.h"
#include "cmdline.h"
#include "../include/ctdb.h"
#include "../include/ctdb_private.h"

/*
  record of one banned node; allocated by ctdb_ban_node() and stored in
  ctdb_recoverd.banned_nodes[], indexed by vnn
 */
struct ban_state {
	struct ctdb_recoverd *rec;	/* owning recovery daemon state */
	uint32_t banned_node;		/* vnn of the node that is banned */
};

/*
  private state of recovery daemon
 */
struct ctdb_recoverd {
	struct ctdb_context *ctdb;
	uint32_t last_culprit;
	uint32_t culprit_counter;
	struct timeval first_recover_time;
	struct ban_state **banned_nodes;	/* per-vnn ban record, NULL when not banned */
	struct timeval priority_time;		/* reset to "now" when we ban ourselves */
};

/*
  timeouts derived from the tunables.
  both macros expand to an expression that uses a variable named 'ctdb',
  so a 'struct ctdb_context *ctdb' must be in scope wherever they are used
 */
#define CONTROL_TIMEOUT() timeval_current_ofs(ctdb->tunable.recover_timeout, 0)
#define MONITOR_TIMEOUT() timeval_current_ofs(ctdb->tunable.recover_interval, 0)

/*
  unban a node

  does nothing if vnn fails validation or the node has no ban record.
  otherwise the BANNED flag is changed via ctdb_ctrl_modflags() and the
  ban record is freed and its slot reset to NULL
 */
static void ctdb_unban_node(struct ctdb_recoverd *rec, uint32_t vnn)
{
	struct ctdb_context *ctdb = rec->ctdb;

	if (!ctdb_validate_vnn(ctdb, vnn)) {
		/* the message used to name ctdb_ban_node, which made the log misleading */
		DEBUG(0,("Bad vnn %u in ctdb_unban_node\n", vnn));
		return;
	}

	if (rec->banned_nodes[vnn] == NULL) {
		return;
	}

	/* presumably the last two arguments are (flags to set, flags to clear) - TODO confirm */
	ctdb_ctrl_modflags(ctdb, CONTROL_TIMEOUT(), vnn, 0, NODE_FLAGS_BANNED);

	talloc_free(rec->banned_nodes[vnn]);
	rec->banned_nodes[vnn] = NULL;
}

/*
  called when a ban has timed out

  p is the struct ban_state that was registered with the timed event in
  ctdb_ban_node(); the ev, te and t arguments are not used
 */
static void ctdb_ban_timeout(struct event_context *ev, struct timed_event *te, struct timeval t, void *p)
{
	struct ban_state *state = talloc_get_type(p, struct ban_state);
	struct ctdb_recoverd *rec = state->rec;
	uint32_t vnn = state->banned_node;

	DEBUG(0,("Node %u is now unbanned\n", vnn));
	ctdb_unban_node(rec, vnn);
}

/*
  ban a node for a period of time

  ban_time is passed as the seconds argument of timeval_current_ofs();
  a ban_time of 0 installs no timer, so the ban stays until
  ctdb_unban_node() is called explicitly.
  banning our own vnn also resets rec->priority_time to the current time
 */
static void ctdb_ban_node(struct ctdb_recoverd *rec, uint32_t vnn, uint32_t ban_time)
{
	struct ctdb_context *ctdb = rec->ctdb;

	if (!ctdb_validate_vnn(ctdb, vnn)) {
		DEBUG(0,("Bad vnn %u in ctdb_ban_node\n", vnn));
		return;
	}

	if (vnn == ctdb->vnn) {
		DEBUG(0,("self ban - lowering our election priority\n"));
		/* banning ourselves - lower our election priority */
		rec->priority_time = timeval_current();
	}

	ctdb_ctrl_modflags(ctdb, CONTROL_TIMEOUT(), vnn, NODE_FLAGS_BANNED, 0);

	/* the node may already be banned. drop the old record first, otherwise
	   it is orphaned under rec and its still-pending timer would later call
	   ctdb_unban_node() and cut the new ban short. the timer is allocated
	   with the record as its talloc context, so freeing the record is
	   expected to remove the timer as well */
	if (rec->banned_nodes[vnn] != NULL) {
		talloc_free(rec->banned_nodes[vnn]);
		rec->banned_nodes[vnn] = NULL;
	}

	rec->banned_nodes[vnn] = talloc(rec, struct ban_state);
	CTDB_NO_MEMORY_FATAL(ctdb, rec->banned_nodes[vnn]);

	rec->banned_nodes[vnn]->rec = rec;
	rec->banned_nodes[vnn]->banned_node = vnn;

	if (ban_time != 0) {
		event_add_timed(ctdb->ev, rec->banned_nodes[vnn],
				timeval_current_ofs(ban_time, 0),
				ctdb_ban_timeout, rec->banned_nodes[vnn]);
	}
}

/*
  change recovery mode on all nodes

  nodes flagged NODE_FLAGS_INACTIVE in nodemap are skipped.
  for CTDB_RECOVERY_ACTIVE each node is frozen before its mode is set,
  for CTDB_RECOVERY_NORMAL each node is thawed after its mode is set.
  returns 0 on success, -1 as soon as any control fails (remaining nodes
  are then left untouched)
 */
static int set_recovery_mode(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, uint32_t rec_mode)
{
	int j, ret;

	/* start the freeze process immediately on all nodes.
	   NOTE(review): this broadcast is sent for every rec_mode, including
	   CTDB_RECOVERY_NORMAL - confirm that is intended */
	ctdb_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
		     CTDB_CONTROL_FREEZE, CTDB_CTRL_FLAG_NOREPLY, tdb_null,
		     NULL, NULL, NULL, NULL, NULL);

	/* set the requested recovery mode on all nodes */
	for (j=0; j<nodemap->num; j++) {
		/* dont change it for nodes that are unavailable */
		if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
			continue;
		}

		if (rec_mode == CTDB_RECOVERY_ACTIVE) {
			ret = ctdb_ctrl_freeze(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn);
			if (ret != 0) {
				DEBUG(0, (__location__ " Unable to freeze node %u\n", nodemap->nodes[j].vnn));
				return -1;
			}
		}

		ret = ctdb_ctrl_setrecmode(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn, rec_mode);
		if (ret != 0) {
			DEBUG(0, (__location__ " Unable to set recmode on node %u\n", nodemap->nodes[j].vnn));
			return -1;
		}

		if (rec_mode == CTDB_RECOVERY_NORMAL) {
			ret = ctdb_ctrl_thaw(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn);
			if (ret != 0) {
				DEBUG(0, (__location__ " Unable to thaw node %u\n", nodemap->nodes[j].vnn));
				return -1;
			}
		}
	}

	return 0;
}

/*
  change recovery master on all nodes

  tells every node in nodemap that is not flagged NODE_FLAGS_INACTIVE
  that vnn is the recovery master.
  returns 0 on success, -1 on the first failing control
 */
static int set_recovery_master(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, uint32_t vnn)
{
	int j, ret;

	/* set recovery master to vnn on all nodes */
	for (j=0; j<nodemap->num; j++) {
		/* dont change it for nodes that are unavailable */
		if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
			continue;
		}

		ret = ctdb_ctrl_setrecmaster(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn, vnn);
		if (ret != 0) {
			DEBUG(0, (__location__ " Unable to set recmaster on node %u\n", nodemap->nodes[j].vnn));
			return -1;
		}
	}

	return 0;
}

/*
  ensure all other nodes have attached to any databases that we have

  vnn is our own node number and dbmap the list of databases on it.
  for every other node not flagged NODE_FLAGS_INACTIVE the remote database
  list is fetched, and every database id in dbmap that is missing there is
  created on that node under the name reported by node vnn.
  temporary data is allocated on mem_ctx.
  returns 0 on success, -1 on the first failing control
 */
static int create_missing_remote_databases(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap,
					   uint32_t vnn, struct ctdb_dbid_map *dbmap, TALLOC_CTX *mem_ctx)
{
	int i, j, db, ret;
	struct ctdb_dbid_map *remote_dbmap;

	/* verify that all other nodes have all our databases */
	for (j=0; j<nodemap->num; j++) {
		/* we dont need to check ourselves */
		if (nodemap->nodes[j].vnn == vnn) {
			continue;
		}
		/* dont check nodes that are unavailable */
		if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
			continue;
		}

		ret = ctdb_ctrl_getdbmap(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn, mem_ctx, &remote_dbmap);
		if (ret != 0) {
			/* report the node that was queried, not our own vnn */
			DEBUG(0, (__location__ " Unable to get dbids from node %u\n", nodemap->nodes[j].vnn));
			return -1;
		}

		/* step through all local databases */
		for (db=0; db<dbmap->num;db++) {
			const char *name;

			for (i=0;i<remote_dbmap->num;i++) {
				if (dbmap->dbids[db] == remote_dbmap->dbids[i]) {
					break;
				}
			}
			/* the remote node already have this database */
			if (i!=remote_dbmap->num) {
				continue;
			}

			/* ok so we need to create this database.
			   the result must be stored in ret: the check below used to
			   test the stale value left over from ctdb_ctrl_getdbmap() */
			ret = ctdb_ctrl_getdbname(ctdb, CONTROL_TIMEOUT(), vnn, dbmap->dbids[db], mem_ctx, &name);
			if (ret != 0) {
				DEBUG(0, (__location__ " Unable to get dbname from node %u\n", vnn));
				return -1;
			}

			ret = ctdb_ctrl_createdb(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn, mem_ctx, name);
			if (ret != 0) {
				DEBUG(0, (__location__ " Unable to create remote db:%s\n", name));
				return -1;
			}
		}
	}

	return 0;
}

/*
  ensure we are attached to any databases that anyone else is attached to

  vnn is our own node number. *dbmap is our current database list; every
  time a missing database has been created locally the list is fetched
  again from node vnn and stored back through dbmap.
  temporary data is allocated on mem_ctx.
  returns 0 on success, -1 on the first failing control
 */
static int create_missing_local_databases(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap,
					  uint32_t vnn, struct ctdb_dbid_map **dbmap, TALLOC_CTX *mem_ctx)
{
	int i, j, db, ret;
	struct ctdb_dbid_map *remote_dbmap;

	/* verify that we have all database any other node has */
	for (j=0; j<nodemap->num; j++) {
		/* we dont need to check ourselves */
		if (nodemap->nodes[j].vnn == vnn) {
			continue;
		}
		/* dont check nodes that are unavailable */
		if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
			continue;
		}

		ret = ctdb_ctrl_getdbmap(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn, mem_ctx, &remote_dbmap);
		if (ret != 0) {
			/* report the node that was queried, not our own vnn */
			DEBUG(0, (__location__ " Unable to get dbids from node %u\n", nodemap->nodes[j].vnn));
			return -1;
		}

		/* step through all databases on the remote node */
		for (db=0; db<remote_dbmap->num;db++) {
			const char *name;

			for (i=0;i<(*dbmap)->num;i++) {
				if (remote_dbmap->dbids[db] == (*dbmap)->dbids[i]) {
					break;
				}
			}
			/* we already have this db locally */
			if (i!=(*dbmap)->num) {
				continue;
			}

			/* ok so we need to create this database and rebuild dbmap.
			   the result must be stored in ret: the check below used to
			   test the stale value left over from ctdb_ctrl_getdbmap() */
			ret = ctdb_ctrl_getdbname(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn,
						  remote_dbmap->dbids[db], mem_ctx, &name);
			if (ret != 0) {
				DEBUG(0, (__location__ " Unable to get dbname from node %u\n", nodemap->nodes[j].vnn));
				return -1;
			}

			ret = ctdb_ctrl_createdb(ctdb, CONTROL_TIMEOUT(), vnn, mem_ctx, name);
			if (ret != 0) {
				DEBUG(0, (__location__ " Unable to create local db:%s\n", name));
				return -1;
			}

			ret = ctdb_ctrl_getdbmap(ctdb, CONTROL_TIMEOUT(), vnn, mem_ctx, dbmap);
			if (ret != 0) {
				DEBUG(0, (__location__ " Unable to reread dbmap on node %u\n", vnn));
				return -1;
			}
		}
	}

	return 0;
}

/*
  pull all the remote database contents into ours

  for every database in dbmap, ctdb_ctrl_copydb() is called once per other
  node that is not flagged NODE_FLAGS_INACTIVE, with that node as source
  and vnn as destination.
  returns 0 on success, -1 on the first failing copy
 */
static int pull_all_remote_databases(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap,
				     uint32_t vnn, struct ctdb_dbid_map *dbmap, TALLOC_CTX *mem_ctx)
{
	int i, j, ret;

	/* pull all records from all other nodes across onto this node
	   (this merges based on rsn)
	*/
	for (i=0;i<dbmap->num;i++) {
		for (j=0; j<nodemap->num; j++) {
			/* we dont need to merge with ourselves */
			if (nodemap->nodes[j].vnn == vnn) {
				continue;
			}
			/* dont merge from nodes that are unavailable */
			if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
				continue;
			}

			ret = ctdb_ctrl_copydb(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn, vnn,
					       dbmap->dbids[i], CTDB_LMASTER_ANY, mem_ctx);
			if (ret != 0) {
				DEBUG(0, (__location__ " Unable to copy db from node %u to node %u\n",
					  nodemap->nodes[j].vnn, vnn));
				return -1;
			}
		}
	}

	return 0;
}

/*
  change the dmaster on all databases to point to us

  for every database in dbmap, ctdb_ctrl_setdmaster() is sent to every
  node that is not flagged NODE_FLAGS_INACTIVE (our own node included),
  with vnn as the new dmaster. mem_ctx is not used here.
  returns 0 on success, -1 on the first failing control
 */
static int update_dmaster_on_all_databases(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap,
					   uint32_t vnn, struct ctdb_dbid_map *dbmap, TALLOC_CTX *mem_ctx)
{
	int i, j, ret;

	/* update dmaster to point to this node for all databases/nodes */
	for (i=0;i<dbmap->num;i++) {
		for (j=0; j<nodemap->num; j++) {
			/* dont repoint nodes that are unavailable */
			if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
				continue;
			}

			ret = ctdb_ctrl_setdmaster(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn,
						   ctdb, dbmap->dbids[i], vnn);
			if (ret != 0) {
				DEBUG(0, (__location__ " Unable to set dmaster for node %u db:0x%08x\n",
					  nodemap->nodes[j].vnn, dbmap->dbids[i]));
				return -1;
			}
		}
	}

	return 0;
}

/*
  update flags on all active nodes

  for each entry in nodemap (inactive entries included) one
  CTDB_SRVID_NODE_FLAGS_CHANGED message carrying that node's vnn and flags
  is sent to CTDB_BROADCAST_CONNECTED.
  the result of ctdb_send_message() is not checked; always returns 0
 */
static int update_flags_on_all_nodes(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
{
	int i;

	for (i=0;i<nodemap->num;i++) {
		struct ctdb_node_flag_change c;
		TDB_DATA data;

		c.vnn = nodemap->nodes[i].vnn;
		c.flags = nodemap->nodes[i].flags;

		/* the message payload is the raw bytes of the struct */
		data.dptr = (uint8_t *)&c;
		data.dsize = sizeof(c);

		ctdb_send_message(ctdb, CTDB_BROADCAST_CONNECTED,
				  CTDB_SRVID_NODE_FLAGS_CHANGED, data);
	}

	return 0;
}

/*
  vacuum one database

  works in three steps, skipping nodes flagged NODE_FLAGS_INACTIVE:
   1. read the max rsn for db_id from CTDB_CURRENT_NODE
   2. ctdb_ctrl_set_rsn_nonempty() with max_rsn+1 on every node
   3. ctdb_ctrl_delete_low_rsn() with max_rsn+1 on every node
  step 2 is completed on all nodes before step 3 starts.
  returns 0 on success, -1 on the first failing control
 */
static int vacuum_db(struct ctdb_context *ctdb, uint32_t db_id, struct ctdb_node_map *nodemap)
{
	uint64_t max_rsn;
	int ret, i;

	/* find max rsn on our local node for this db */
	ret = ctdb_ctrl_get_max_rsn(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, db_id, &max_rsn);
	if (ret != 0) {
		return -1;
	}

	/* set rsn on non-empty records to max_rsn+1 */
	for (i=0;i<nodemap->num;i++) {
		if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
			continue;
		}
		ret = ctdb_ctrl_set_rsn_nonempty(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[i].vnn,
						 db_id, max_rsn+1);
		if (ret != 0) {
			DEBUG(0,(__location__ " Failed to set rsn on node %u to %llu\n",
				 nodemap->nodes[i].vnn, (unsigned long long)max_rsn+1));
			return -1;
		}
	}

	/* delete records with rsn < max_rsn+1 on all nodes */
	for (i=0;i<nodemap->num;i++) {
		if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
			continue;
		}
		ret = ctdb_ctrl_delete_low_rsn(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[i].vnn,
					       db_id, max_rsn+1);
		if (ret != 0) {
			DEBUG(0,(__location__ " Failed to delete records on node %u with rsn below %llu\n",
				 nodemap->nodes[i].vnn, (unsigned long long)max_rsn+1));
			return -1;
		}
	}

	return 0;
}

/*
  vacuum all attached databases

  runs vacuum_db() on every database id in dbmap, in order.
  returns 0 on success, -1 as soon as one database fails to vacuum
 */
static int vacuum_all_databases(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, struct ctdb_dbid_map *dbmap)
{
	int i;

	/* vacuum each database in turn, stopping at the first failure */
	for (i=0;i<dbmap->num;i++) {
		if (vacuum_db(ctdb, dbmap->dbids[i], nodemap) != 0) {
			return -1;
		}
	}

	return 0;
}

/*
  push out all our database contents to all other nodes

  for every database in dbmap, ctdb_ctrl_copydb() is called once per other
  node that is not flagged NODE_FLAGS_INACTIVE, with vnn as source and
  that node as destination.
  returns 0 on success, -1 on the first failing copy
 */
static int push_all_local_databases(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap,
				    uint32_t vnn, struct ctdb_dbid_map *dbmap, TALLOC_CTX *mem_ctx)
{
	int i, j, ret;

	/* push all records out to the nodes again */
	for (i=0;i<dbmap->num;i++) {
		for (j=0; j<nodemap->num; j++) {
			/* we dont need to push to ourselves */
			if (nodemap->nodes[j].vnn == vnn) {
				continue;
			}
			/* dont push to nodes that are unavailable */
			if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
				continue;
			}

			ret = ctdb_ctrl_copydb(ctdb, CONTROL_TIMEOUT(), vnn, nodemap->nodes[j].vnn,
					       dbmap->dbids[i], CTDB_LMASTER_ANY, mem_ctx);
			if (ret != 0) {
				DEBUG(0, (__location__ " Unable to copy db from node %u to node %u\n",
					  vnn, nodemap->nodes[j].vnn));
				return -1;
			}
		}
	}

	return 0;
}

/*
  ensure all nodes have the same vnnmap we do
  NOTE(review): the body of this function is not visible in this excerpt
 */
static int update_vnnmap_on_all_nodes(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, uint32_t vnn, struct ctdb_vnn_map *vnnmap, TALLOC_CTX *mem_ctx)
{
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -