📄 ctdb_recoverd.c

📁 samba最新软件
💻 C
📖 第 1 页 / 共 3 页
字号:
	int j, ret;	/* push the new vnn map out to all the nodes */	for (j=0; j<nodemap->num; j++) {		/* dont push to nodes that are unavailable */		if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {			continue;		}		ret = ctdb_ctrl_setvnnmap(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn, mem_ctx, vnnmap);		if (ret != 0) {			DEBUG(0, (__location__ " Unable to set vnnmap for node %u\n", vnn));			return -1;		}	}	return 0;}/*  handler for when the admin bans a node*/static void ban_handler(struct ctdb_context *ctdb, uint64_t srvid, 			TDB_DATA data, void *private_data){	struct ctdb_recoverd *rec = talloc_get_type(private_data, struct ctdb_recoverd);	struct ctdb_ban_info *b = (struct ctdb_ban_info *)data.dptr;	uint32_t recmaster;	int ret;	if (data.dsize != sizeof(*b)) {		DEBUG(0,("Bad data in ban_handler\n"));		return;	}	ret = ctdb_ctrl_getrecmaster(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, &recmaster);	if (ret != 0) {		DEBUG(0,(__location__ " Failed to find the recmaster\n"));		return;	}	if (recmaster != ctdb->vnn) {		DEBUG(0,("We are not the recmaster - ignoring ban request\n"));		return;	}	DEBUG(0,("Node %u has been banned for %u seconds by the administrator\n", 		 b->vnn, b->ban_time));	ctdb_ban_node(rec, b->vnn, b->ban_time);}/*  handler for when the admin unbans a node*/static void unban_handler(struct ctdb_context *ctdb, uint64_t srvid, 			  TDB_DATA data, void *private_data){	struct ctdb_recoverd *rec = talloc_get_type(private_data, struct ctdb_recoverd);	uint32_t vnn;	int ret;	uint32_t recmaster;	if (data.dsize != sizeof(uint32_t)) {		DEBUG(0,("Bad data in unban_handler\n"));		return;	}	vnn = *(uint32_t *)data.dptr;	ret = ctdb_ctrl_getrecmaster(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, &recmaster);	if (ret != 0) {		DEBUG(0,(__location__ " Failed to find the recmaster\n"));		return;	}	if (recmaster != ctdb->vnn) {		DEBUG(0,("We are not the recmaster - ignoring unban request\n"));		return;	}	DEBUG(0,("Node %u has been unbanned by the administrator\n", vnn));	ctdb_unban_node(rec, vnn);}/*  called when ctdb_wait_timeout should finish */static void ctdb_wait_handler(struct event_context *ev, struct timed_event *te, 			      struct timeval yt, void *p){	uint32_t *timed_out = (uint32_t *)p;	(*timed_out) = 1;}/*  wait for a given number of seconds */static void ctdb_wait_timeout(struct ctdb_context *ctdb, uint32_t secs){	uint32_t timed_out = 0;	event_add_timed(ctdb->ev, ctdb, timeval_current_ofs(secs, 0), ctdb_wait_handler, &timed_out);	while (!timed_out) {		event_loop_once(ctdb->ev);	}}/*  we are the recmaster, and recovery is needed - start a recovery run */static int do_recovery(struct ctdb_recoverd *rec, 		       TALLOC_CTX *mem_ctx, uint32_t vnn, uint32_t num_active,		       struct ctdb_node_map *nodemap, struct ctdb_vnn_map *vnnmap,		       uint32_t culprit){	struct ctdb_context *ctdb = rec->ctdb;	int i, j, ret;	uint32_t generation;	struct ctdb_dbid_map *dbmap;	if (rec->last_culprit != culprit ||	    timeval_elapsed(&rec->first_recover_time) > ctdb->tunable.recovery_grace_period) {		/* either a new node is the culprit, or we've decide to forgive them */		rec->last_culprit = culprit;		rec->first_recover_time = timeval_current();		rec->culprit_counter = 0;	}	rec->culprit_counter++;	if (rec->culprit_counter > 2*nodemap->num) {		DEBUG(0,("Node %u has caused %u recoveries in %.0f seconds - banning it for %u seconds\n",			 culprit, rec->culprit_counter, timeval_elapsed(&rec->first_recover_time),			 ctdb->tunable.recovery_ban_period));		ctdb_ban_node(rec, culprit, ctdb->tunable.recovery_ban_period);	}	if (!ctdb_recovery_lock(ctdb, true)) {		DEBUG(0,("Unable to get recovery lock - aborting recovery\n"));		return -1;	}	/* set recovery mode to active on all nodes */	ret = set_recovery_mode(ctdb, nodemap, CTDB_RECOVERY_ACTIVE);	if (ret!=0) {		DEBUG(0, (__location__ " Unable to set recovery mode to active on cluster\n"));		return -1;	}	DEBUG(0, (__location__ " Recovery initiated due to problem with node %u\n", culprit));	/* pick a new generation number */	generation = random();	/* change the vnnmap on this node to use the new generation 	   number but not on any other nodes.	   this guarantees that if we abort the recovery prematurely	   for some reason (a node stops responding?)	   that we can just return immediately and we will reenter	   recovery shortly again.	   I.e. we deliberately leave the cluster with an inconsistent	   generation id to allow us to abort recovery at any stage and	   just restart it from scratch.	 */	vnnmap->generation = generation;	ret = ctdb_ctrl_setvnnmap(ctdb, CONTROL_TIMEOUT(), vnn, mem_ctx, vnnmap);	if (ret != 0) {		DEBUG(0, (__location__ " Unable to set vnnmap for node %u\n", vnn));		return -1;	}	/* get a list of all databases */	ret = ctdb_ctrl_getdbmap(ctdb, CONTROL_TIMEOUT(), vnn, mem_ctx, &dbmap);	if (ret != 0) {		DEBUG(0, (__location__ " Unable to get dbids from node :%u\n", vnn));		return -1;	}	/* verify that all other nodes have all our databases */	ret = create_missing_remote_databases(ctdb, nodemap, vnn, dbmap, mem_ctx);	if (ret != 0) {		DEBUG(0, (__location__ " Unable to create missing remote databases\n"));		return -1;	}	/* verify that we have all the databases any other node has */	ret = create_missing_local_databases(ctdb, nodemap, vnn, &dbmap, mem_ctx);	if (ret != 0) {		DEBUG(0, (__location__ " Unable to create missing local databases\n"));		return -1;	}	/* verify that all other nodes have all our databases */	ret = create_missing_remote_databases(ctdb, nodemap, vnn, dbmap, mem_ctx);	if (ret != 0) {		DEBUG(0, (__location__ " Unable to create missing remote databases\n"));		return -1;	}	DEBUG(1, (__location__ " Recovery - created remote databases\n"));	/* pull all remote databases onto the local node */	ret = pull_all_remote_databases(ctdb, nodemap, vnn, dbmap, mem_ctx);	if (ret != 0) {		DEBUG(0, (__location__ " Unable to pull remote databases\n"));		return -1;	}	DEBUG(1, (__location__ " Recovery - pulled remote databases\n"));	/* push all local databases to the remote nodes */	ret = push_all_local_databases(ctdb, nodemap, vnn, dbmap, mem_ctx);	if (ret != 0) {		DEBUG(0, (__location__ " Unable to push local databases\n"));		return -1;	}	DEBUG(1, (__location__ " Recovery - pushed remote databases\n"));	/* build a new vnn map with all the currently active and	   unbanned nodes */	generation = random();	vnnmap = talloc(mem_ctx, struct ctdb_vnn_map);	CTDB_NO_MEMORY(ctdb, vnnmap);	vnnmap->generation = generation;	vnnmap->size = num_active;	vnnmap->map = talloc_zero_array(vnnmap, uint32_t, vnnmap->size);	for (i=j=0;i<nodemap->num;i++) {		if (!(nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE)) {			vnnmap->map[j++] = nodemap->nodes[i].vnn;		}	}	/* update to the new vnnmap on all nodes */	ret = update_vnnmap_on_all_nodes(ctdb, nodemap, vnn, vnnmap, mem_ctx);	if (ret != 0) {		DEBUG(0, (__location__ " Unable to update vnnmap on all nodes\n"));		return -1;	}	DEBUG(1, (__location__ " Recovery - updated vnnmap\n"));	/* update recmaster to point to us for all nodes */	ret = set_recovery_master(ctdb, nodemap, vnn);	if (ret!=0) {		DEBUG(0, (__location__ " Unable to set recovery master\n"));		return -1;	}	DEBUG(1, (__location__ " Recovery - updated recmaster\n"));	/* repoint all local and remote database records to the local	   node as being dmaster	 */	ret = update_dmaster_on_all_databases(ctdb, nodemap, vnn, dbmap, mem_ctx);	if (ret != 0) {		DEBUG(0, (__location__ " Unable to update dmaster on all databases\n"));		return -1;	}	DEBUG(1, (__location__ " Recovery - updated dmaster on all databases\n"));	/*	  update all nodes to have the same flags that we have	 */	ret = update_flags_on_all_nodes(ctdb, nodemap);	if (ret != 0) {		DEBUG(0, (__location__ " Unable to update flags on all nodes\n"));		return -1;	}		DEBUG(1, (__location__ " Recovery - updated flags\n"));	/*	  run a vacuum operation on empty records	 */	ret = vacuum_all_databases(ctdb, nodemap, dbmap);	if (ret != 0) {		DEBUG(0, (__location__ " Unable to vacuum all databases\n"));		return -1;	}	DEBUG(1, (__location__ " Recovery - vacuumed all databases\n"));	/*	  if enabled, tell nodes to takeover their public IPs	 */	if (ctdb->takeover.enabled) {		ret = ctdb_takeover_run(ctdb, nodemap);		if (ret != 0) {			DEBUG(0, (__location__ " Unable to setup public takeover addresses\n"));			return -1;		}		DEBUG(1, (__location__ " Recovery - done takeover\n"));	}	/* disable recovery mode */	ret = set_recovery_mode(ctdb, nodemap, CTDB_RECOVERY_NORMAL);	if (ret!=0) {		DEBUG(0, (__location__ " Unable to set recovery mode to normal on cluster\n"));		return -1;	}	/* send a message to all clients telling them that the cluster 	   has been reconfigured */	ctdb_send_message(ctdb, CTDB_BROADCAST_ALL, CTDB_SRVID_RECONFIGURE, tdb_null);	DEBUG(0, (__location__ " Recovery complete\n"));	/* We just finished a recovery successfully. 	   We now wait for rerecovery_timeout before we allow 	   another recovery to take place.	*/	DEBUG(0, (__location__ " New recoveries supressed for the rerecovery timeout\n"));	ctdb_wait_timeout(ctdb, ctdb->tunable.rerecovery_timeout);	DEBUG(0, (__location__ " Rerecovery timeout elapsed. Recovery reactivated.\n"));	return 0;}/*  elections are won by first checking the number of connected nodes, then  the priority time, then the vnn */struct election_message {	uint32_t num_connected;	struct timeval priority_time;	uint32_t vnn;};/*  form this nodes election data */static void ctdb_election_data(struct ctdb_recoverd *rec, struct election_message *em){	int ret, i;	struct ctdb_node_map *nodemap;	struct ctdb_context *ctdb = rec->ctdb;	ZERO_STRUCTP(em);	em->vnn = rec->ctdb->vnn;	em->priority_time = rec->priority_time;	ret = ctdb_ctrl_getnodemap(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, rec, &nodemap);	if (ret != 0) {		return;	}	for (i=0;i<nodemap->num;i++) {		if (!(nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED)) {			em->num_connected++;		}	}	talloc_free(nodemap);}/*  see if the given election data wins */static bool ctdb_election_win(struct ctdb_recoverd *rec, struct election_message *em){	struct election_message myem;	int cmp;	ctdb_election_data(rec, &myem);	/* try to use the most connected node */	cmp = (int)myem.num_connected - (int)em->num_connected;	/* then the longest running node */	if (cmp == 0) {		cmp = timeval_compare(&em->priority_time, &myem.priority_time);	}	if (cmp == 0) {		cmp = (int)myem.vnn - (int)em->vnn;	}	return cmp > 0;}/*  send out an election request */static int send_election_request(struct ctdb_recoverd *rec, TALLOC_CTX *mem_ctx, uint32_t vnn){	int ret;	TDB_DATA election_data;	struct election_message emsg;	uint64_t srvid;	struct ctdb_context *ctdb = rec->ctdb;		srvid = CTDB_SRVID_RECOVERY;	ctdb_election_data(rec, &emsg);	election_data.dsize = sizeof(struct election_message);	election_data.dptr  = (unsigned char *)&emsg;	/* first we assume we will win the election and set 	   recoverymaster to be ourself on the current node	 */	ret = ctdb_ctrl_setrecmaster(ctdb, CONTROL_TIMEOUT(), vnn, vnn);	if (ret != 0) {		DEBUG(0, (__location__ " failed to send recmaster election request\n"));		return -1;	}	/* send an election message to all active nodes */	ctdb_send_message(ctdb, CTDB_BROADCAST_ALL, srvid, election_data);	return 0;}/*  this function will unban all nodes in the cluster*/static void unban_all_nodes(struct ctdb_context *ctdb){	int ret, i;	struct ctdb_node_map *nodemap;	TALLOC_CTX *tmp_ctx = talloc_new(ctdb);		ret = ctdb_ctrl_getnodemap(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, tmp_ctx, &nodemap);	if (ret != 0) {		DEBUG(0,(__location__ " failed to get nodemap to unban all nodes\n"));		return;	}	for (i=0;i<nodemap->num;i++) {		if ( (!(nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED))		  && (nodemap->nodes[i].flags & NODE_FLAGS_BANNED) ) {			ctdb_ctrl_modflags(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[i].vnn, 0, NODE_FLAGS_BANNED);		}	}	talloc_free(tmp_ctx);}/*  handler for recovery master elections*/static void election_handler(struct ctdb_context *ctdb, uint64_t srvid, 			     TDB_DATA data, void *private_data){	struct ctdb_recoverd *rec = talloc_get_type(private_data, struct ctdb_recoverd);	int ret;	struct election_message *em = (struct election_message *)data.dptr;	TALLOC_CTX *mem_ctx;	mem_ctx = talloc_new(ctdb);	/* someone called an election. check their election data	   and if we disagree and we would rather be the elected node, 	   send a new election message to all other nodes	 */	if (ctdb_election_win(rec, em)) {		ret = send_election_request(rec, mem_ctx, ctdb_get_vnn(ctdb));		if (ret!=0) {			DEBUG(0, (__location__ " failed to initiate recmaster election"));		}		talloc_free(mem_ctx);		/*unban_all_nodes(ctdb);*/		return;	}	/* release the recmaster lock */	if (em->vnn != ctdb->vnn &&	    ctdb->recovery_lock_fd != -1) {		close(ctdb->recovery_lock_fd);		ctdb->recovery_lock_fd = -1;		unban_all_nodes(ctdb);	}	/* ok, let that guy become recmaster then */	ret = ctdb_ctrl_setrecmaster(ctdb, CONTROL_TIMEOUT(), ctdb_get_vnn(ctdb), em->vnn);	if (ret != 0) {		DEBUG(0, (__location__ " failed to send recmaster election request"));		talloc_free(mem_ctx);		return;	}	/* release any bans */	rec->last_culprit = (uint32_t)-1;	talloc_free(rec->banned_nodes);	rec->banned_nodes = talloc_zero_array(rec, struct ban_state *, ctdb->num_nodes);	CTDB_NO_MEMORY_FATAL(ctdb, rec->banned_nodes);	talloc_free(mem_ctx);	return;}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -