📄 ctdb_recoverd.c

📁 samba最新软件
💻 C
📖 第 1 页 / 共 3 页
字号:
上一页 1 23
/*  force the start of the election process */static void force_election(struct ctdb_recoverd *rec, TALLOC_CTX *mem_ctx, uint32_t vnn, 			   struct ctdb_node_map *nodemap){	int ret;	struct ctdb_context *ctdb = rec->ctdb;	/* set all nodes to recovery mode to stop all internode traffic */	ret = set_recovery_mode(ctdb, nodemap, CTDB_RECOVERY_ACTIVE);	if (ret!=0) {		DEBUG(0, (__location__ " Unable to set recovery mode to active on cluster\n"));		return;	}		ret = send_election_request(rec, mem_ctx, vnn);	if (ret!=0) {		DEBUG(0, (__location__ " failed to initiate recmaster election"));		return;	}	/* wait for a few seconds to collect all responses */	ctdb_wait_timeout(ctdb, ctdb->tunable.election_timeout);}/*  handler for when a node changes its flags*/static void monitor_handler(struct ctdb_context *ctdb, uint64_t srvid, 			    TDB_DATA data, void *private_data){	int ret;	struct ctdb_node_flag_change *c = (struct ctdb_node_flag_change *)data.dptr;	struct ctdb_node_map *nodemap=NULL;	TALLOC_CTX *tmp_ctx;	int i;	if (data.dsize != sizeof(*c)) {		DEBUG(0,(__location__ "Invalid data in ctdb_node_flag_change\n"));		return;	}	tmp_ctx = talloc_new(ctdb);	CTDB_NO_MEMORY_VOID(ctdb, tmp_ctx);	ret = ctdb_ctrl_getnodemap(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, tmp_ctx, &nodemap);	for (i=0;i<nodemap->num;i++) {		if (nodemap->nodes[i].vnn == c->vnn) break;	}	if (i == nodemap->num) {		DEBUG(0,(__location__ "Flag change for non-existant node %u\n", c->vnn));		talloc_free(tmp_ctx);		return;	}	/* Dont let messages from remote nodes change the DISCONNECTED flag. 	   This flag is handled locally based on whether the local node	   can communicate with the node or not.	*/	c->flags &= ~NODE_FLAGS_DISCONNECTED;	if (nodemap->nodes[i].flags&NODE_FLAGS_DISCONNECTED) {		c->flags |= NODE_FLAGS_DISCONNECTED;	}	if (nodemap->nodes[i].flags != c->flags) {		DEBUG(0,("Node %u has changed flags - now 0x%x\n", c->vnn, c->flags));	}	nodemap->nodes[i].flags = c->flags;	ret = ctdb_ctrl_getrecmaster(ctdb, CONTROL_TIMEOUT(), 				     CTDB_CURRENT_NODE, &ctdb->recovery_master);	if (ret == 0) {		ret = ctdb_ctrl_getrecmode(ctdb, CONTROL_TIMEOUT(), 					   CTDB_CURRENT_NODE, &ctdb->recovery_mode);	}		if (ret == 0 &&	    ctdb->recovery_master == ctdb->vnn &&	    ctdb->recovery_mode == CTDB_RECOVERY_NORMAL &&	    ctdb->takeover.enabled) {		ret = ctdb_takeover_run(ctdb, nodemap);		if (ret != 0) {			DEBUG(0, (__location__ " Unable to setup public takeover addresses\n"));		}	}	talloc_free(tmp_ctx);}/*  the main monitoring loop */static void monitor_cluster(struct ctdb_context *ctdb){	uint32_t vnn, num_active, recmode, recmaster;	TALLOC_CTX *mem_ctx=NULL;	struct ctdb_node_map *nodemap=NULL;	struct ctdb_node_map *remote_nodemap=NULL;	struct ctdb_vnn_map *vnnmap=NULL;	struct ctdb_vnn_map *remote_vnnmap=NULL;	int i, j, ret;	bool need_takeover_run;	struct ctdb_recoverd *rec;	rec = talloc_zero(ctdb, struct ctdb_recoverd);	CTDB_NO_MEMORY_FATAL(ctdb, rec);	rec->ctdb = ctdb;	rec->banned_nodes = talloc_zero_array(rec, struct ban_state *, ctdb->num_nodes);	CTDB_NO_MEMORY_FATAL(ctdb, rec->banned_nodes);	rec->priority_time = timeval_current();	/* register a message port for recovery elections */	ctdb_set_message_handler(ctdb, CTDB_SRVID_RECOVERY, election_handler, rec);	/* and one for when nodes are disabled/enabled */	ctdb_set_message_handler(ctdb, CTDB_SRVID_NODE_FLAGS_CHANGED, monitor_handler, rec);	/* and one for when nodes are banned */	ctdb_set_message_handler(ctdb, CTDB_SRVID_BAN_NODE, ban_handler, rec);	/* and one for when nodes are unbanned */	ctdb_set_message_handler(ctdb, CTDB_SRVID_UNBAN_NODE, unban_handler, rec);	again:	need_takeover_run = false;	if (mem_ctx) {		talloc_free(mem_ctx);		mem_ctx = NULL;	}	mem_ctx = talloc_new(ctdb);	if (!mem_ctx) {		DEBUG(0,("Failed to create temporary context\n"));		exit(-1);	}	/* we only check for recovery once every second */	ctdb_wait_timeout(ctdb, ctdb->tunable.recover_interval);	/* get relevant tunables */	ret = ctdb_ctrl_get_all_tunables(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, &ctdb->tunable);	if (ret != 0) {		DEBUG(0,("Failed to get tunables - retrying\n"));		goto again;	}	vnn = ctdb_ctrl_getvnn(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE);	if (vnn == (uint32_t)-1) {		DEBUG(0,("Failed to get local vnn - retrying\n"));		goto again;	}	/* get the vnnmap */	ret = ctdb_ctrl_getvnnmap(ctdb, CONTROL_TIMEOUT(), vnn, mem_ctx, &vnnmap);	if (ret != 0) {		DEBUG(0, (__location__ " Unable to get vnnmap from node %u\n", vnn));		goto again;	}	/* get number of nodes */	ret = ctdb_ctrl_getnodemap(ctdb, CONTROL_TIMEOUT(), vnn, mem_ctx, &nodemap);	if (ret != 0) {		DEBUG(0, (__location__ " Unable to get nodemap from node %u\n", vnn));		goto again;	}	/* count how many active nodes there are */	num_active = 0;	for (i=0; i<nodemap->num; i++) {		if (rec->banned_nodes[nodemap->nodes[i].vnn] != NULL) {			nodemap->nodes[i].flags |= NODE_FLAGS_BANNED;		} else {			nodemap->nodes[i].flags &= ~NODE_FLAGS_BANNED;		}		if (!(nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE)) {			num_active++;		}	}	/* check which node is the recovery master */	ret = ctdb_ctrl_getrecmaster(ctdb, CONTROL_TIMEOUT(), vnn, &recmaster);	if (ret != 0) {		DEBUG(0, (__location__ " Unable to get recmaster from node %u\n", vnn));		goto again;	}	if (recmaster == (uint32_t)-1) {		DEBUG(0,(__location__ " Initial recovery master set - forcing election\n"));		force_election(rec, mem_ctx, vnn, nodemap);		goto again;	}		/* verify that the recmaster node is still active */	for (j=0; j<nodemap->num; j++) {		if (nodemap->nodes[j].vnn==recmaster) {			break;		}	}	if (j == nodemap->num) {		DEBUG(0, ("Recmaster node %u not in list. Force reelection\n", recmaster));		force_election(rec, mem_ctx, vnn, nodemap);		goto again;	}	if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {		DEBUG(0, ("Recmaster node %u no longer available. Force reelection\n", nodemap->nodes[j].vnn));		force_election(rec, mem_ctx, vnn, nodemap);		goto again;	}		/* if we are not the recmaster then we do not need to check	   if recovery is needed	 */	if (vnn!=recmaster) {		goto again;	}	/* verify that all active nodes agree that we are the recmaster */	for (j=0; j<nodemap->num; j++) {		if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {			continue;		}		if (nodemap->nodes[j].vnn == vnn) {			continue;		}		ret = ctdb_ctrl_getrecmaster(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn, &recmaster);		if (ret != 0) {			DEBUG(0, (__location__ " Unable to get recmaster from node %u\n", vnn));			goto again;		}		if (recmaster!=vnn) {			DEBUG(0, ("Node %u does not agree we are the recmaster. Force reelection\n", 				  nodemap->nodes[j].vnn));			force_election(rec, mem_ctx, vnn, nodemap);			goto again;		}	}	/* verify that all active nodes are in normal mode 	   and not in recovery mode 	 */	for (j=0; j<nodemap->num; j++) {		if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {			continue;		}		ret = ctdb_ctrl_getrecmode(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn, &recmode);		if (ret != 0) {			DEBUG(0, ("Unable to get recmode from node %u\n", vnn));			goto again;		}		if (recmode != CTDB_RECOVERY_NORMAL) {			DEBUG(0, (__location__ " Node:%u was in recovery mode. Restart recovery process\n", 				  nodemap->nodes[j].vnn));			do_recovery(rec, mem_ctx, vnn, num_active, nodemap, vnnmap, nodemap->nodes[j].vnn);			goto again;		}	}	/* get the nodemap for all active remote nodes and verify	   they are the same as for this node	 */	for (j=0; j<nodemap->num; j++) {		if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {			continue;		}		if (nodemap->nodes[j].vnn == vnn) {			continue;		}		ret = ctdb_ctrl_getnodemap(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn, 					   mem_ctx, &remote_nodemap);		if (ret != 0) {			DEBUG(0, (__location__ " Unable to get nodemap from remote node %u\n", 				  nodemap->nodes[j].vnn));			goto again;		}		/* if the nodes disagree on how many nodes there are		   then this is a good reason to try recovery		 */		if (remote_nodemap->num != nodemap->num) {			DEBUG(0, (__location__ " Remote node:%u has different node count. %u vs %u of the local node\n",				  nodemap->nodes[j].vnn, remote_nodemap->num, nodemap->num));			do_recovery(rec, mem_ctx, vnn, num_active, nodemap, vnnmap, nodemap->nodes[j].vnn);			goto again;		}		/* if the nodes disagree on which nodes exist and are		   active, then that is also a good reason to do recovery		 */		for (i=0;i<nodemap->num;i++) {			if (remote_nodemap->nodes[i].vnn != nodemap->nodes[i].vnn) {				DEBUG(0, (__location__ " Remote node:%u has different nodemap vnn for %d (%u vs %u).\n", 					  nodemap->nodes[j].vnn, i, 					  remote_nodemap->nodes[i].vnn, nodemap->nodes[i].vnn));				do_recovery(rec, mem_ctx, vnn, num_active, nodemap, 					    vnnmap, nodemap->nodes[j].vnn);				goto again;			}			if ((remote_nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) != 			    (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE)) {				DEBUG(0, (__location__ " Remote node:%u has different nodemap flag for %d (0x%x vs 0x%x)\n", 					  nodemap->nodes[j].vnn, i,					  remote_nodemap->nodes[i].flags, nodemap->nodes[i].flags));				do_recovery(rec, mem_ctx, vnn, num_active, nodemap, 					    vnnmap, nodemap->nodes[j].vnn);				goto again;			}		}		/* update our nodemap flags according to the other		   server - this gets the NODE_FLAGS_DISABLED		   flag. Note that the remote node is authoritative		   for its flags (except CONNECTED, which we know		   matches in this code) */		if (nodemap->nodes[j].flags != remote_nodemap->nodes[j].flags) {			nodemap->nodes[j].flags = remote_nodemap->nodes[j].flags;			need_takeover_run = true;		}	}	/* there better be the same number of lmasters in the vnn map	   as there are active nodes or we will have to do a recovery	 */	if (vnnmap->size != num_active) {		DEBUG(0, (__location__ " The vnnmap count is different from the number of active nodes. %u vs %u\n", 			  vnnmap->size, num_active));		do_recovery(rec, mem_ctx, vnn, num_active, nodemap, vnnmap, ctdb->vnn);		goto again;	}	/* verify that all active nodes in the nodemap also exist in 	   the vnnmap.	 */	for (j=0; j<nodemap->num; j++) {		if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {			continue;		}		if (nodemap->nodes[j].vnn == vnn) {			continue;		}		for (i=0; i<vnnmap->size; i++) {			if (vnnmap->map[i] == nodemap->nodes[j].vnn) {				break;			}		}		if (i == vnnmap->size) {			DEBUG(0, (__location__ " Node %u is active in the nodemap but did not exist in the vnnmap\n", 				  nodemap->nodes[j].vnn));			do_recovery(rec, mem_ctx, vnn, num_active, nodemap, vnnmap, nodemap->nodes[j].vnn);			goto again;		}	}		/* verify that all other nodes have the same vnnmap	   and are from the same generation	 */	for (j=0; j<nodemap->num; j++) {		if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {			continue;		}		if (nodemap->nodes[j].vnn == vnn) {			continue;		}		ret = ctdb_ctrl_getvnnmap(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn, 					  mem_ctx, &remote_vnnmap);		if (ret != 0) {			DEBUG(0, (__location__ " Unable to get vnnmap from remote node %u\n", 				  nodemap->nodes[j].vnn));			goto again;		}		/* verify the vnnmap generation is the same */		if (vnnmap->generation != remote_vnnmap->generation) {			DEBUG(0, (__location__ " Remote node %u has different generation of vnnmap. %u vs %u (ours)\n", 				  nodemap->nodes[j].vnn, remote_vnnmap->generation, vnnmap->generation));			do_recovery(rec, mem_ctx, vnn, num_active, nodemap, vnnmap, nodemap->nodes[j].vnn);			goto again;		}		/* verify the vnnmap size is the same */		if (vnnmap->size != remote_vnnmap->size) {			DEBUG(0, (__location__ " Remote node %u has different size of vnnmap. %u vs %u (ours)\n", 				  nodemap->nodes[j].vnn, remote_vnnmap->size, vnnmap->size));			do_recovery(rec, mem_ctx, vnn, num_active, nodemap, vnnmap, nodemap->nodes[j].vnn);			goto again;		}		/* verify the vnnmap is the same */		for (i=0;i<vnnmap->size;i++) {			if (remote_vnnmap->map[i] != vnnmap->map[i]) {				DEBUG(0, (__location__ " Remote node %u has different vnnmap.\n", 					  nodemap->nodes[j].vnn));				do_recovery(rec, mem_ctx, vnn, num_active, nodemap, 					    vnnmap, nodemap->nodes[j].vnn);				goto again;			}		}	}	/* we might need to change who has what IP assigned */	if (need_takeover_run && ctdb->takeover.enabled) {		ret = ctdb_takeover_run(ctdb, nodemap);		if (ret != 0) {			DEBUG(0, (__location__ " Unable to setup public takeover addresses\n"));		}	}	goto again;}/*  event handler for when the main ctdbd dies */static void ctdb_recoverd_parent(struct event_context *ev, struct fd_event *fde, 				 uint16_t flags, void *private_data){	DEBUG(0,("recovery daemon parent died - exiting\n"));	_exit(1);}/*  startup the recovery daemon as a child of the main ctdb daemon */int ctdb_start_recoverd(struct ctdb_context *ctdb){	int ret;	int fd[2];	pid_t child;	if (pipe(fd) != 0) {		return -1;	}	child = fork();	if (child == -1) {		return -1;	}		if (child != 0) {		close(fd[0]);		return 0;	}	close(fd[1]);	/* shutdown the transport */	ctdb->methods->shutdown(ctdb);	/* get a new event context */	talloc_free(ctdb->ev);	ctdb->ev = event_context_init(ctdb);	event_add_fd(ctdb->ev, ctdb, fd[0], EVENT_FD_READ|EVENT_FD_AUTOCLOSE, 		     ctdb_recoverd_parent, &fd[0]);		close(ctdb->daemon.sd);	ctdb->daemon.sd = -1;	srandom(getpid() ^ time(NULL));	/* initialise ctdb */	ret = ctdb_socket_connect(ctdb);	if (ret != 0) {		DEBUG(0, (__location__ " Failed to init ctdb\n"));		exit(1);	}	monitor_cluster(ctdb);	DEBUG(0,("ERROR: ctdb_recoverd finished!?\n"));	return -1;}
上一页 1 23
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -