📄 ipfail.c

📁 linux集群服务器软件代码包
💻 C
📖 第 1 页 / 共 2 页
字号:
上一页 12
		}	}	if (hb->llc_ops->end_nodewalk(hb) != HA_OK) {		cl_log(LOG_ERR, "Cannot end node walk");		cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb));		exit(17);	}	return found;}gbooleangiveup(gpointer user_data){	/* Giveup: Takes the heartbeat cluster as input and the type of	 * resources to give up.  Returning FALSE causes the timer to die.	 * Forces the local node to release a particular class of resources.	 */	struct giveup_data *gd = user_data;	ll_cluster_t *hb = gd->hb;	const char *res_type = gd->res_type;	struct ha_msg *msg;	char pid[10];	cl_log(LOG_INFO, "giveup() called (timeout worked)");	if (is_stable(hb)) {		memset(pid, 0, sizeof(pid));		snprintf(pid, sizeof(pid), "%ld", (long)getpid());		msg = ha_msg_new(3);		ha_msg_add(msg, F_TYPE, T_ASKRESOURCES);		ha_msg_add(msg, F_RESOURCES, res_type);		ha_msg_add(msg, F_ORIG, node_name);		ha_msg_add(msg, F_COMMENT, "me");		hb->llc_ops->sendclustermsg(hb, msg);		cl_log(LOG_DEBUG, "Message [" T_ASKRESOURCES "] sent.");		ha_msg_del(msg);		need_standby = 0;	} else {		need_standby = 1;	}	return FALSE;}voiddelay_giveup(ll_cluster_t *hb, const char *res_type, int mseconds){	struct giveup_data *gd;	gd = malloc(sizeof(struct giveup_data));               if (gd == NULL) {                cl_log(LOG_ERR, "Out of memory, can't giveup.");                return;        }	gd->hb = hb;	gd->res_type = res_type;	/* Set mseconds to -1 to use default. (twice the keepalive) */	if (mseconds < 0) {		mseconds = hb->llc_ops->get_keepalive(hb) * 2;	}	cl_log(LOG_INFO, "Delayed giveup in %i seconds.", mseconds / 1000);	if (delay_giveup_tag) {		/* A timer exists already? */		cl_log(LOG_DEBUG, "Detected existing delay timer, overriding");		Gmain_timeout_remove(delay_giveup_tag);		delay_giveup_tag = 0;	}	/* We are going to call giveup in mseconds/1000 Seconds. */	delay_giveup_tag = Gmain_timeout_add_full(G_PRIORITY_DEFAULT, 						  mseconds, 						  giveup, (gpointer)gd, 						  giveup_destroy);}voidgiveup_destroy(gpointer user_data){	/* Clean up the struct giveup_data that we were using */	free(user_data);	delay_giveup_tag = 0;	cl_log(LOG_DEBUG, "giveup timeout has been destroyed.");}voidabort_giveup(){	if (delay_giveup_tag) {		cl_log(LOG_INFO, "Aborted delayed giveup (%u)", 		       delay_giveup_tag);		Gmain_timeout_remove(delay_giveup_tag);		delay_giveup_tag = 0;	} else {		cl_log(LOG_INFO, "No giveup timer to abort.");	}}voidsend_abort_giveup(ll_cluster_t *hb){        struct ha_msg *msg;        msg = ha_msg_new(2);        ha_msg_add(msg, F_TYPE, "abort_giveup");        ha_msg_add(msg, F_ORIG, node_name);        hb->llc_ops->sendnodemsg(hb, msg, other_node);        cl_log(LOG_DEBUG, "Abort message sent.");        ha_msg_del(msg);}voidmsg_abort_giveup(const struct ha_msg *msg, void *private){        abort_giveup();}voidmsg_ipfail_join(const struct ha_msg *msg, void *private){	/* msg_ipfail_join: When another ipfail client sends a join 	 * message, call ask_ping_nodes() to compare ping node counts.	 * Callback for the T_APICLISTAT message. 	 */	/* If this is a join message from ipfail on a different node.... */	if (!strcmp(ha_msg_value(msg, F_STATUS), JOINSTATUS) &&	    !strcmp(ha_msg_value(msg, F_FROMID), "ipfail")   && 	    strcmp(ha_msg_value(msg, F_ORIG),   node_name)) {		cl_log(LOG_DEBUG, 		       "Got join message from another ipfail client. (%s)",		       ha_msg_value(msg, F_ORIG));		ask_ping_nodes(private, ping_node_status(private));	}}voidmsg_resources(const struct ha_msg *msg, void *private){	const char * orig = ha_msg_value(msg, F_ORIG);	const char * isstable = ha_msg_value(msg, F_ISSTABLE);	/* msg_resources: Catch T_RESOURCES messages, so that we can	 * find out when stability is achieved among the cluster	 */	if (!orig || !isstable) {		return;	/* No stability info in this message... */	}	/* Right now there are two stable messages sent out, we are	 * only concerned with the one that has no info= line on it.	 */	if (!strcmp(orig, other_node) &&	    !ha_msg_value(msg, F_COMMENT) &&	    !strcmp(isstable, "1")) {		cl_log(LOG_DEBUG, "Other side is now stable.");		node_stable = 1;				/* There may be a pending standby */		if (need_standby) {			/* Gratuitious ARPs take some time, is there a			 * way to know when they're finished?  I don't			 * want this sleep here, even if it only is during			 * startup.			 */			/* This value is prone to be wrong for different			 * situations.  We need the resource stability			 * message to be delayed until the resource scripts			 * finish, and then we can stop waiting.			 */			sleep(10);			/* If the resource message stuff is solved, we could			 * safely giveup() here.  However, since we're waiting			 * for arbitrary amounts of time it may be wise to			 * recheck the assumptions of the cluster and count			 * ping nodes.			 */			ask_ping_nodes(private, ping_node_status(private));			/* giveup(private); */                        /* The ask_ping_nodes message will sort out whether                         * a standby is necessary. */                        need_standby = 0;		}	}	else if (!strcmp(orig, other_node) &&		 !strcmp(isstable, "0")) {		cl_log(LOG_DEBUG, "Other side is unstable.");		node_stable = 0;		}}voidask_ping_nodes(ll_cluster_t *hb, int num_ping){	/* ask_ping_nodes: Takes the heartbeat cluster and the number of	 * ping nodes we can see alive as input, returning nothing.	 * It asks the other node for the number of ping nodes it can see.	 */	struct ha_msg *msg;	char pid[10], np[5];	cl_log(LOG_INFO, "Asking other side for ping node count.");	memset(pid, 0, sizeof(pid));	snprintf(pid, sizeof(pid), "%ld", (long)getpid());	memset(np, 0, sizeof(np));	snprintf(np, sizeof(np), "%d", num_ping);	msg = ha_msg_new(3);	ha_msg_add(msg, F_TYPE, "num_ping_nodes");	ha_msg_add(msg, F_ORIG, node_name);	ha_msg_add(msg, F_NUMPING, np);	hb->llc_ops->sendnodemsg(hb, msg, other_node);	cl_log(LOG_DEBUG, "Message [" F_NUMPING "] sent.");	ha_msg_del(msg);}voidmsg_ping_nodes(const struct ha_msg *msg, void *private){	/* msg_ping_nodes: Takes the message and heartbeat cluster as input;	 * returns nothing.  Callback for the num_ping_nodes message.	 */	int num_nodes=0;	ll_cluster_t *hb = private;	cl_log(LOG_DEBUG, "Got asked for num_ping.");	num_nodes = ping_node_status(hb);	if (num_nodes > atoi(ha_msg_value(msg, F_NUMPING))) {                cl_log(LOG_INFO,                        "Telling other node that we have more visible ping "                       "nodes.");		you_are_dead(hb);	}	else if (num_nodes < atoi(ha_msg_value(msg, F_NUMPING))) {		cl_log(LOG_INFO,                        "Giving up because we have less visible ping nodes.");		delay_giveup(hb, HB_ALL_RESOURCES, -1);	}	else {		cl_log(LOG_INFO, "Ping node count is balanced.");		send_abort_giveup(hb);		if (delay_giveup_tag) {			/* We've got a delayed giveup, and we're now balanced*/			/* BUG? We don't want to do this if we have an 			   auto_failback pending, I think. */			abort_giveup();		} else if (auto_failback && is_stable(hb)) {			/* We're balanced, so make sure we don't have foreign 			 * stuff			 */			cl_log(LOG_INFO,                                "Giving up foreign resources (auto_failback).");			delay_giveup(hb, HB_FOREIGN_RESOURCES, -1);		}	}}voidyou_are_dead(ll_cluster_t *hb){	/* you_are_dead: Takes the heartbeat cluster as input; returns nothing.	 * Sends the you_are_dead message to the dead node.	 */	struct ha_msg *msg;	char pid[10];	cl_log(LOG_DEBUG, "Sending you_are_dead.");	memset(pid, 0, sizeof(pid));	snprintf(pid, sizeof(pid), "%ld", (long)getpid());	msg = ha_msg_new(1);	ha_msg_add(msg, F_TYPE, "you_are_dead");	hb->llc_ops->sendnodemsg(hb, msg, other_node);	cl_log(LOG_DEBUG, "Message [you_are_dead] sent.");	ha_msg_del(msg);}voidi_am_dead(const struct ha_msg *msg, void *private){	/* i_am_dead: Takes the you_are_dead message and the heartbeat cluster	 * as input; returns nothing.	 * Callback for the you_are_dead message.	 */	cl_log(LOG_INFO,                "Giving up because we were told that we have less ping nodes.");	delay_giveup(private, HB_ALL_RESOURCES, -1);}voidgotsig(int nsig){	(void)nsig;	quitnow = 1;}/* Used to handle the API in the gmainloop */gboolean ipfail_dispatch(int fd, gpointer user_data){	struct ha_msg *reply;	ll_cluster_t *hb = user_data;		/*	if (hb->llc_ops->msgready(hb))		cl_log(LOG_DEBUG, "Msg ready!");	cl_log(LOG_DEBUG, "Reading a message!");	*/	reply = hb->llc_ops->readmsg(hb, 0);	if (reply != NULL) {		/* cl_log_message(reply); */		ha_msg_del(reply); reply=NULL;		return TRUE;	}	/*	else	  return FALSE;	*/		return TRUE;}voidipfail_dispatch_destroy(gpointer user_data){	return;}gbooleanipfail_timeout_dispatch(gpointer user_data){	ll_cluster_t *hb = user_data;	if (quitnow) {		g_main_quit(mainloop);		return FALSE;	}	if (hb->llc_ops->msgready(hb)) {		/* cl_log(LOG_DEBUG, "Msg ready! [2]"); */		return ipfail_dispatch(-1, user_data);	}	return TRUE;}voidopen_api(ll_cluster_t *hb){	/* Sign in to the API and setup the log facility */	int facility;	cl_log(LOG_DEBUG, "Signing in with heartbeat");	if (hb->llc_ops->signon(hb, "ipfail")!= HA_OK) {		cl_log(LOG_ERR, "Cannot sign on with heartbeat");		cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb));		exit(1);	}	if ((facility = hb->llc_ops->get_logfacility(hb)) <= 0) {		facility = DEFAULT_FACILITY;	}	cl_log_set_facility(facility);}voidclose_api(ll_cluster_t *hb){	/* Log off of the API and clean up */	if (hb->llc_ops->signoff(hb) != HA_OK) {		cl_log(LOG_ERR, "Cannot sign off from heartbeat.");		cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb));		exit(14);	}	if (hb->llc_ops->delete(hb) != HA_OK) {		cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb));		cl_log(LOG_ERR, "Cannot delete API object.");		exit(15);	}}
上一页 12
💿 文件大小 2415 K
👤 上传用户 cz6891297
📂 所属分类网络
🏷️ 相关标签

#linux #服务器 #软件代码
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -