📄 ipfail.c

📁 linux集群服务器软件代码包
💻 C
📖 第 1 页 / 共 2 页
字号:
12 下一页
/* $Id: ipfail.c,v 1.39 2004/10/16 04:12:56 alan Exp $ */
/* ipfail: IP Failover plugin for Linux-HA
 *
 * Copyright (C) 2002-2004 Kevin Dwyer <kevin@pheared.net>
 *
 * This plugin uses ping nodes to determine a failure in an
 * interface's connectivity and forces a hb_standby. It is based on the
 * api_test.c program included with Linux-HA.
 *
 * Setup: In your ha.cf file make sure you have a ping node setup for each
 *        interface.  Choosing something like the switch that you are connected
 *        to is a good idea.  Choosing your win95 reboot-o-matic is a bad idea.
 *
 *        The way this works is by taking note of when a ping node dies.
 *        When a death is detected, it communicates with the other side to see
 *        if the other side saw it die (sort of).  If it didn't, then we know
 *        who deserves to have the resources.
 *
 * There are ways to improve this, and I'm working on them.
 *
 */
/*
 * api_test: Test program for testing the heartbeat API
 *
 * Copyright (C) 2000 Alan Robertson <alanr@unix.sh>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include <portability.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <sys/time.h>
#include <sys/stat.h>
#include <stdarg.h>
#include <libgen.h>
#include <heartbeat.h>
#include <ha_msg.h>
#include <hb_api.h>
#include <clplumbing/cl_log.h>
#include <clplumbing/cl_signal.h>
#include <clplumbing/cl_malloc.h>
#include <clplumbing/GSource.h>
#include <clplumbing/Gmain_timeout.h>
#include <clplumbing/coredumps.h>
#include "ipfail.h"

/* ICK! global vars. */
const char *node_name;	   /* The node we are connected to            */
char other_node[SYS_NMLN]; /* The remote node in the pair             */
int node_stable;           /* Other node stable?                      */
int need_standby;          /* Are we waiting for stability?           */
int quitnow = 0;           /* Allows a signal to break us out of loop */
int auto_failback;         /* How is our auto_failback configured?    */
GMainLoop *mainloop;       /* Reference to the mainloop for events    */
guint delay_giveup_tag = 0;/* Our delay timer                         */

/*
 * main: program entry point.
 *
 * Sets up logging, creates the heartbeat cluster handle, reads the
 * local node name and the auto_failback / core directory parameters,
 * registers callbacks and signals, then runs a glib main loop that
 * watches the API input fd and a periodic timeout.
 *
 * Returns 0 after the main loop ends; the setup steps exit() with a
 * non-zero status when they fail.
 */
int
main(int argc, char **argv)
{
	unsigned fmask;
	ll_cluster_t *hb;
	char pid[10];
	char *bname, *parameter;
	int apifd;

	cl_log_enable_stderr(TRUE);

	/* Get the name of the binary for logging purposes */
	bname = ha_strdup(argv[0]);
	cl_log_set_entity(basename(bname));
	cl_log_set_facility(DEFAULT_FACILITY);

	hb = ll_cluster_new("heartbeat");
	if (hb == NULL) {
		/* Every later step dereferences hb, so bail out now. */
		cl_log(LOG_ERR, "Cannot create heartbeat cluster handle");
		exit(1);
	}

	memset(other_node, 0, sizeof(other_node));
	need_standby = 0;

	memset(pid, 0, sizeof(pid));
	snprintf(pid, sizeof(pid), "%ld", (long)getpid());
	cl_log(LOG_DEBUG, "PID=%s", pid);

	open_api(hb);

	node_stable = is_stable(hb);
	if (node_stable == -1) {
		cl_log(LOG_ERR, "No managed resources");
		exit(100);
	}

	/* Obtain our local node name */
	node_name = hb->llc_ops->get_mynodeid(hb);
	if (node_name == NULL) {
		cl_log(LOG_ERR, "Cannot get my nodeid");
		cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb));
		exit(19);
	}
	cl_log(LOG_DEBUG, "[We are %s]", node_name);

	/* Check to see if we should engage auto_failback tactics */
	parameter = hb->llc_ops->get_parameter(hb, "auto_failback");
	if (parameter) {
		/* This is equivalent to nice_failback off */
		if (!strcmp(parameter, "legacy")) {
			cl_log(LOG_ERR, "auto_failback set to "
			       "incompatible legacy option.");
			exit(100);
		}
		if (!strcmp(parameter, "on")) {
			auto_failback = 1;
		}else{
			auto_failback = 0;
		}
		cl_log(LOG_DEBUG, "auto_failback -> %i (%s)", auto_failback,
		       parameter);
		cl_free(parameter);
	}else{
		cl_log(LOG_ERR, "Couldn't get auto_failback setting.");
	}

	/* See if we should drop cores somewhere odd... */
	parameter = hb->llc_ops->get_parameter(hb, KEY_COREROOTDIR);
	if (parameter) {
		/* NOTE(review): the auto_failback value above is released
		 * with cl_free(); this one is not.  Confirm whether
		 * cl_set_corerootdir() copies its argument before freeing.
		 */
		cl_set_corerootdir(parameter);
	}
	/* One call is enough, whether or not a core root was configured. */
	cl_cdtocoredir();

	set_callbacks(hb);

	fmask = LLC_FILTER_DEFAULT;

	cl_log(LOG_DEBUG, "Setting message filter mode");
	if (hb->llc_ops->setfmode(hb, fmask) != HA_OK) {
		cl_log(LOG_ERR, "Cannot set filter mode");
		cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb));
		exit(8);
	}

	node_walk(hb);

	set_signals(hb);

	cl_log(LOG_DEBUG, "Waiting for messages...");
	errno = 0;

	cl_log_enable_stderr(FALSE);

	/* We will sit in a glib loop waiting for inputs, or making decisions
	 * for failover
	 */
	mainloop = g_main_new(TRUE);

	apifd = hb->llc_ops->inputfd(hb);

	/* Watch the API's fd for input */
	G_main_add_fd(G_PRIORITY_HIGH, apifd, FALSE, ipfail_dispatch,
		      (gpointer)hb, ipfail_dispatch_destroy);

	Gmain_timeout_add_full(G_PRIORITY_DEFAULT, 1000,
	                     ipfail_timeout_dispatch, (gpointer)hb,
	                     ipfail_dispatch_destroy);

	g_main_run(mainloop);
	g_main_destroy(mainloop);

	if (!quitnow && errno != EAGAIN && errno != EINTR) {
		cl_perror("read_hb_msg returned NULL");
		cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb));
	}

	close_api(hb);

	return 0;
}

/*
 * is_stable: classify the resource state reported by heartbeat.
 *
 * Returns -1 when get_resources() yields NULL, 0 when it yields the
 * string "transition", and 1 for any other value.
 */
int
is_stable(ll_cluster_t *hb)
{
	const char *resources = hb->llc_ops->get_resources(hb);

	if (!resources)
		/* Heartbeat is not providing resource management */
	        return -1;

	if (!strcmp(resources, "transition"))
		return 0;

	return 1;
}

/*
 * node_walk: iterate over the cluster nodes, log each one's status, and
 * record in other_node the name of a node whose type is "normal" and
 * whose name differs from node_name.
 *
 * Exits with status 9 or 12 when the walk cannot be started or ended.
 */
void
node_walk(ll_cluster_t *hb)
{
	const char *node;
	const char *type;
	int copied;
/*	const char *intf;  --Out until ifwalk is fixed */

	cl_log(LOG_DEBUG, "Starting node walk");
	if (hb->llc_ops->init_nodewalk(hb) != HA_OK) {
		cl_log(LOG_ERR, "Cannot start node walk");
		cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb));
		exit(9);
	}
	while((node = hb->llc_ops->nextnode(hb)) != NULL) {
		cl_log(LOG_DEBUG, "Cluster node: %s: status: %s", node
		,	hb->llc_ops->node_status(hb, node));

		/* Look for our partner */
		type = hb->llc_ops->node_type(hb, node);
		if (type != NULL && !strcmp("normal", type)
		    && strcmp(node, node_name)) {
			/* Bounded copy: other_node is a fixed-size buffer. */
			copied = snprintf(other_node, sizeof(other_node),
					  "%s", node);
			if (copied < 0
			    || (size_t)copied >= sizeof(other_node)) {
				cl_log(LOG_WARNING,
				       "Partner node name truncated: %s",
				       node);
			}
			cl_log(LOG_DEBUG, "[They are %s]", other_node);
		}

		/* ifwalking is broken for ping nodes.  I don't think we even
		   need it at this point.

		if (hb->llc_ops->init_ifwalk(hb, node) != HA_OK) {
			cl_log(LOG_ERR, "Cannot start if walk");
			cl_log(LOG_ERR, "REASON: %s"
			,	hb->llc_ops->errmsg(hb));
			exit(10);
		}
		while ((intf = hb->llc_ops->nextif(hb))) {
			cl_log(LOG_DEBUG, "\tnode %s: intf: %s ifstatus: %s"
			,	node, intf
			,	hb->llc_ops->if_status(hb, node, intf));
		}
		if (hb->llc_ops->end_ifwalk(hb) != HA_OK) {
			cl_log(LOG_ERR, "Cannot end if walk");
			cl_log(LOG_ERR, "REASON: %s"
			,	hb->llc_ops->errmsg(hb));
			exit(11);
		}
		-END of ifwalkcode */
	}
	if (hb->llc_ops->end_nodewalk(hb) != HA_OK) {
		cl_log(LOG_ERR, "Cannot end node walk");
		cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb));
		exit(12);
	}
}

/*
 * set_callbacks: register the message, node-status and interface-status
 * callbacks with the heartbeat API, passing hb as the private pointer.
 *
 * Each registration that fails is logged and ends the process with its
 * own exit status (2, 18, 3, 4, 5, 6 or 7).
 */
void
set_callbacks(ll_cluster_t *hb)
{
	/* Add each of the callbacks we use with the API */
	if (hb->llc_ops->set_msg_callback(hb, T_APICLISTAT,
					  msg_ipfail_join, hb) != HA_OK) {
		cl_log(LOG_ERR, "Cannot set msg_ipfail_join callback");
		cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb));
		exit(2);
	}

	if (hb->llc_ops->set_msg_callback(hb, T_RESOURCES,
					  msg_resources, hb) != HA_OK) {
		cl_log(LOG_ERR, "Cannot set msg_resources callback");
		cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb));
		exit(18);
	}

	if (hb->llc_ops->set_msg_callback(hb, "num_ping_nodes",
					  msg_ping_nodes, hb) != HA_OK) {
		cl_log(LOG_ERR, "Cannot set msg callback");
		cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb));
		exit(3);
	}

	if (hb->llc_ops->set_msg_callback(hb, "abort_giveup",
					  msg_abort_giveup, hb) != HA_OK) {
		cl_log(LOG_ERR, "Cannot set msg_abort_giveup callback");
		cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb));
		exit(4);
	}

	if (hb->llc_ops->set_msg_callback(hb, "you_are_dead",
					  i_am_dead, hb) != HA_OK) {
		cl_log(LOG_ERR, "Cannot set i_am_dead callback");
		cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb));
		exit(5);
	}

	if (hb->llc_ops->set_nstatus_callback(hb, NodeStatus, hb) != HA_OK) {
		cl_log(LOG_ERR, "Cannot set node status callback");
		cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb));
		exit(6);
	}

	if (hb->llc_ops->set_ifstatus_callback(hb, LinkStatus, hb) != HA_OK) {
		cl_log(LOG_ERR, "Cannot set if status callback");
		cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb));
		exit(7);
	}
}

/*
 * set_signals: install gotsig as the handler for SIGINT and SIGTERM and
 * call the API's setmsgsignal() with 0.
 *
 * Exits with status 13 when setmsgsignal() fails.
 */
void
set_signals(ll_cluster_t *hb)
{
	/* Setup the various signals */

	CL_SIGINTERRUPT(SIGINT, 1);
	CL_SIGNAL(SIGINT, gotsig);

	CL_SIGINTERRUPT(SIGTERM, 1);
	CL_SIGNAL(SIGTERM, gotsig);

	cl_log(LOG_DEBUG, "Setting message signal");
	if (hb->llc_ops->setmsgsignal(hb, 0) != HA_OK) {
		cl_log(LOG_ERR, "Cannot set message signal");
		cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb));
		exit(13);
	}
}

/*
 * NodeStatus: callback for node status changes.
 *
 * node    - name of the node whose status changed
 * status  - the new status string
 * private - the ll_cluster_t handle registered in set_callbacks()
 *
 * On DEADSTATUS it logs whether any ping node is still visible.  On
 * PINGSTATUS it counts the visible ping nodes and passes the count to
 * ask_ping_nodes().
 */
void
NodeStatus(const char *node, const char *status, void *private)
{
	/* Callback for node status changes */
	cl_log(LOG_INFO, "Status update: Node %s now has status %s"
	,	node, status);

	if (strcmp(status, DEADSTATUS) == 0) {
		if (ping_node_status(private)) {
			cl_log(LOG_INFO, "NS: We are still alive!");
		} else {
			cl_log(LOG_INFO, "NS: We are dead. :<");
		}
	} else if (strcmp(status, PINGSTATUS) == 0) {
		/* A ping node just came up, if we died, request resources?
		 * If so, that would emulate the primary/secondary type of
		 * High-Availability, instead of nice_failback mode
		 */

		/* Lets make sure we weren't both down, and now half up. */
		int num_ping;

		cl_log(LOG_INFO, "A ping node just came up.");
		num_ping = ping_node_status(private);
		ask_ping_nodes(private, num_ping);
	}
}

/*
 * LinkStatus: callback for link status changes.
 *
 * node    - name of the node the link belongs to
 * lnk     - name of the link
 * status  - the new status string
 * private - the ll_cluster_t handle registered in set_callbacks()
 *
 * On DEADSTATUS it counts the visible ping nodes and passes the count
 * (possibly zero) to ask_ping_nodes().
 */
void
LinkStatus(const char *node, const char *lnk, const char *status,
	   void *private)
{
	/* Callback for Link status changes */
	int num_ping=0;

	cl_log(LOG_INFO, "Link Status update: Link %s/%s now has status %s"
	,	node, lnk, status);

	if (strcmp(status, DEADSTATUS) == 0) {
		/* If we can still see pinging node, request resources */
		if ((num_ping = ping_node_status(private))) {
			ask_ping_nodes(private, num_ping);
			cl_log(LOG_INFO, "Checking remote count"
			       " of ping nodes.");
		} else {
			cl_log(LOG_INFO, "We are dead. :<");
			ask_ping_nodes(private, num_ping);
		}
	}
}

int
ping_node_status(ll_cluster_t *hb)
{
	/* ping_node_status: Takes the hearbeat cluster as input, 
	 * returns number of ping nodes found to be in the cluster, 
	 * and therefore alive.
	 */
	const char *node;
	int found=0;       /* Number of ping nodes found */

	if (hb->llc_ops->init_nodewalk(hb) != HA_OK) {
		cl_log(LOG_ERR, "Cannot start node walk");
		cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb));
		exit(16);
	}
	while((node = hb->llc_ops->nextnode(hb))!= NULL) {
		if (!strcmp(PINGSTATUS, 
			    hb->llc_ops->node_status(hb, node))) {
			cl_log(LOG_DEBUG, "Found ping node %s!", node);
			found++;
12 下一页
💿 文件大小 2415 K
👤 上传用户 cz6891297
📂 所属分类网络
🏷️ 相关标签

#linux #服务器 #软件代码
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -