📄 ipfail.c
字号:
/* $Id: ipfail.c,v 1.39 2004/10/16 04:12:56 alan Exp $ */
/* ipfail: IP Failover plugin for Linux-HA
 *
 * Copyright (C) 2002-2004 Kevin Dwyer <kevin@pheared.net>
 *
 * This plugin uses ping nodes to determine a failure in an
 * interface's connectivity and forces a hb_standby. It is based on the
 * api_test.c program included with Linux-HA.
 *
 * Setup: In your ha.cf file make sure you have a ping node setup for each
 *        interface. Choosing something like the switch that you are connected
 *        to is a good idea. Choosing your win95 reboot-o-matic is a bad idea.
 *
 *        The way this works is by taking note of when a ping node dies.
 *        When a death is detected, it communicates with the other side to see
 *        if the other side saw it die (sort of). If it didn't, then we know
 *        who deserves to have the resources.
 *
 * There are ways to improve this, and I'm working on them.
 *
 */
/*
 * api_test: Test program for testing the heartbeat API
 *
 * Copyright (C) 2000 Alan Robertson <alanr@unix.sh>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <portability.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <sys/time.h>
#include <sys/stat.h>
#include <stdarg.h>
#include <libgen.h>
#include <heartbeat.h>
#include <ha_msg.h>
#include <hb_api.h>
#include <clplumbing/cl_log.h>
#include <clplumbing/cl_signal.h>
#include <clplumbing/cl_malloc.h>
#include <clplumbing/GSource.h>
#include <clplumbing/Gmain_timeout.h>
#include <clplumbing/coredumps.h>

#include "ipfail.h"

/* ICK! global vars. */
const char *node_name;		/* The node we are connected to */
char other_node[SYS_NMLN];	/* The remote node in the pair */
int node_stable;		/* Other node stable? */
int need_standby;		/* Are we waiting for stability? */
int quitnow = 0;		/* Allows a signal to break us out of loop */
int auto_failback;		/* How is our auto_failback configured? */
GMainLoop *mainloop;		/* Reference to the mainloop for events */
guint delay_giveup_tag = 0;	/* Our delay timer */

/*
 * main: program entry point.
 *
 * Sets up logging, creates the heartbeat API handle and opens it, reads
 * the local node name and the auto_failback setting, registers the
 * callbacks, message filter and signal handlers, walks the node list to
 * find the partner node, and then runs the glib main loop until it
 * terminates.
 *
 * Every setup failure is logged and ends the process through exit() with
 * a distinct status code.  Returns 0 once the main loop has finished and
 * the API has been closed.
 */
int
main(int argc, char **argv)
{
	unsigned fmask;
	ll_cluster_t *hb;
	char pid[10];
	char *bname, *parameter;
	int apifd;

	(void)argc;

	cl_log_enable_stderr(TRUE);

	/* Get the name of the binary for logging purposes */
	bname = ha_strdup(argv[0]);
	cl_log_set_entity(basename(bname));
	cl_log_set_facility(DEFAULT_FACILITY);

	hb = ll_cluster_new("heartbeat");
	if (hb == NULL) {
		/* Everything below dereferences hb, so stop here. */
		cl_log(LOG_ERR, "Cannot create heartbeat API object");
		exit(1);
	}

	memset(other_node, 0, sizeof(other_node));
	need_standby = 0;

	memset(pid, 0, sizeof(pid));
	snprintf(pid, sizeof(pid), "%ld", (long)getpid());
	cl_log(LOG_DEBUG, "PID=%s", pid);

	open_api(hb);

	node_stable = is_stable(hb);
	if (node_stable == -1) {
		cl_log(LOG_ERR, "No managed resources");
		exit(100);
	}

	/* Obtain our local node name */
	node_name = hb->llc_ops->get_mynodeid(hb);
	if (node_name == NULL) {
		cl_log(LOG_ERR, "Cannot get my nodeid");
		cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb));
		exit(19);
	}
	cl_log(LOG_DEBUG, "[We are %s]", node_name);

	/* Check to see if we should engage auto_failback tactics */
	parameter = hb->llc_ops->get_parameter(hb, "auto_failback");
	if (parameter) {
		/* This is equivalent to nice_failback off */
		if (!strcmp(parameter, "legacy")) {
			cl_log(LOG_ERR, "auto_failback set to "
			       "incompatible legacy option.");
			exit(100);
		}

		if (!strcmp(parameter, "on")) {
			auto_failback = 1;
		}else{
			auto_failback = 0;
		}
		cl_log(LOG_DEBUG, "auto_failback -> %i (%s)", auto_failback,
		       parameter);
		cl_free(parameter);
	}else{
		cl_log(LOG_ERR, "Couldn't get auto_failback setting.");
	}

	/* See if we should drop cores somewhere odd... */
	parameter = hb->llc_ops->get_parameter(hb, KEY_COREROOTDIR);
	if (parameter) {
		/*
		 * NOTE(review): unlike the auto_failback value above, this
		 * string is never passed to cl_free() -- confirm whether
		 * cl_set_corerootdir() keeps the pointer before freeing it.
		 */
		cl_set_corerootdir(parameter);
		cl_cdtocoredir();
	}
	cl_cdtocoredir();

	set_callbacks(hb);

	fmask = LLC_FILTER_DEFAULT;

	cl_log(LOG_DEBUG, "Setting message filter mode");
	if (hb->llc_ops->setfmode(hb, fmask) != HA_OK) {
		cl_log(LOG_ERR, "Cannot set filter mode");
		cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb));
		exit(8);
	}

	node_walk(hb);

	set_signals(hb);

	cl_log(LOG_DEBUG, "Waiting for messages...");
	errno = 0;

	cl_log_enable_stderr(FALSE);

	/* We will sit in a glib loop waiting for inputs, or making decisions
	 * for failover
	 */
	mainloop = g_main_new(TRUE);

	apifd = hb->llc_ops->inputfd(hb);

	/* Watch the API's fd for input */
	G_main_add_fd(G_PRIORITY_HIGH, apifd, FALSE, ipfail_dispatch,
		      (gpointer)hb, ipfail_dispatch_destroy);

	Gmain_timeout_add_full(G_PRIORITY_DEFAULT, 1000,
			       ipfail_timeout_dispatch, (gpointer)hb,
			       ipfail_dispatch_destroy);

	g_main_run(mainloop);
	g_main_destroy(mainloop);

	if (!quitnow && errno != EAGAIN && errno != EINTR) {
		cl_perror("read_hb_msg returned NULL");
		cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb));
	}

	close_api(hb);

	return 0;
}

/*
 * is_stable: query the resource state reported by heartbeat.
 *
 * Returns -1 when get_resources() yields NULL (heartbeat is not providing
 * resource management), 0 when the reported state is "transition", and 1
 * for any other state.
 */
int
is_stable(ll_cluster_t *hb)
{
	const char *resources = hb->llc_ops->get_resources(hb);

	if (!resources) {
		/* Heartbeat is not providing resource management */
		return -1;
	}

	if (!strcmp(resources, "transition")) {
		return 0;
	}

	return 1;
}

/*
 * node_walk: iterate over all cluster nodes, log each node's status, and
 * record in other_node the name of the node of type "normal" that is not
 * this node (node_name).
 *
 * Failure to start or end the walk is logged and terminates the process
 * (exit codes 9 and 12).
 */
void
node_walk(ll_cluster_t *hb)
{
	const char *node;
	const char *type;
	int copied;
/*	const char *intf; --Out until ifwalk is fixed */

	cl_log(LOG_DEBUG, "Starting node walk");
	if (hb->llc_ops->init_nodewalk(hb) != HA_OK) {
		cl_log(LOG_ERR, "Cannot start node walk");
		cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb));
		exit(9);
	}
	while((node = hb->llc_ops->nextnode(hb)) != NULL) {
		cl_log(LOG_DEBUG, "Cluster node: %s: status: %s", node
		,	hb->llc_ops->node_status(hb, node));

		/* Look for our partner */
		type = hb->llc_ops->node_type(hb, node);
		if (type != NULL && !strcmp("normal", type)
		    && strcmp(node, node_name)) {
			/*
			 * Bounded copy: other_node is a fixed SYS_NMLN-byte
			 * buffer, so never write more than it can hold.
			 */
			copied = snprintf(other_node, sizeof(other_node),
					  "%s", node);
			if (copied < 0
			    || (size_t)copied >= sizeof(other_node)) {
				cl_log(LOG_ERR, "Partner node name too long,"
				       " truncated");
			}
			cl_log(LOG_DEBUG, "[They are %s]", other_node);
		}

		/* ifwalking is broken for ping nodes. I don't think we even
		   need it at this point.

		if (hb->llc_ops->init_ifwalk(hb, node) != HA_OK) {
			cl_log(LOG_ERR, "Cannot start if walk");
			cl_log(LOG_ERR, "REASON: %s"
			,	hb->llc_ops->errmsg(hb));
			exit(10);
		}
		while ((intf = hb->llc_ops->nextif(hb))) {
			cl_log(LOG_DEBUG, "\tnode %s: intf: %s ifstatus: %s"
			,	node, intf
			,	hb->llc_ops->if_status(hb, node, intf));
		}
		if (hb->llc_ops->end_ifwalk(hb) != HA_OK) {
			cl_log(LOG_ERR, "Cannot end if walk");
			cl_log(LOG_ERR, "REASON: %s"
			,	hb->llc_ops->errmsg(hb));
			exit(11);
		}
		-END of ifwalkcode */
	}
	if (hb->llc_ops->end_nodewalk(hb) != HA_OK) {
		cl_log(LOG_ERR, "Cannot end node walk");
		cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb));
		exit(12);
	}
}

/*
 * set_callbacks: register every message, node-status and link-status
 * callback this program uses with the heartbeat API.  hb itself is passed
 * as the private data of each callback.
 *
 * Any registration failure is logged and terminates the process with a
 * callback-specific exit code.
 */
void
set_callbacks(ll_cluster_t *hb)
{
	/* Add each of the callbacks we use with the API */
	if (hb->llc_ops->set_msg_callback(hb, T_APICLISTAT,
					  msg_ipfail_join, hb) != HA_OK) {
		cl_log(LOG_ERR, "Cannot set msg_ipfail_join callback");
		cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb));
		exit(2);
	}

	if (hb->llc_ops->set_msg_callback(hb, T_RESOURCES,
					  msg_resources, hb) != HA_OK) {
		cl_log(LOG_ERR, "Cannot set msg_resources callback");
		cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb));
		exit(18);
	}

	if (hb->llc_ops->set_msg_callback(hb, "num_ping_nodes",
					  msg_ping_nodes, hb) != HA_OK) {
		cl_log(LOG_ERR, "Cannot set msg callback");
		cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb));
		exit(3);
	}

	if (hb->llc_ops->set_msg_callback(hb, "abort_giveup",
					  msg_abort_giveup, hb) != HA_OK) {
		cl_log(LOG_ERR, "Cannot set msg_abort_giveup callback");
		cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb));
		exit(4);
	}

	if (hb->llc_ops->set_msg_callback(hb, "you_are_dead",
					  i_am_dead, hb) != HA_OK) {
		cl_log(LOG_ERR, "Cannot set i_am_dead callback");
		cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb));
		exit(5);
	}

	if (hb->llc_ops->set_nstatus_callback(hb, NodeStatus, hb) != HA_OK) {
		cl_log(LOG_ERR, "Cannot set node status callback");
		cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb));
		exit(6);
	}

	if (hb->llc_ops->set_ifstatus_callback(hb, LinkStatus, hb) != HA_OK) {
		cl_log(LOG_ERR, "Cannot set if status callback");
		cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb));
		exit(7);
	}
}

/*
 * set_signals: install gotsig as the handler for SIGINT and SIGTERM and
 * set the API message signal to 0.
 *
 * Failure to set the message signal is logged and terminates the process
 * (exit code 13).
 */
void
set_signals(ll_cluster_t *hb)
{
	/* Setup the various signals */
	CL_SIGINTERRUPT(SIGINT, 1);
	CL_SIGNAL(SIGINT, gotsig);
	CL_SIGINTERRUPT(SIGTERM, 1);
	CL_SIGNAL(SIGTERM, gotsig);

	cl_log(LOG_DEBUG, "Setting message signal");
	if (hb->llc_ops->setmsgsignal(hb, 0) != HA_OK) {
		cl_log(LOG_ERR, "Cannot set message signal");
		cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb));
		exit(13);
	}
}

/*
 * NodeStatus: callback for node status changes.
 *
 * node    - name of the node whose status changed
 * status  - its new status string
 * private - the ll_cluster_t handle registered in set_callbacks()
 *
 * On DEADSTATUS it only logs whether any ping node is still visible.
 * On PINGSTATUS it counts the live ping nodes and passes that count to
 * ask_ping_nodes().
 */
void
NodeStatus(const char *node, const char *status, void *private)
{
	/* Callback for node status changes */
	cl_log(LOG_INFO, "Status update: Node %s now has status %s"
	,	node, status);

	if (strcmp(status, DEADSTATUS) == 0) {
		if (ping_node_status(private)) {
			cl_log(LOG_INFO, "NS: We are still alive!");
		} else {
			cl_log(LOG_INFO, "NS: We are dead. :<");
		}
	} else if (strcmp(status, PINGSTATUS) == 0) {
		/* A ping node just came up, if we died, request resources?
		 * If so, that would emulate the primary/secondary type of
		 * High-Availability, instead of nice_failback mode
		 */

		/* Lets make sure we weren't both down, and now half up. */
		int num_ping;

		cl_log(LOG_INFO, "A ping node just came up.");
		num_ping = ping_node_status(private);
		ask_ping_nodes(private, num_ping);
	}
}

/*
 * LinkStatus: callback for link status changes.
 *
 * node    - name of the node the link belongs to
 * lnk     - name of the link
 * status  - new status string of the link
 * private - the ll_cluster_t handle registered in set_callbacks()
 *
 * When a link goes to DEADSTATUS the number of live ping nodes is counted
 * and passed to ask_ping_nodes(), whether or not any are left.
 */
void
LinkStatus(const char *node, const char *lnk, const char *status,
	   void *private)
{
	/* Callback for Link status changes */
	int num_ping=0;

	cl_log(LOG_INFO, "Link Status update: Link %s/%s now has status %s"
	,	node, lnk, status);

	if (strcmp(status, DEADSTATUS) == 0) {
		/* If we can still see pinging node, request resources */
		if ((num_ping = ping_node_status(private))) {
			ask_ping_nodes(private, num_ping);
			cl_log(LOG_INFO, "Checking remote count"
			       " of ping nodes.");
		} else {
			cl_log(LOG_INFO, "We are dead. :<");
			ask_ping_nodes(private, num_ping);
		}
	}
}

/*
 * NOTE(review): only the beginning of ping_node_status() is visible in
 * this copy of the file; the text stops inside the node-walk loop.  The
 * visible part is reproduced unchanged below.
 */
int
ping_node_status(ll_cluster_t *hb)
{
	/* ping_node_status: Takes the heartbeat cluster as input,
	 * returns number of ping nodes found to be in the cluster,
	 * and therefore alive.
	 */
	const char *node;
	int found=0;	/* Number of ping nodes found */

	if (hb->llc_ops->init_nodewalk(hb) != HA_OK) {
		cl_log(LOG_ERR, "Cannot start node walk");
		cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb));
		exit(16);
	}
	while((node = hb->llc_ops->nextnode(hb))!= NULL) {
		if (!strcmp(PINGSTATUS,
			    hb->llc_ops->node_status(hb, node))) {
			cl_log(LOG_DEBUG, "Found ping node %s!", node);
			found++;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -