📄 ipfail.c
字号:
} } if (hb->llc_ops->end_nodewalk(hb) != HA_OK) { cl_log(LOG_ERR, "Cannot end node walk"); cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); exit(17); } return found;}gbooleangiveup(gpointer user_data){ /* Giveup: Takes the heartbeat cluster as input and the type of * resources to give up. Returning FALSE causes the timer to die. * Forces the local node to release a particular class of resources. */ struct giveup_data *gd = user_data; ll_cluster_t *hb = gd->hb; const char *res_type = gd->res_type; struct ha_msg *msg; char pid[10]; cl_log(LOG_INFO, "giveup() called (timeout worked)"); if (is_stable(hb)) { memset(pid, 0, sizeof(pid)); snprintf(pid, sizeof(pid), "%ld", (long)getpid()); msg = ha_msg_new(3); ha_msg_add(msg, F_TYPE, T_ASKRESOURCES); ha_msg_add(msg, F_RESOURCES, res_type); ha_msg_add(msg, F_ORIG, node_name); ha_msg_add(msg, F_COMMENT, "me"); hb->llc_ops->sendclustermsg(hb, msg); cl_log(LOG_DEBUG, "Message [" T_ASKRESOURCES "] sent."); ha_msg_del(msg); need_standby = 0; } else { need_standby = 1; } return FALSE;}voiddelay_giveup(ll_cluster_t *hb, const char *res_type, int mseconds){ struct giveup_data *gd; gd = malloc(sizeof(struct giveup_data)); if (gd == NULL) { cl_log(LOG_ERR, "Out of memory, can't giveup."); return; } gd->hb = hb; gd->res_type = res_type; /* Set mseconds to -1 to use default. (twice the keepalive) */ if (mseconds < 0) { mseconds = hb->llc_ops->get_keepalive(hb) * 2; } cl_log(LOG_INFO, "Delayed giveup in %i seconds.", mseconds / 1000); if (delay_giveup_tag) { /* A timer exists already? */ cl_log(LOG_DEBUG, "Detected existing delay timer, overriding"); Gmain_timeout_remove(delay_giveup_tag); delay_giveup_tag = 0; } /* We are going to call giveup in mseconds/1000 Seconds. */ delay_giveup_tag = Gmain_timeout_add_full(G_PRIORITY_DEFAULT, mseconds, giveup, (gpointer)gd, giveup_destroy);}voidgiveup_destroy(gpointer user_data){ /* Clean up the struct giveup_data that we were using */ free(user_data); delay_giveup_tag = 0; cl_log(LOG_DEBUG, "giveup timeout has been destroyed.");}voidabort_giveup(){ if (delay_giveup_tag) { cl_log(LOG_INFO, "Aborted delayed giveup (%u)", delay_giveup_tag); Gmain_timeout_remove(delay_giveup_tag); delay_giveup_tag = 0; } else { cl_log(LOG_INFO, "No giveup timer to abort."); }}voidsend_abort_giveup(ll_cluster_t *hb){ struct ha_msg *msg; msg = ha_msg_new(2); ha_msg_add(msg, F_TYPE, "abort_giveup"); ha_msg_add(msg, F_ORIG, node_name); hb->llc_ops->sendnodemsg(hb, msg, other_node); cl_log(LOG_DEBUG, "Abort message sent."); ha_msg_del(msg);}voidmsg_abort_giveup(const struct ha_msg *msg, void *private){ abort_giveup();}voidmsg_ipfail_join(const struct ha_msg *msg, void *private){ /* msg_ipfail_join: When another ipfail client sends a join * message, call ask_ping_nodes() to compare ping node counts. * Callback for the T_APICLISTAT message. */ /* If this is a join message from ipfail on a different node.... */ if (!strcmp(ha_msg_value(msg, F_STATUS), JOINSTATUS) && !strcmp(ha_msg_value(msg, F_FROMID), "ipfail") && strcmp(ha_msg_value(msg, F_ORIG), node_name)) { cl_log(LOG_DEBUG, "Got join message from another ipfail client. (%s)", ha_msg_value(msg, F_ORIG)); ask_ping_nodes(private, ping_node_status(private)); }}voidmsg_resources(const struct ha_msg *msg, void *private){ const char * orig = ha_msg_value(msg, F_ORIG); const char * isstable = ha_msg_value(msg, F_ISSTABLE); /* msg_resources: Catch T_RESOURCES messages, so that we can * find out when stability is achieved among the cluster */ if (!orig || !isstable) { return; /* No stability info in this message... */ } /* Right now there are two stable messages sent out, we are * only concerned with the one that has no info= line on it. */ if (!strcmp(orig, other_node) && !ha_msg_value(msg, F_COMMENT) && !strcmp(isstable, "1")) { cl_log(LOG_DEBUG, "Other side is now stable."); node_stable = 1; /* There may be a pending standby */ if (need_standby) { /* Gratuitious ARPs take some time, is there a * way to know when they're finished? I don't * want this sleep here, even if it only is during * startup. */ /* This value is prone to be wrong for different * situations. We need the resource stability * message to be delayed until the resource scripts * finish, and then we can stop waiting. */ sleep(10); /* If the resource message stuff is solved, we could * safely giveup() here. However, since we're waiting * for arbitrary amounts of time it may be wise to * recheck the assumptions of the cluster and count * ping nodes. */ ask_ping_nodes(private, ping_node_status(private)); /* giveup(private); */ /* The ask_ping_nodes message will sort out whether * a standby is necessary. */ need_standby = 0; } } else if (!strcmp(orig, other_node) && !strcmp(isstable, "0")) { cl_log(LOG_DEBUG, "Other side is unstable."); node_stable = 0; }}voidask_ping_nodes(ll_cluster_t *hb, int num_ping){ /* ask_ping_nodes: Takes the heartbeat cluster and the number of * ping nodes we can see alive as input, returning nothing. * It asks the other node for the number of ping nodes it can see. */ struct ha_msg *msg; char pid[10], np[5]; cl_log(LOG_INFO, "Asking other side for ping node count."); memset(pid, 0, sizeof(pid)); snprintf(pid, sizeof(pid), "%ld", (long)getpid()); memset(np, 0, sizeof(np)); snprintf(np, sizeof(np), "%d", num_ping); msg = ha_msg_new(3); ha_msg_add(msg, F_TYPE, "num_ping_nodes"); ha_msg_add(msg, F_ORIG, node_name); ha_msg_add(msg, F_NUMPING, np); hb->llc_ops->sendnodemsg(hb, msg, other_node); cl_log(LOG_DEBUG, "Message [" F_NUMPING "] sent."); ha_msg_del(msg);}voidmsg_ping_nodes(const struct ha_msg *msg, void *private){ /* msg_ping_nodes: Takes the message and heartbeat cluster as input; * returns nothing. Callback for the num_ping_nodes message. */ int num_nodes=0; ll_cluster_t *hb = private; cl_log(LOG_DEBUG, "Got asked for num_ping."); num_nodes = ping_node_status(hb); if (num_nodes > atoi(ha_msg_value(msg, F_NUMPING))) { cl_log(LOG_INFO, "Telling other node that we have more visible ping " "nodes."); you_are_dead(hb); } else if (num_nodes < atoi(ha_msg_value(msg, F_NUMPING))) { cl_log(LOG_INFO, "Giving up because we have less visible ping nodes."); delay_giveup(hb, HB_ALL_RESOURCES, -1); } else { cl_log(LOG_INFO, "Ping node count is balanced."); send_abort_giveup(hb); if (delay_giveup_tag) { /* We've got a delayed giveup, and we're now balanced*/ /* BUG? We don't want to do this if we have an auto_failback pending, I think. */ abort_giveup(); } else if (auto_failback && is_stable(hb)) { /* We're balanced, so make sure we don't have foreign * stuff */ cl_log(LOG_INFO, "Giving up foreign resources (auto_failback)."); delay_giveup(hb, HB_FOREIGN_RESOURCES, -1); } }}voidyou_are_dead(ll_cluster_t *hb){ /* you_are_dead: Takes the heartbeat cluster as input; returns nothing. * Sends the you_are_dead message to the dead node. */ struct ha_msg *msg; char pid[10]; cl_log(LOG_DEBUG, "Sending you_are_dead."); memset(pid, 0, sizeof(pid)); snprintf(pid, sizeof(pid), "%ld", (long)getpid()); msg = ha_msg_new(1); ha_msg_add(msg, F_TYPE, "you_are_dead"); hb->llc_ops->sendnodemsg(hb, msg, other_node); cl_log(LOG_DEBUG, "Message [you_are_dead] sent."); ha_msg_del(msg);}voidi_am_dead(const struct ha_msg *msg, void *private){ /* i_am_dead: Takes the you_are_dead message and the heartbeat cluster * as input; returns nothing. * Callback for the you_are_dead message. */ cl_log(LOG_INFO, "Giving up because we were told that we have less ping nodes."); delay_giveup(private, HB_ALL_RESOURCES, -1);}voidgotsig(int nsig){ (void)nsig; quitnow = 1;}/* Used to handle the API in the gmainloop */gboolean ipfail_dispatch(int fd, gpointer user_data){ struct ha_msg *reply; ll_cluster_t *hb = user_data; /* if (hb->llc_ops->msgready(hb)) cl_log(LOG_DEBUG, "Msg ready!"); cl_log(LOG_DEBUG, "Reading a message!"); */ reply = hb->llc_ops->readmsg(hb, 0); if (reply != NULL) { /* cl_log_message(reply); */ ha_msg_del(reply); reply=NULL; return TRUE; } /* else return FALSE; */ return TRUE;}voidipfail_dispatch_destroy(gpointer user_data){ return;}gbooleanipfail_timeout_dispatch(gpointer user_data){ ll_cluster_t *hb = user_data; if (quitnow) { g_main_quit(mainloop); return FALSE; } if (hb->llc_ops->msgready(hb)) { /* cl_log(LOG_DEBUG, "Msg ready! [2]"); */ return ipfail_dispatch(-1, user_data); } return TRUE;}voidopen_api(ll_cluster_t *hb){ /* Sign in to the API and setup the log facility */ int facility; cl_log(LOG_DEBUG, "Signing in with heartbeat"); if (hb->llc_ops->signon(hb, "ipfail")!= HA_OK) { cl_log(LOG_ERR, "Cannot sign on with heartbeat"); cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); exit(1); } if ((facility = hb->llc_ops->get_logfacility(hb)) <= 0) { facility = DEFAULT_FACILITY; } cl_log_set_facility(facility);}voidclose_api(ll_cluster_t *hb){ /* Log off of the API and clean up */ if (hb->llc_ops->signoff(hb) != HA_OK) { cl_log(LOG_ERR, "Cannot sign off from heartbeat."); cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); exit(14); } if (hb->llc_ops->delete(hb) != HA_OK) { cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); cl_log(LOG_ERR, "Cannot delete API object."); exit(15); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -