📄 dlmdomain.c
字号:
int dlm_shutting_down(struct dlm_ctxt *dlm){ int ret = 0; spin_lock(&dlm_domain_lock); if (dlm->dlm_state == DLM_CTXT_IN_SHUTDOWN) ret = 1; spin_unlock(&dlm_domain_lock); return ret;}void dlm_unregister_domain(struct dlm_ctxt *dlm){ int leave = 0; spin_lock(&dlm_domain_lock); BUG_ON(dlm->dlm_state != DLM_CTXT_JOINED); BUG_ON(!dlm->num_joins); dlm->num_joins--; if (!dlm->num_joins) { /* We mark it "in shutdown" now so new register * requests wait until we've completely left the * domain. Don't use DLM_CTXT_LEAVING yet as we still * want new domain joins to communicate with us at * least until we've completed migration of our * resources. */ dlm->dlm_state = DLM_CTXT_IN_SHUTDOWN; leave = 1; } spin_unlock(&dlm_domain_lock); if (leave) { mlog(0, "shutting down domain %s\n", dlm->name); /* We changed dlm state, notify the thread */ dlm_kick_thread(dlm, NULL); dlm_migrate_all_locks(dlm); dlm_mark_domain_leaving(dlm); dlm_leave_domain(dlm); dlm_complete_dlm_shutdown(dlm); } dlm_put(dlm);}EXPORT_SYMBOL_GPL(dlm_unregister_domain);static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data){ struct dlm_query_join_request *query; enum dlm_query_join_response response; struct dlm_ctxt *dlm = NULL; query = (struct dlm_query_join_request *) msg->buf; mlog(0, "node %u wants to join domain %s\n", query->node_idx, query->domain); /* * If heartbeat doesn't consider the node live, tell it * to back off and try again. This gives heartbeat a chance * to catch up. */ if (!o2hb_check_node_heartbeating(query->node_idx)) { mlog(0, "node %u is not in our live map yet\n", query->node_idx); response = JOIN_DISALLOW; goto respond; } response = JOIN_OK_NO_MAP; spin_lock(&dlm_domain_lock); dlm = __dlm_lookup_domain_full(query->domain, query->name_len); /* Once the dlm ctxt is marked as leaving then we don't want * to be put in someone's domain map. * Also, explicitly disallow joining at certain troublesome * times (ie. during recovery). */ if (dlm && dlm->dlm_state != DLM_CTXT_LEAVING) { int bit = query->node_idx; spin_lock(&dlm->spinlock); if (dlm->dlm_state == DLM_CTXT_NEW && dlm->joining_node == DLM_LOCK_RES_OWNER_UNKNOWN) { /*If this is a brand new context and we * haven't started our join process yet, then * the other node won the race. */ response = JOIN_OK_NO_MAP; } else if (dlm->joining_node != DLM_LOCK_RES_OWNER_UNKNOWN) { /* Disallow parallel joins. */ response = JOIN_DISALLOW; } else if (dlm->reco.state & DLM_RECO_STATE_ACTIVE) { mlog(ML_NOTICE, "node %u trying to join, but recovery " "is ongoing.\n", bit); response = JOIN_DISALLOW; } else if (test_bit(bit, dlm->recovery_map)) { mlog(ML_NOTICE, "node %u trying to join, but it " "still needs recovery.\n", bit); response = JOIN_DISALLOW; } else if (test_bit(bit, dlm->domain_map)) { mlog(ML_NOTICE, "node %u trying to join, but it " "is still in the domain! needs recovery?\n", bit); response = JOIN_DISALLOW; } else { /* Alright we're fully a part of this domain * so we keep some state as to who's joining * and indicate to him that needs to be fixed * up. */ response = JOIN_OK; __dlm_set_joining_node(dlm, query->node_idx); } spin_unlock(&dlm->spinlock); } spin_unlock(&dlm_domain_lock);respond: mlog(0, "We respond with %u\n", response); return response;}static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data){ struct dlm_assert_joined *assert; struct dlm_ctxt *dlm = NULL; assert = (struct dlm_assert_joined *) msg->buf; mlog(0, "node %u asserts join on domain %s\n", assert->node_idx, assert->domain); spin_lock(&dlm_domain_lock); dlm = __dlm_lookup_domain_full(assert->domain, assert->name_len); /* XXX should we consider no dlm ctxt an error? */ if (dlm) { spin_lock(&dlm->spinlock); /* Alright, this node has officially joined our * domain. Set him in the map and clean up our * leftover join state. */ BUG_ON(dlm->joining_node != assert->node_idx); set_bit(assert->node_idx, dlm->domain_map); __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); __dlm_print_nodes(dlm); /* notify anything attached to the heartbeat events */ dlm_hb_event_notify_attached(dlm, assert->node_idx, 1); spin_unlock(&dlm->spinlock); } spin_unlock(&dlm_domain_lock); return 0;}static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data){ struct dlm_cancel_join *cancel; struct dlm_ctxt *dlm = NULL; cancel = (struct dlm_cancel_join *) msg->buf; mlog(0, "node %u cancels join on domain %s\n", cancel->node_idx, cancel->domain); spin_lock(&dlm_domain_lock); dlm = __dlm_lookup_domain_full(cancel->domain, cancel->name_len); if (dlm) { spin_lock(&dlm->spinlock); /* Yikes, this guy wants to cancel his join. No * problem, we simply cleanup our join state. */ BUG_ON(dlm->joining_node != cancel->node_idx); __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); spin_unlock(&dlm->spinlock); } spin_unlock(&dlm_domain_lock); return 0;}static int dlm_send_one_join_cancel(struct dlm_ctxt *dlm, unsigned int node){ int status; struct dlm_cancel_join cancel_msg; memset(&cancel_msg, 0, sizeof(cancel_msg)); cancel_msg.node_idx = dlm->node_num; cancel_msg.name_len = strlen(dlm->name); memcpy(cancel_msg.domain, dlm->name, cancel_msg.name_len); status = o2net_send_message(DLM_CANCEL_JOIN_MSG, DLM_MOD_KEY, &cancel_msg, sizeof(cancel_msg), node, NULL); if (status < 0) { mlog_errno(status); goto bail; }bail: return status;}/* map_size should be in bytes. */static int dlm_send_join_cancels(struct dlm_ctxt *dlm, unsigned long *node_map, unsigned int map_size){ int status, tmpstat; unsigned int node; if (map_size != (BITS_TO_LONGS(O2NM_MAX_NODES) * sizeof(unsigned long))) { mlog(ML_ERROR, "map_size %u != BITS_TO_LONGS(O2NM_MAX_NODES) %u\n", map_size, BITS_TO_LONGS(O2NM_MAX_NODES)); return -EINVAL; } status = 0; node = -1; while ((node = find_next_bit(node_map, O2NM_MAX_NODES, node + 1)) < O2NM_MAX_NODES) { if (node == dlm->node_num) continue; tmpstat = dlm_send_one_join_cancel(dlm, node); if (tmpstat) { mlog(ML_ERROR, "Error return %d cancelling join on " "node %d\n", tmpstat, node); if (!status) status = tmpstat; } } if (status) mlog_errno(status); return status;}static int dlm_request_join(struct dlm_ctxt *dlm, int node, enum dlm_query_join_response *response){ int status, retval; struct dlm_query_join_request join_msg; mlog(0, "querying node %d\n", node); memset(&join_msg, 0, sizeof(join_msg)); join_msg.node_idx = dlm->node_num; join_msg.name_len = strlen(dlm->name); memcpy(join_msg.domain, dlm->name, join_msg.name_len); status = o2net_send_message(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, &join_msg, sizeof(join_msg), node, &retval); if (status < 0 && status != -ENOPROTOOPT) { mlog_errno(status); goto bail; } /* -ENOPROTOOPT from the net code means the other side isn't listening for our message type -- that's fine, it means his dlm isn't up, so we can consider him a 'yes' but not joined into the domain. */ if (status == -ENOPROTOOPT) { status = 0; *response = JOIN_OK_NO_MAP; } else if (retval == JOIN_DISALLOW || retval == JOIN_OK || retval == JOIN_OK_NO_MAP) { *response = retval; } else { status = -EINVAL; mlog(ML_ERROR, "invalid response %d from node %u\n", retval, node); } mlog(0, "status %d, node %d response is %d\n", status, node, *response);bail: return status;}static int dlm_send_one_join_assert(struct dlm_ctxt *dlm, unsigned int node){ int status; struct dlm_assert_joined assert_msg; mlog(0, "Sending join assert to node %u\n", node); memset(&assert_msg, 0, sizeof(assert_msg)); assert_msg.node_idx = dlm->node_num; assert_msg.name_len = strlen(dlm->name); memcpy(assert_msg.domain, dlm->name, assert_msg.name_len); status = o2net_send_message(DLM_ASSERT_JOINED_MSG, DLM_MOD_KEY, &assert_msg, sizeof(assert_msg), node, NULL); if (status < 0) mlog_errno(status); return status;}static void dlm_send_join_asserts(struct dlm_ctxt *dlm, unsigned long *node_map){ int status, node, live; status = 0; node = -1; while ((node = find_next_bit(node_map, O2NM_MAX_NODES, node + 1)) < O2NM_MAX_NODES) { if (node == dlm->node_num) continue; do { /* It is very important that this message be * received so we spin until either the node * has died or it gets the message. */ status = dlm_send_one_join_assert(dlm, node); spin_lock(&dlm->spinlock); live = test_bit(node, dlm->live_nodes_map); spin_unlock(&dlm->spinlock); if (status) { mlog(ML_ERROR, "Error return %d asserting " "join on node %d\n", status, node); /* give us some time between errors... */ if (live) msleep(DLM_DOMAIN_BACKOFF_MS); } } while (status && live); }}struct domain_join_ctxt { unsigned long live_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; unsigned long yes_resp_map[BITS_TO_LONGS(O2NM_MAX_NODES)];};static int dlm_should_restart_join(struct dlm_ctxt *dlm, struct domain_join_ctxt *ctxt, enum dlm_query_join_response response){ int ret; if (response == JOIN_DISALLOW) { mlog(0, "Latest response of disallow -- should restart\n"); return 1; } spin_lock(&dlm->spinlock); /* For now, we restart the process if the node maps have * changed at all */ ret = memcmp(ctxt->live_map, dlm->live_nodes_map, sizeof(dlm->live_nodes_map)); spin_unlock(&dlm->spinlock); if (ret) mlog(0, "Node maps changed -- should restart\n"); return ret;}static int dlm_try_to_join_domain(struct dlm_ctxt *dlm){ int status = 0, tmpstat, node; struct domain_join_ctxt *ctxt; enum dlm_query_join_response response; mlog_entry("%p", dlm); ctxt = kcalloc(1, sizeof(*ctxt), GFP_KERNEL); if (!ctxt) { status = -ENOMEM; mlog_errno(status); goto bail; } /* group sem locking should work for us here -- we're already * registered for heartbeat events so filling this should be * atomic wrt getting those handlers called. */ o2hb_fill_node_map(dlm->live_nodes_map, sizeof(dlm->live_nodes_map)); spin_lock(&dlm->spinlock); memcpy(ctxt->live_map, dlm->live_nodes_map, sizeof(ctxt->live_map)); __dlm_set_joining_node(dlm, dlm->node_num); spin_unlock(&dlm->spinlock); node = -1; while ((node = find_next_bit(ctxt->live_map, O2NM_MAX_NODES, node + 1)) < O2NM_MAX_NODES) { if (node == dlm->node_num) continue; status = dlm_request_join(dlm, node, &response); if (status < 0) { mlog_errno(status); goto bail; } /* Ok, either we got a response or the node doesn't have a * dlm up. */ if (response == JOIN_OK) set_bit(node, ctxt->yes_resp_map); if (dlm_should_restart_join(dlm, ctxt, response)) { status = -EAGAIN; goto bail; } } mlog(0, "Yay, done querying nodes!\n"); /* Yay, everyone agree's we can join the domain. My domain is * comprised of all nodes who were put in the * yes_resp_map. Copy that into our domain map and send a join * assert message to clean up everyone elses state. */ spin_lock(&dlm->spinlock); memcpy(dlm->domain_map, ctxt->yes_resp_map, sizeof(ctxt->yes_resp_map)); set_bit(dlm->node_num, dlm->domain_map); spin_unlock(&dlm->spinlock); dlm_send_join_asserts(dlm, ctxt->yes_resp_map); /* Joined state *must* be set before the joining node * information, otherwise the query_join handler may read no * current joiner but a state of NEW and tell joining nodes * we're not in the domain. */ spin_lock(&dlm_domain_lock); dlm->dlm_state = DLM_CTXT_JOINED; dlm->num_joins++; spin_unlock(&dlm_domain_lock);bail: spin_lock(&dlm->spinlock); __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); if (!status) __dlm_print_nodes(dlm); spin_unlock(&dlm->spinlock); if (ctxt) { /* Do we need to send a cancel message to any nodes? */ if (status < 0) { tmpstat = dlm_send_join_cancels(dlm, ctxt->yes_resp_map, sizeof(ctxt->yes_resp_map)); if (tmpstat < 0) mlog_errno(tmpstat); } kfree(ctxt); } mlog(0, "returning %d\n", status); return status;}static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm){ o2hb_unregister_callback(&dlm->dlm_hb_up); o2hb_unregister_callback(&dlm->dlm_hb_down); o2net_unregister_handler_list(&dlm->dlm_domain_handlers);}static int dlm_register_domain_handlers(struct dlm_ctxt *dlm){ int status; mlog(0, "registering handlers.\n"); o2hb_setup_callback(&dlm->dlm_hb_down, O2HB_NODE_DOWN_CB, dlm_hb_node_down_cb, dlm, DLM_HB_NODE_DOWN_PRI); status = o2hb_register_callback(&dlm->dlm_hb_down); if (status) goto bail;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -