📄 dlmdomain.c
字号:
/* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * * dlmdomain.c * * defines domain join / leave apis * * Copyright (C) 2004 Oracle. All rights reserved. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this program; if not, write to the * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 021110-1307, USA. * */#include <linux/module.h>#include <linux/types.h>#include <linux/slab.h>#include <linux/highmem.h>#include <linux/utsname.h>#include <linux/init.h>#include <linux/spinlock.h>#include <linux/delay.h>#include <linux/err.h>#include "cluster/heartbeat.h"#include "cluster/nodemanager.h"#include "cluster/tcp.h"#include "dlmapi.h"#include "dlmcommon.h"#include "dlmdomain.h"#include "dlmver.h"#define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_DOMAIN)#include "cluster/masklog.h"/* * ocfs2 node maps are array of long int, which limits to send them freely * across the wire due to endianness issues. To workaround this, we convert * long ints to byte arrays. Following 3 routines are helper functions to * set/test/copy bits within those array of bytes */static inline void byte_set_bit(u8 nr, u8 map[]){ map[nr >> 3] |= (1UL << (nr & 7));}static inline int byte_test_bit(u8 nr, u8 map[]){ return ((1UL << (nr & 7)) & (map[nr >> 3])) != 0;}static inline void byte_copymap(u8 dmap[], unsigned long smap[], unsigned int sz){ unsigned int nn; if (!sz) return; memset(dmap, 0, ((sz + 7) >> 3)); for (nn = 0 ; nn < sz; nn++) if (test_bit(nn, smap)) byte_set_bit(nn, dmap);}static void dlm_free_pagevec(void **vec, int pages){ while (pages--) free_page((unsigned long)vec[pages]); kfree(vec);}static void **dlm_alloc_pagevec(int pages){ void **vec = kmalloc(pages * sizeof(void *), GFP_KERNEL); int i; if (!vec) return NULL; for (i = 0; i < pages; i++) if (!(vec[i] = (void *)__get_free_page(GFP_KERNEL))) goto out_free; mlog(0, "Allocated DLM hash pagevec; %d pages (%lu expected), %lu buckets per page\n", pages, (unsigned long)DLM_HASH_PAGES, (unsigned long)DLM_BUCKETS_PER_PAGE); return vec;out_free: dlm_free_pagevec(vec, i); return NULL;}/* * * spinlock lock ordering: if multiple locks are needed, obey this ordering: * dlm_domain_lock * struct dlm_ctxt->spinlock * struct dlm_lock_resource->spinlock * struct dlm_ctxt->master_lock * struct dlm_ctxt->ast_lock * dlm_master_list_entry->spinlock * dlm_lock->spinlock * */DEFINE_SPINLOCK(dlm_domain_lock);LIST_HEAD(dlm_domains);static DECLARE_WAIT_QUEUE_HEAD(dlm_domain_events);#define DLM_DOMAIN_BACKOFF_MS 200static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data, void **ret_data);static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data, void **ret_data);static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data, void **ret_data);static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data, void **ret_data);static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm);void __dlm_unhash_lockres(struct dlm_lock_resource *lockres){ if (!hlist_unhashed(&lockres->hash_node)) { hlist_del_init(&lockres->hash_node); dlm_lockres_put(lockres); }}void __dlm_insert_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res){ struct hlist_head *bucket; struct qstr *q; assert_spin_locked(&dlm->spinlock); q = &res->lockname; bucket = dlm_lockres_hash(dlm, q->hash); /* get a reference for our hashtable */ dlm_lockres_get(res); hlist_add_head(&res->hash_node, bucket);}struct dlm_lock_resource * __dlm_lookup_lockres_full(struct dlm_ctxt *dlm, const char *name, unsigned int len, unsigned int hash){ struct hlist_head *bucket; struct hlist_node *list; mlog_entry("%.*s\n", len, name); assert_spin_locked(&dlm->spinlock); bucket = dlm_lockres_hash(dlm, hash); hlist_for_each(list, bucket) { struct dlm_lock_resource *res = hlist_entry(list, struct dlm_lock_resource, hash_node); if (res->lockname.name[0] != name[0]) continue; if (unlikely(res->lockname.len != len)) continue; if (memcmp(res->lockname.name + 1, name + 1, len - 1)) continue; dlm_lockres_get(res); return res; } return NULL;}/* intended to be called by functions which do not care about lock * resources which are being purged (most net _handler functions). * this will return NULL for any lock resource which is found but * currently in the process of dropping its mastery reference. * use __dlm_lookup_lockres_full when you need the lock resource * regardless (e.g. dlm_get_lock_resource) */struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm, const char *name, unsigned int len, unsigned int hash){ struct dlm_lock_resource *res = NULL; mlog_entry("%.*s\n", len, name); assert_spin_locked(&dlm->spinlock); res = __dlm_lookup_lockres_full(dlm, name, len, hash); if (res) { spin_lock(&res->spinlock); if (res->state & DLM_LOCK_RES_DROPPING_REF) { spin_unlock(&res->spinlock); dlm_lockres_put(res); return NULL; } spin_unlock(&res->spinlock); } return res;}struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm, const char *name, unsigned int len){ struct dlm_lock_resource *res; unsigned int hash = dlm_lockid_hash(name, len); spin_lock(&dlm->spinlock); res = __dlm_lookup_lockres(dlm, name, len, hash); spin_unlock(&dlm->spinlock); return res;}static struct dlm_ctxt * __dlm_lookup_domain_full(const char *domain, int len){ struct dlm_ctxt *tmp = NULL; struct list_head *iter; assert_spin_locked(&dlm_domain_lock); /* tmp->name here is always NULL terminated, * but domain may not be! */ list_for_each(iter, &dlm_domains) { tmp = list_entry (iter, struct dlm_ctxt, list); if (strlen(tmp->name) == len && memcmp(tmp->name, domain, len)==0) break; tmp = NULL; } return tmp;}/* For null terminated domain strings ONLY */static struct dlm_ctxt * __dlm_lookup_domain(const char *domain){ assert_spin_locked(&dlm_domain_lock); return __dlm_lookup_domain_full(domain, strlen(domain));}/* returns true on one of two conditions: * 1) the domain does not exist * 2) the domain exists and it's state is "joined" */static int dlm_wait_on_domain_helper(const char *domain){ int ret = 0; struct dlm_ctxt *tmp = NULL; spin_lock(&dlm_domain_lock); tmp = __dlm_lookup_domain(domain); if (!tmp) ret = 1; else if (tmp->dlm_state == DLM_CTXT_JOINED) ret = 1; spin_unlock(&dlm_domain_lock); return ret;}static void dlm_free_ctxt_mem(struct dlm_ctxt *dlm){ if (dlm->lockres_hash) dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES); if (dlm->name) kfree(dlm->name); kfree(dlm);}/* A little strange - this function will be called while holding * dlm_domain_lock and is expected to be holding it on the way out. We * will however drop and reacquire it multiple times */static void dlm_ctxt_release(struct kref *kref){ struct dlm_ctxt *dlm; dlm = container_of(kref, struct dlm_ctxt, dlm_refs); BUG_ON(dlm->num_joins); BUG_ON(dlm->dlm_state == DLM_CTXT_JOINED); /* we may still be in the list if we hit an error during join. */ list_del_init(&dlm->list); spin_unlock(&dlm_domain_lock); mlog(0, "freeing memory from domain %s\n", dlm->name); wake_up(&dlm_domain_events); dlm_free_ctxt_mem(dlm); spin_lock(&dlm_domain_lock);}void dlm_put(struct dlm_ctxt *dlm){ spin_lock(&dlm_domain_lock); kref_put(&dlm->dlm_refs, dlm_ctxt_release); spin_unlock(&dlm_domain_lock);}static void __dlm_get(struct dlm_ctxt *dlm){ kref_get(&dlm->dlm_refs);}/* given a questionable reference to a dlm object, gets a reference if * it can find it in the list, otherwise returns NULL in which case * you shouldn't trust your pointer. */struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm){ struct list_head *iter; struct dlm_ctxt *target = NULL; spin_lock(&dlm_domain_lock); list_for_each(iter, &dlm_domains) { target = list_entry (iter, struct dlm_ctxt, list); if (target == dlm) { __dlm_get(target); break; } target = NULL; } spin_unlock(&dlm_domain_lock); return target;}int dlm_domain_fully_joined(struct dlm_ctxt *dlm){ int ret; spin_lock(&dlm_domain_lock); ret = (dlm->dlm_state == DLM_CTXT_JOINED) || (dlm->dlm_state == DLM_CTXT_IN_SHUTDOWN); spin_unlock(&dlm_domain_lock); return ret;}static void dlm_destroy_dlm_worker(struct dlm_ctxt *dlm){ if (dlm->dlm_worker) { flush_workqueue(dlm->dlm_worker); destroy_workqueue(dlm->dlm_worker); dlm->dlm_worker = NULL; }}static void dlm_complete_dlm_shutdown(struct dlm_ctxt *dlm){ dlm_unregister_domain_handlers(dlm); dlm_complete_thread(dlm); dlm_complete_recovery_thread(dlm); dlm_destroy_dlm_worker(dlm); /* We've left the domain. Now we can take ourselves out of the * list and allow the kref stuff to help us free the * memory. */ spin_lock(&dlm_domain_lock); list_del_init(&dlm->list); spin_unlock(&dlm_domain_lock); /* Wake up anyone waiting for us to remove this domain */ wake_up(&dlm_domain_events);}static int dlm_migrate_all_locks(struct dlm_ctxt *dlm){ int i, num, n, ret = 0; struct dlm_lock_resource *res; struct hlist_node *iter; struct hlist_head *bucket; int dropped; mlog(0, "Migrating locks from domain %s\n", dlm->name); num = 0; spin_lock(&dlm->spinlock); for (i = 0; i < DLM_HASH_BUCKETS; i++) {redo_bucket: n = 0; bucket = dlm_lockres_hash(dlm, i); iter = bucket->first; while (iter) { n++; res = hlist_entry(iter, struct dlm_lock_resource, hash_node); dlm_lockres_get(res); /* migrate, if necessary. this will drop the dlm * spinlock and retake it if it does migration. */ dropped = dlm_empty_lockres(dlm, res); spin_lock(&res->spinlock); __dlm_lockres_calc_usage(dlm, res); iter = res->hash_node.next; spin_unlock(&res->spinlock); dlm_lockres_put(res); if (dropped) goto redo_bucket; } cond_resched_lock(&dlm->spinlock); num += n; mlog(0, "%s: touched %d lockreses in bucket %d " "(tot=%d)\n", dlm->name, n, i, num); } spin_unlock(&dlm->spinlock); wake_up(&dlm->dlm_thread_wq); /* let the dlm thread take care of purging, keep scanning until * nothing remains in the hash */ if (num) { mlog(0, "%s: %d lock resources in hash last pass\n", dlm->name, num); ret = -EAGAIN; } mlog(0, "DONE Migrating locks from domain %s\n", dlm->name); return ret;}static int dlm_no_joining_node(struct dlm_ctxt *dlm){ int ret; spin_lock(&dlm->spinlock); ret = dlm->joining_node == DLM_LOCK_RES_OWNER_UNKNOWN; spin_unlock(&dlm->spinlock); return ret;}static void dlm_mark_domain_leaving(struct dlm_ctxt *dlm){ /* Yikes, a double spinlock! I need domain_lock for the dlm * state and the dlm spinlock for join state... Sorry! */again: spin_lock(&dlm_domain_lock); spin_lock(&dlm->spinlock); if (dlm->joining_node != DLM_LOCK_RES_OWNER_UNKNOWN) { mlog(0, "Node %d is joining, we wait on it.\n", dlm->joining_node); spin_unlock(&dlm->spinlock); spin_unlock(&dlm_domain_lock); wait_event(dlm->dlm_join_events, dlm_no_joining_node(dlm)); goto again; } dlm->dlm_state = DLM_CTXT_LEAVING; spin_unlock(&dlm->spinlock); spin_unlock(&dlm_domain_lock);}static void __dlm_print_nodes(struct dlm_ctxt *dlm){ int node = -1; assert_spin_locked(&dlm->spinlock); printk(KERN_INFO "ocfs2_dlm: Nodes in domain (\"%s\"): ", dlm->name); while ((node = find_next_bit(dlm->domain_map, O2NM_MAX_NODES, node + 1)) < O2NM_MAX_NODES) { printk("%d ", node); } printk("\n");}static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data, void **ret_data){ struct dlm_ctxt *dlm = data; unsigned int node; struct dlm_exit_domain *exit_msg = (struct dlm_exit_domain *) msg->buf; mlog_entry("%p %u %p", msg, len, data); if (!dlm_grab(dlm)) return 0; node = exit_msg->node_idx; printk(KERN_INFO "ocfs2_dlm: Node %u leaves domain %s\n", node, dlm->name); spin_lock(&dlm->spinlock); clear_bit(node, dlm->domain_map); __dlm_print_nodes(dlm); /* notify anything attached to the heartbeat events */ dlm_hb_event_notify_attached(dlm, node, 0); spin_unlock(&dlm->spinlock); dlm_put(dlm); return 0;}static int dlm_send_one_domain_exit(struct dlm_ctxt *dlm, unsigned int node){ int status; struct dlm_exit_domain leave_msg; mlog(0, "Asking node %u if we can leave the domain %s me = %u\n", node, dlm->name, dlm->node_num); memset(&leave_msg, 0, sizeof(leave_msg)); leave_msg.node_idx = dlm->node_num; status = o2net_send_message(DLM_EXIT_DOMAIN_MSG, dlm->key, &leave_msg, sizeof(leave_msg), node, NULL); mlog(0, "status return %d from o2net_send_message\n", status); return status;}static void dlm_leave_domain(struct dlm_ctxt *dlm){
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -