/* 📄 dlmmaster.c */
/* 字号: — font-size control; code-viewer UI chrome captured by the scrape,
 * not part of the original source file. */
/* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
 * dlmmod.c
 *
 * standalone DLM module
 *
 * Copyright (C) 2004 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 *
 */


#include <linux/module.h>
#include <linux/fs.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/utsname.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/random.h>
#include <linux/blkdev.h>
#include <linux/socket.h>
#include <linux/inet.h>
#include <linux/spinlock.h>
#include <linux/delay.h>

#include "cluster/heartbeat.h"
#include "cluster/nodemanager.h"
#include "cluster/tcp.h"

#include "dlmapi.h"
#include "dlmcommon.h"
#include "dlmdomain.h"
#include "dlmdebug.h"

#define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_MASTER)
#include "cluster/masklog.h"

/* forward declarations: heartbeat up/down callbacks and deferred-work
 * handlers defined later in this file */
static void dlm_mle_node_down(struct dlm_ctxt *dlm,
			      struct dlm_master_list_entry *mle,
			      struct o2nm_node *node, int idx);
static void dlm_mle_node_up(struct dlm_ctxt *dlm,
			    struct dlm_master_list_entry *mle,
			    struct o2nm_node *node, int idx);

static void dlm_assert_master_worker(struct dlm_work_item *item, void *data);
static int dlm_do_assert_master(struct dlm_ctxt *dlm,
				struct dlm_lock_resource *res,
				void *nodemap, u32 flags);
static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data);

/*
 * Compare a master list entry against a lock name.
 * Returns 1 on a match, 0 otherwise.  BLOCK and MIGRATION mles carry the
 * lock name inline (mle->u.name); MASTER mles reference the lock resource
 * itself (mle->u.res), so the name is taken from res->lockname.
 */
static inline int dlm_mle_equal(struct dlm_ctxt *dlm,
				struct dlm_master_list_entry *mle,
				const char *name,
				unsigned int namelen)
{
	struct dlm_lock_resource *res;

	/* an mle belongs to exactly one dlm domain */
	if (dlm != mle->dlm)
		return 0;

	if (mle->type == DLM_MLE_BLOCK ||
	    mle->type == DLM_MLE_MIGRATION) {
		if (namelen != mle->u.name.len ||
		    memcmp(name, mle->u.name.name, namelen)!=0)
			return 0;
	} else {
		res = mle->u.res;
		if (namelen != res->lockname.len ||
		    memcmp(res->lockname.name, name, namelen) != 0)
			return 0;
	}
	return 1;
}

/* slab caches for lock resources, lock names, and master list entries;
 * created by dlm_init_master_caches() / dlm_init_mle_cache() below */
static struct kmem_cache *dlm_lockres_cache = NULL;
static struct kmem_cache *dlm_lockname_cache = NULL;
static struct kmem_cache *dlm_mle_cache = NULL;

static void dlm_mle_release(struct kref *kref);
static void dlm_init_mle(struct dlm_master_list_entry *mle,
			enum dlm_mle_type type,
			struct dlm_ctxt *dlm,
			struct dlm_lock_resource *res,
			const char *name,
			unsigned int namelen);
static void dlm_put_mle(struct dlm_master_list_entry *mle);
static void __dlm_put_mle(struct dlm_master_list_entry *mle);
static int dlm_find_mle(struct dlm_ctxt *dlm,
			struct dlm_master_list_entry **mle,
			char *name, unsigned int namelen);

static int dlm_do_master_request(struct dlm_lock_resource *res,
				 struct dlm_master_list_entry *mle, int to);

static int dlm_wait_for_lock_mastery(struct dlm_ctxt *dlm,
				     struct dlm_lock_resource *res,
				     struct dlm_master_list_entry *mle,
				     int *blocked);
static int dlm_restart_lock_mastery(struct dlm_ctxt *dlm,
				    struct dlm_lock_resource *res,
				    struct dlm_master_list_entry *mle,
				    int blocked);
static int dlm_add_migration_mle(struct dlm_ctxt *dlm,
				 struct dlm_lock_resource *res,
				 struct dlm_master_list_entry *mle,
				 struct dlm_master_list_entry **oldmle,
				 const char *name, unsigned int namelen,
				 u8 new_master, u8 master);

static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm,
				    struct dlm_lock_resource *res);
static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm,
				      struct dlm_lock_resource *res);
static int dlm_mark_lockres_migrating(struct dlm_ctxt *dlm,
				      struct dlm_lock_resource *res,
				      u8 target);
static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm,
				       struct dlm_lock_resource *res);

/*
 * Classify a (negative) errno from the networking layer: returns 1 when
 * the error implies the remote host is down or unreachable, 0 otherwise.
 * NOTE(review): callers pass negative error codes, hence the negated
 * case labels below.
 */
int dlm_is_host_down(int errno)
{
	switch (errno) {
		case -EBADF:
		case -ECONNREFUSED:
		case -ENOTCONN:
		case -ECONNRESET:
		case -EPIPE:
		case -EHOSTDOWN:
		case -EHOSTUNREACH:
		case -ETIMEDOUT:
		case -ECONNABORTED:
		case -ENETDOWN:
		case -ENETUNREACH:
		case -ENETRESET:
		case -ESHUTDOWN:
		case -ENOPROTOOPT:
		case -EINVAL:   /* if returned from our tcp code,
				   this means there is no socket */
			return 1;
	}
	return 0;
}


/*
 * MASTER LIST FUNCTIONS
 */


/*
 * regarding master list entries and heartbeat callbacks:
 *
 * in order to avoid sleeping and allocation that occurs in
 * heartbeat, master list entries are simply attached to the
 * dlm's established heartbeat callbacks.  the mle is attached
 * when it is created, and since the dlm->spinlock is held at
 * that time, any heartbeat event will be properly discovered
 * by the mle.  the mle needs to be detached from the
 * dlm->mle_hb_events list as soon as heartbeat events are no
 * longer useful to the mle, and before the mle is freed.
 *
 * as a general rule, heartbeat events are no longer needed by
 * the mle once an "answer" regarding the lock master has been
 * received.
 */

/* Attach an mle to the domain's heartbeat event list so node up/down
 * events are delivered to it.  Caller must hold dlm->spinlock (asserted). */
static inline void __dlm_mle_attach_hb_events(struct dlm_ctxt *dlm,
					      struct dlm_master_list_entry *mle)
{
	assert_spin_locked(&dlm->spinlock);

	list_add_tail(&mle->hb_events, &dlm->mle_hb_events);
}

/* Detach an mle from the heartbeat event list; safe to call when it is
 * already detached (list_empty check).  Lock-free variant: caller is
 * responsible for holding dlm->spinlock. */
static inline void __dlm_mle_detach_hb_events(struct dlm_ctxt *dlm,
					      struct dlm_master_list_entry *mle)
{
	if (!list_empty(&mle->hb_events))
		list_del_init(&mle->hb_events);
}

/* Locked wrapper around __dlm_mle_detach_hb_events(). */
static inline void dlm_mle_detach_hb_events(struct dlm_ctxt *dlm,
					    struct dlm_master_list_entry *mle)
{
	spin_lock(&dlm->spinlock);
	__dlm_mle_detach_hb_events(dlm, mle);
	spin_unlock(&dlm->spinlock);
}

/* Mark an mle as in use: bumps the inuse count and takes a kref.
 * Caller must hold both dlm->spinlock and dlm->master_lock (asserted). */
static void dlm_get_mle_inuse(struct dlm_master_list_entry *mle)
{
	struct dlm_ctxt *dlm;
	dlm = mle->dlm;

	assert_spin_locked(&dlm->spinlock);
	assert_spin_locked(&dlm->master_lock);
	mle->inuse++;
	kref_get(&mle->mle_refs);
}

/* Drop an in-use reference taken by dlm_get_mle_inuse(); takes both
 * spinlocks itself, so the caller must hold neither. */
static void dlm_put_mle_inuse(struct dlm_master_list_entry *mle)
{
	struct dlm_ctxt *dlm;
	dlm = mle->dlm;

	spin_lock(&dlm->spinlock);
	spin_lock(&dlm->master_lock);
	mle->inuse--;
	__dlm_put_mle(mle);
	spin_unlock(&dlm->master_lock);
	spin_unlock(&dlm->spinlock);
}

/* remove from list and free */
static void __dlm_put_mle(struct dlm_master_list_entry *mle)
{
	struct dlm_ctxt *dlm;
	dlm = mle->dlm;

	assert_spin_locked(&dlm->spinlock);
	assert_spin_locked(&dlm->master_lock);
	if (!atomic_read(&mle->mle_refs.refcount)) {
		/* this may or may not crash, but who cares.
		 * it's a BUG. */
		mlog(ML_ERROR, "bad mle: %p\n", mle);
		dlm_print_one_mle(mle);
		BUG();
	} else
		kref_put(&mle->mle_refs, dlm_mle_release);
}


/* must not have any spinlocks coming in */
static void dlm_put_mle(struct dlm_master_list_entry *mle)
{
	struct dlm_ctxt *dlm;
	dlm = mle->dlm;

	spin_lock(&dlm->spinlock);
	spin_lock(&dlm->master_lock);
	__dlm_put_mle(mle);
	spin_unlock(&dlm->master_lock);
	spin_unlock(&dlm->spinlock);
}

/* Take a plain kref on an mle. */
static inline void dlm_get_mle(struct dlm_master_list_entry *mle)
{
	kref_get(&mle->mle_refs);
}

/*
 * Initialize a freshly allocated master list entry and attach it to the
 * domain's heartbeat events.  For DLM_MLE_MASTER, @res must be non-NULL;
 * for BLOCK/MIGRATION mles the lock name is copied inline from @name.
 * Caller must hold dlm->spinlock (asserted) — required so no heartbeat
 * event is missed between copying domain_map and attaching (see the
 * commentary above this group of functions).
 */
static void dlm_init_mle(struct dlm_master_list_entry *mle,
			enum dlm_mle_type type,
			struct dlm_ctxt *dlm,
			struct dlm_lock_resource *res,
			const char *name,
			unsigned int namelen)
{
	assert_spin_locked(&dlm->spinlock);

	mle->dlm = dlm;
	mle->type = type;
	INIT_LIST_HEAD(&mle->list);
	INIT_LIST_HEAD(&mle->hb_events);
	memset(mle->maybe_map, 0, sizeof(mle->maybe_map));
	spin_lock_init(&mle->spinlock);
	init_waitqueue_head(&mle->wq);
	atomic_set(&mle->woken, 0);
	kref_init(&mle->mle_refs);
	memset(mle->response_map, 0, sizeof(mle->response_map));
	/* O2NM_MAX_NODES == "no master known yet" sentinel */
	mle->master = O2NM_MAX_NODES;
	mle->new_master = O2NM_MAX_NODES;
	mle->inuse = 0;

	if (mle->type == DLM_MLE_MASTER) {
		BUG_ON(!res);
		mle->u.res = res;
	} else if (mle->type == DLM_MLE_BLOCK) {
		BUG_ON(!name);
		memcpy(mle->u.name.name, name, namelen);
		mle->u.name.len = namelen;
	} else /* DLM_MLE_MIGRATION */ {
		BUG_ON(!name);
		memcpy(mle->u.name.name, name, namelen);
		mle->u.name.len = namelen;
	}

	/* copy off the node_map and register hb callbacks on our copy */
	memcpy(mle->node_map, dlm->domain_map, sizeof(mle->node_map));
	memcpy(mle->vote_map, dlm->domain_map, sizeof(mle->vote_map));
	/* the local node never votes for itself and is not tracked in
	 * its own node_map */
	clear_bit(dlm->node_num, mle->vote_map);
	clear_bit(dlm->node_num, mle->node_map);

	/* attach the mle to the domain node up/down events */
	__dlm_mle_attach_hb_events(dlm, mle);
}


/* returns 1 if found, 0 if not */
static int dlm_find_mle(struct dlm_ctxt *dlm,
			struct dlm_master_list_entry **mle,
			char *name, unsigned int namelen)
{
	struct dlm_master_list_entry *tmpmle;

	assert_spin_locked(&dlm->master_lock);

	/* linear scan of the domain's master list; the returned mle is
	 * handed back with an extra kref taken for the caller */
	list_for_each_entry(tmpmle, &dlm->master_list, list) {
		if (!dlm_mle_equal(dlm, tmpmle, name, namelen))
			continue;
		dlm_get_mle(tmpmle);
		*mle = tmpmle;
		return 1;
	}
	return 0;
}

/* Fan a node up/down heartbeat event out to every attached mle.
 * Caller must hold dlm->spinlock (asserted). */
void dlm_hb_event_notify_attached(struct dlm_ctxt *dlm, int idx, int node_up)
{
	struct dlm_master_list_entry *mle;

	assert_spin_locked(&dlm->spinlock);

	list_for_each_entry(mle, &dlm->mle_hb_events, hb_events) {
		if (node_up)
			dlm_mle_node_up(dlm, mle, NULL, idx);
		else
			dlm_mle_node_down(dlm, mle, NULL, idx);
	}
}

/* Heartbeat callback: clear node @idx from this mle's node_map. */
static void dlm_mle_node_down(struct dlm_ctxt *dlm,
			      struct dlm_master_list_entry *mle,
			      struct o2nm_node *node, int idx)
{
	spin_lock(&mle->spinlock);

	if (!test_bit(idx, mle->node_map))
		mlog(0, "node %u already removed from nodemap!\n", idx);
	else
		clear_bit(idx, mle->node_map);

	spin_unlock(&mle->spinlock);
}

/* Heartbeat callback: set node @idx in this mle's node_map. */
static void dlm_mle_node_up(struct dlm_ctxt *dlm,
			    struct dlm_master_list_entry *mle,
			    struct o2nm_node *node, int idx)
{
	spin_lock(&mle->spinlock);

	if (test_bit(idx, mle->node_map))
		mlog(0, "node %u already in node map!\n", idx);
	else
		set_bit(idx, mle->node_map);

	spin_unlock(&mle->spinlock);
}

/* Create the slab cache for master list entries.
 * Returns 0 on success, -ENOMEM on allocation failure. */
int dlm_init_mle_cache(void)
{
	dlm_mle_cache = kapi_kmem_cache_create("o2dlm_mle",
					  sizeof(struct dlm_master_list_entry),
					  0, SLAB_HWCACHE_ALIGN,
					  NULL);
	if (dlm_mle_cache == NULL)
		return -ENOMEM;
	return 0;
}

/* Tear down the mle slab cache (no-op if it was never created). */
void dlm_destroy_mle_cache(void)
{
	if (dlm_mle_cache)
		kmem_cache_destroy(dlm_mle_cache);
}

/* kref release callback: unlink the mle from all lists, detach it from
 * heartbeat events, and return it to the slab cache.  Runs with both
 * dlm->spinlock and dlm->master_lock held (asserted) since kref_put is
 * only called from __dlm_put_mle(). */
static void dlm_mle_release(struct kref *kref)
{
	struct dlm_master_list_entry *mle;
	struct dlm_ctxt *dlm;

	mlog_entry_void();

	mle = container_of(kref, struct dlm_master_list_entry, mle_refs);
	dlm = mle->dlm;

	if (mle->type != DLM_MLE_MASTER) {
		mlog(0, "calling mle_release for %.*s, type %d\n",
		     mle->u.name.len, mle->u.name.name, mle->type);
	} else {
		mlog(0, "calling mle_release for %.*s, type %d\n",
		     mle->u.res->lockname.len,
		     mle->u.res->lockname.name, mle->type);
	}
	assert_spin_locked(&dlm->spinlock);
	assert_spin_locked(&dlm->master_lock);

	/* remove from list if not already */
	if (!list_empty(&mle->list))
		list_del_init(&mle->list);

	/* detach the mle from the domain node up/down events */
	__dlm_mle_detach_hb_events(dlm, mle);

	/* NOTE: kfree under spinlock here.
	 * if this is bad, we can move this to a freelist. */
	kmem_cache_free(dlm_mle_cache, mle);
}


/*
 * LOCK RESOURCE FUNCTIONS
 */

/* Create the slab caches for lock resources and lock names.
 * Returns 0 on success; on failure destroys whatever was created and
 * returns -ENOMEM. */
int dlm_init_master_caches(void)
{
	dlm_lockres_cache = kapi_kmem_cache_create("o2dlm_lockres",
					      sizeof(struct dlm_lock_resource),
					      0, SLAB_HWCACHE_ALIGN, NULL);
	if (!dlm_lockres_cache)
		goto bail;

	dlm_lockname_cache = kapi_kmem_cache_create("o2dlm_lockname",
					       DLM_LOCKID_NAME_MAX, 0,
					       SLAB_HWCACHE_ALIGN, NULL);
	if (!dlm_lockname_cache)
		goto bail;

	return 0;
bail:
	dlm_destroy_master_caches();
	return -ENOMEM;
}

/* Tear down the lockres/lockname slab caches (safe if partially created;
 * this is also the error path of dlm_init_master_caches()). */
void dlm_destroy_master_caches(void)
{
	if (dlm_lockname_cache)
		kmem_cache_destroy(dlm_lockname_cache);

	if (dlm_lockres_cache)
		kmem_cache_destroy(dlm_lockres_cache);
}

/* NOTE(review): the body of this function is cut off in this chunk of the
 * file — only the signature and opening brace are visible here. */
static void dlm_set_lockres_owner(struct dlm_ctxt *dlm,
				  struct dlm_lock_resource *res,
				  u8 owner)
{
/*
 * Code-viewer UI chrome captured by the scrape — keyboard-shortcut help,
 * not part of dlmmaster.c:
 *
 * ⌨️ 快捷键说明 (keyboard shortcuts)
 * 复制代码 (copy code):        Ctrl + C
 * 搜索代码 (search code):      Ctrl + F
 * 全屏模式 (full screen):      F11
 * 切换主题 (toggle theme):     Ctrl + Shift + D
 * 显示快捷键 (show shortcuts): ?
 * 增大字号 (larger font):      Ctrl + =
 * 减小字号 (smaller font):     Ctrl + -
 */