📄 dlmglue.c
字号:
/* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * * dlmglue.c * * Code which implements an OCFS2 specific interface to our DLM. * * Copyright (C) 2003, 2004 Oracle. All rights reserved. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this program; if not, write to the * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 021110-1307, USA. */#include <linux/types.h>#include <linux/slab.h>#include <linux/highmem.h>#include <linux/mm.h>#include <linux/crc32.h>#include <linux/kthread.h>#include <linux/pagemap.h>#include <linux/debugfs.h>#include <linux/seq_file.h>#include <cluster/heartbeat.h>#include <cluster/nodemanager.h>#include <cluster/tcp.h>#include <dlm/dlmapi.h>#define MLOG_MASK_PREFIX ML_DLM_GLUE#include <cluster/masklog.h>#include "ocfs2.h"#include "alloc.h"#include "dcache.h"#include "dlmglue.h"#include "extent_map.h"#include "file.h"#include "heartbeat.h"#include "inode.h"#include "journal.h"#include "slot_map.h"#include "super.h"#include "uptodate.h"#include "vote.h"#include "buffer_head_io.h"struct ocfs2_mask_waiter { struct list_head mw_item; int mw_status; struct completion mw_complete; unsigned long mw_mask; unsigned long mw_goal;};static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres);static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres);/* * Return value from ->downconvert_worker functions. * * These control the precise actions of ocfs2_unblock_lock() * and ocfs2_process_blocked_lock() * */enum ocfs2_unblock_action { UNBLOCK_CONTINUE = 0, /* Continue downconvert */ UNBLOCK_CONTINUE_POST = 1, /* Continue downconvert, fire * ->post_unlock callback */ UNBLOCK_STOP_POST = 2, /* Do not downconvert, fire * ->post_unlock() callback. */};struct ocfs2_unblock_ctl { int requeue; enum ocfs2_unblock_action unblock_action;};static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, int new_level);static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres);static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, int blocking);static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, int blocking);static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, struct ocfs2_lock_res *lockres);#define mlog_meta_lvb(__level, __lockres) ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__, __LINE__, __lockres)/* This aids in debugging situations where a bad LVB might be involved. */static void ocfs2_dump_meta_lvb_info(u64 level, const char *function, unsigned int line, struct ocfs2_lock_res *lockres){ struct ocfs2_meta_lvb *lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; mlog(level, "LVB information for %s (called from %s:%u):\n", lockres->l_name, function, line); mlog(level, "version: %u, clusters: %u, generation: 0x%x\n", lvb->lvb_version, be32_to_cpu(lvb->lvb_iclusters), be32_to_cpu(lvb->lvb_igeneration)); mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n", (unsigned long long)be64_to_cpu(lvb->lvb_isize), be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid), be16_to_cpu(lvb->lvb_imode)); mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, " "mtime_packed 0x%llx iattr 0x%x\n", be16_to_cpu(lvb->lvb_inlink), (long long)be64_to_cpu(lvb->lvb_iatime_packed), (long long)be64_to_cpu(lvb->lvb_ictime_packed), (long long)be64_to_cpu(lvb->lvb_imtime_packed), be32_to_cpu(lvb->lvb_iattr));}/* * OCFS2 Lock Resource Operations * * These fine tune the behavior of the generic dlmglue locking infrastructure. * * The most basic of lock types can point ->l_priv to their respective * struct ocfs2_super and allow the default actions to manage things. * * Right now, each lock type also needs to implement an init function, * and trivial lock/unlock wrappers. ocfs2_simple_drop_lockres() * should be called when the lock is no longer needed (i.e., object * destruction time). */struct ocfs2_lock_res_ops { /* * Translate an ocfs2_lock_res * into an ocfs2_super *. Define * this callback if ->l_priv is not an ocfs2_super pointer */ struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *); /* * Optionally called in the downconvert (or "vote") thread * after a successful downconvert. The lockres will not be * referenced after this callback is called, so it is safe to * free memory, etc. * * The exact semantics of when this is called are controlled * by ->downconvert_worker() */ void (*post_unlock)(struct ocfs2_super *, struct ocfs2_lock_res *); /* * Allow a lock type to add checks to determine whether it is * safe to downconvert a lock. Return 0 to re-queue the * downconvert at a later time, nonzero to continue. * * For most locks, the default checks that there are no * incompatible holders are sufficient. * * Called with the lockres spinlock held. */ int (*check_downconvert)(struct ocfs2_lock_res *, int); /* * Allows a lock type to populate the lock value block. This * is called on downconvert, and when we drop a lock. * * Locks that want to use this should set LOCK_TYPE_USES_LVB * in the flags field. * * Called with the lockres spinlock held. */ void (*set_lvb)(struct ocfs2_lock_res *); /* * Called from the downconvert thread when it is determined * that a lock will be downconverted. This is called without * any locks held so the function can do work that might * schedule (syncing out data, etc). * * This should return any one of the ocfs2_unblock_action * values, depending on what it wants the thread to do. */ int (*downconvert_worker)(struct ocfs2_lock_res *, int); /* * LOCK_TYPE_* flags which describe the specific requirements * of a lock type. Descriptions of each individual flag follow. */ int flags;};/* * Some locks want to "refresh" potentially stale data when a * meaningful (PRMODE or EXMODE) lock level is first obtained. If this * flag is set, the OCFS2_LOCK_NEEDS_REFRESH flag will be set on the * individual lockres l_flags member from the ast function. It is * expected that the locking wrapper will clear the * OCFS2_LOCK_NEEDS_REFRESH flag when done. */#define LOCK_TYPE_REQUIRES_REFRESH 0x1/* * Indicate that a lock type makes use of the lock value block. The * ->set_lvb lock type callback must be defined. */#define LOCK_TYPE_USES_LVB 0x2static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = { .get_osb = ocfs2_get_inode_osb, .flags = 0,};static struct ocfs2_lock_res_ops ocfs2_inode_meta_lops = { .get_osb = ocfs2_get_inode_osb, .check_downconvert = ocfs2_check_meta_downconvert, .set_lvb = ocfs2_set_meta_lvb, .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,};static struct ocfs2_lock_res_ops ocfs2_inode_data_lops = { .get_osb = ocfs2_get_inode_osb, .downconvert_worker = ocfs2_data_convert_worker, .flags = 0,};static struct ocfs2_lock_res_ops ocfs2_super_lops = { .flags = LOCK_TYPE_REQUIRES_REFRESH,};static struct ocfs2_lock_res_ops ocfs2_rename_lops = { .flags = 0,};static struct ocfs2_lock_res_ops ocfs2_dentry_lops = { .get_osb = ocfs2_get_dentry_osb, .post_unlock = ocfs2_dentry_post_unlock, .downconvert_worker = ocfs2_dentry_convert_worker, .flags = 0,};static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = { .get_osb = ocfs2_get_inode_osb, .flags = 0,};static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres){ return lockres->l_type == OCFS2_LOCK_TYPE_META || lockres->l_type == OCFS2_LOCK_TYPE_DATA || lockres->l_type == OCFS2_LOCK_TYPE_RW || lockres->l_type == OCFS2_LOCK_TYPE_OPEN;}static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres){ BUG_ON(!ocfs2_is_inode_lock(lockres)); return (struct inode *) lockres->l_priv;}static inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res *lockres){ BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_DENTRY); return (struct ocfs2_dentry_lock *)lockres->l_priv;}static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres){ if (lockres->l_ops->get_osb) return lockres->l_ops->get_osb(lockres); return (struct ocfs2_super *)lockres->l_priv;}static int ocfs2_lock_create(struct ocfs2_super *osb, struct ocfs2_lock_res *lockres, int level, int dlm_flags);static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres, int wanted);static void ocfs2_cluster_unlock(struct ocfs2_super *osb, struct ocfs2_lock_res *lockres, int level);static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres);static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres);static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres);static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, int level);static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb, struct ocfs2_lock_res *lockres);static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, int convert);#define ocfs2_log_dlm_error(_func, _stat, _lockres) do { \ mlog(ML_ERROR, "Dlm error \"%s\" while calling %s on " \ "resource %s: %s\n", dlm_errname(_stat), _func, \ _lockres->l_name, dlm_errmsg(_stat)); \} while (0)static void ocfs2_vote_on_unlock(struct ocfs2_super *osb, struct ocfs2_lock_res *lockres);static int ocfs2_meta_lock_update(struct inode *inode, struct buffer_head **bh);static void ocfs2_drop_osb_locks(struct ocfs2_super *osb);static inline int ocfs2_highest_compat_lock_level(int level);static void ocfs2_build_lock_name(enum ocfs2_lock_type type, u64 blkno, u32 generation, char *name){ int len; mlog_entry_void(); BUG_ON(type >= OCFS2_NUM_LOCK_TYPES); len = snprintf(name, OCFS2_LOCK_ID_MAX_LEN, "%c%s%016llx%08x", ocfs2_lock_type_char(type), OCFS2_LOCK_ID_PAD, (long long)blkno, generation); BUG_ON(len != (OCFS2_LOCK_ID_MAX_LEN - 1)); mlog(0, "built lock resource with name: %s\n", name); mlog_exit_void();}static DEFINE_SPINLOCK(ocfs2_dlm_tracking_lock);static void ocfs2_add_lockres_tracking(struct ocfs2_lock_res *res, struct ocfs2_dlm_debug *dlm_debug){ mlog(0, "Add tracking for lockres %s\n", res->l_name); spin_lock(&ocfs2_dlm_tracking_lock); list_add(&res->l_debug_list, &dlm_debug->d_lockres_tracking); spin_unlock(&ocfs2_dlm_tracking_lock);}static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res){ spin_lock(&ocfs2_dlm_tracking_lock); if (!list_empty(&res->l_debug_list)) list_del_init(&res->l_debug_list); spin_unlock(&ocfs2_dlm_tracking_lock);}static void ocfs2_lock_res_init_common(struct ocfs2_super *osb, struct ocfs2_lock_res *res, enum ocfs2_lock_type type, struct ocfs2_lock_res_ops *ops, void *priv){ res->l_type = type; res->l_ops = ops; res->l_priv = priv; res->l_level = LKM_IVMODE; res->l_requested = LKM_IVMODE; res->l_blocking = LKM_IVMODE; res->l_action = OCFS2_AST_INVALID; res->l_unlock_action = OCFS2_UNLOCK_INVALID; res->l_flags = OCFS2_LOCK_INITIALIZED; ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug);}void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res){ /* This also clears out the lock status block */ memset(res, 0, sizeof(struct ocfs2_lock_res)); spin_lock_init(&res->l_lock); init_waitqueue_head(&res->l_event); INIT_LIST_HEAD(&res->l_blocked_list); INIT_LIST_HEAD(&res->l_mask_waiters);}void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, enum ocfs2_lock_type type, unsigned int generation, struct inode *inode){ struct ocfs2_lock_res_ops *ops; switch(type) { case OCFS2_LOCK_TYPE_RW: ops = &ocfs2_inode_rw_lops; break; case OCFS2_LOCK_TYPE_META: ops = &ocfs2_inode_meta_lops; break; case OCFS2_LOCK_TYPE_DATA: ops = &ocfs2_inode_data_lops; break; case OCFS2_LOCK_TYPE_OPEN: ops = &ocfs2_inode_open_lops; break; default: mlog_bug_on_msg(1, "type: %d\n", type); ops = NULL; /* thanks, gcc */ break; }; ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno, generation, res->l_name); ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode);}static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres){ struct inode *inode = ocfs2_lock_res_inode(lockres); return OCFS2_SB(inode->i_sb);}static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres){ __be64 inode_blkno_be; memcpy(&inode_blkno_be, &lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], sizeof(__be64)); return be64_to_cpu(inode_blkno_be);}static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres){ struct ocfs2_dentry_lock *dl = lockres->l_priv; return OCFS2_SB(dl->dl_inode->i_sb);}void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl, u64 parent, struct inode *inode){ int len; u64 inode_blkno = OCFS2_I(inode)->ip_blkno; __be64 inode_blkno_be = cpu_to_be64(inode_blkno); struct ocfs2_lock_res *lockres = &dl->dl_lockres; ocfs2_lock_res_init_once(lockres); /* * Unfortunately, the standard lock naming scheme won't work * here because we have two 16 byte values to use. Instead, * we'll stuff the inode number as a binary value. We still * want error prints to show something without garbling the * display, so drop a null byte in there before the inode * number. A future version of OCFS2 will likely use all * binary lock names. The stringified names have been a * tremendous aid in debugging, but now that the debugfs * interface exists, we can mangle things there if need be. * * NOTE: We also drop the standard "pad" value (the total lock * name size stays the same though - the last part is all * zeros due to the memset in ocfs2_lock_res_init_once() */ len = snprintf(lockres->l_name, OCFS2_DENTRY_LOCK_INO_START, "%c%016llx", ocfs2_lock_type_char(OCFS2_LOCK_TYPE_DENTRY), (long long)parent); BUG_ON(len != (OCFS2_DENTRY_LOCK_INO_START - 1)); memcpy(&lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], &inode_blkno_be, sizeof(__be64)); ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres, OCFS2_LOCK_TYPE_DENTRY, &ocfs2_dentry_lops, dl);}static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res, struct ocfs2_super *osb){ /* Superblock lockres doesn't come from a slab so we call init
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -