📄 dlmglue.c

📁 ocfs1.4.1 oracle分布式文件系统
💻 C
📖 第 1 页 / 共 5 页
字号:
	lockres = &OCFS2_I(inode)->ip_open_lockres;

	level = write ? LKM_EXMODE : LKM_PRMODE;

	/*
	 * The file system may already be holding a PRMODE/EXMODE open lock.
	 * Since we pass LKM_NOQUEUE, the request won't block waiting on
	 * other nodes and the -EAGAIN will indicate to the caller that
	 * this inode is still in use.
	 */
	status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres,
				    level, LKM_NOQUEUE, 0);

out:
	mlog_exit(status);
	return status;
}

/*
 * ocfs2_open_unlock - drop the PR and/or EX mode open locks of an inode.
 *
 * Does nothing on a local mount (ocfs2_mount_local() is true). Otherwise
 * a PRMODE unlock is issued when the open lockres has read-only holders
 * and an EXMODE unlock is issued when it has exclusive holders; both may
 * happen in the same call.
 */
void ocfs2_open_unlock(struct inode *inode)
{
	struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_open_lockres;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	mlog_entry_void();

	mlog(0, "inode %llu drop open lock\n",
	     (unsigned long long)OCFS2_I(inode)->ip_blkno);

	if (ocfs2_mount_local(osb))
		goto out;

	if(lockres->l_ro_holders)
		ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres,
				     LKM_PRMODE);
	if(lockres->l_ex_holders)
		ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres,
				     LKM_EXMODE);

out:
	mlog_exit_void();
}

/*
 * Back out of an interrupted flock request (called by ocfs2_file_lock()
 * when its interruptible wait returns -ERESTARTSYS).
 *
 * While the lockres is still marked OCFS2_LOCK_BUSY this loops:
 *  - if ocfs2_prepare_cancel_convert() returns non-zero, the spinlock is
 *    dropped, ocfs2_cancel_convert() is issued and the check is retried;
 *  - otherwise it waits for OCFS2_LOCK_BUSY to clear and retries.
 *
 * Returns 0 if the lockres ended up at the requested 'level' anyway,
 * -ERESTARTSYS if it did not, or the negative error returned by
 * ocfs2_cancel_convert().
 */
static int ocfs2_flock_handle_signal(struct ocfs2_lock_res *lockres,
				     int level)
{
	int ret;
	struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
	unsigned long flags;
	struct ocfs2_mask_waiter mw;

	ocfs2_init_mask_waiter(&mw);

retry_cancel:
	spin_lock_irqsave(&lockres->l_lock, flags);
	if (lockres->l_flags & OCFS2_LOCK_BUSY) {
		ret = ocfs2_prepare_cancel_convert(osb, lockres);
		if (ret) {
			spin_unlock_irqrestore(&lockres->l_lock, flags);
			ret = ocfs2_cancel_convert(osb, lockres);
			if (ret < 0) {
				mlog_errno(ret);
				goto out;
			}
			goto retry_cancel;
		}
		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
		spin_unlock_irqrestore(&lockres->l_lock, flags);

		ocfs2_wait_for_mask(&mw);
		goto retry_cancel;
	}

	ret = -ERESTARTSYS;
	/*
	 * We may still have gotten the lock, in which case there's no
	 * point to restarting the syscall.
	 */
	if (lockres->l_level == level)
		ret = 0;

	mlog(0, "Cancel returning %d. flags: 0x%lx, level: %d, act: %d\n", ret,
	     lockres->l_flags, lockres->l_level, lockres->l_action);

	spin_unlock_irqrestore(&lockres->l_lock, flags);

out:
	return ret;
}

/*
 * ocfs2_file_lock() and ocfs2_file_unlock() map to a single pair of
 * flock() calls. The locking approach this requires is sufficiently
 * different from all other cluster lock types that we implement a
 * separate path to the "low-level" dlm calls. In particular:
 *
 * - No optimization of lock levels is done - we take at exactly
 *   what's been requested.
 *
 * - No lock caching is employed. We immediately downconvert to
 *   no-lock at unlock time. This also means flock locks never go on
 *   the blocking list.
 *
 * - Since userspace can trivially deadlock itself with flock, we make
 *   sure to allow cancellation of a misbehaving application's flock()
 *   request.
 *
 * - Access to any flock lockres doesn't require concurrency, so we
 *   can simplify the code by requiring the caller to guarantee
 *   serialization of dlmglue flock calls.
 *
 * Parameters:
 *   file    - open file whose private data carries the flock lockres.
 *   ex      - non-zero requests LKM_EXMODE, zero requests LKM_PRMODE.
 *   trylock - non-zero adds LKM_NOQUEUE to the dlm request.
 *
 * Returns:
 *   -EINVAL if the lockres is already busy or above LKM_NLMODE on entry,
 *           or if dlmlock() fails for any reason other than the trylock
 *           case below;
 *   -EAGAIN if 'trylock' is set and dlmlock() returns DLM_NOTQUEUED;
 *   a negative value from ocfs2_lock_create()/ocfs2_wait_for_mask() if
 *           creating the initial NLMODE lock fails;
 *   otherwise the result of the interruptible wait, replaced by the
 *           result of ocfs2_flock_handle_signal() when that wait was
 *           interrupted.
 */
int ocfs2_file_lock(struct file *file, int ex, int trylock)
{
	int ret, level = ex ? LKM_EXMODE : LKM_PRMODE;
	unsigned int lkm_flags = trylock ? LKM_NOQUEUE : 0;
	unsigned long flags;
	struct ocfs2_file_private *fp = file->private_data;
	struct ocfs2_lock_res *lockres = &fp->fp_flock;
	struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb);
	struct ocfs2_mask_waiter mw;

	ocfs2_init_mask_waiter(&mw);

	/* Checked without l_lock; see the serialization note above. */
	if ((lockres->l_flags & OCFS2_LOCK_BUSY) ||
	    (lockres->l_level > LKM_NLMODE)) {
		mlog(ML_ERROR,
		     "File lock \"%s\" has busy or locked state: flags: 0x%lx, "
		     "level: %u\n", lockres->l_name, lockres->l_flags,
		     lockres->l_level);
		return -EINVAL;
	}

	spin_lock_irqsave(&lockres->l_lock, flags);
	if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
		spin_unlock_irqrestore(&lockres->l_lock, flags);

		/*
		 * Get the lock at NLMODE to start - that way we
		 * can cancel the upconvert request if need be.
		 */
		ret = ocfs2_lock_create(osb, lockres, LKM_NLMODE, 0);
		if (ret < 0) {
			mlog_errno(ret);
			goto out;
		}

		ret = ocfs2_wait_for_mask(&mw);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
		/* Re-take l_lock so the convert setup below runs locked. */
		spin_lock_irqsave(&lockres->l_lock, flags);
	}

	/* Set up the convert to the requested level and mark the lock busy. */
	lockres->l_action = OCFS2_AST_CONVERT;
	lkm_flags |= LKM_CONVERT;
	lockres->l_requested = level;
	lockres_or_flags(lockres, OCFS2_LOCK_BUSY);

	lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	ret = dlmlock(osb->dlm, level, &lockres->l_lksb, lkm_flags,
		      lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1,
		      ocfs2_locking_ast, lockres, ocfs2_blocking_ast);
	if (ret != DLM_NORMAL) {
		if (trylock && ret == DLM_NOTQUEUED)
			ret = -EAGAIN;
		else {
			ocfs2_log_dlm_error("dlmlock", ret, lockres);
			ret = -EINVAL;
		}

		ocfs2_recover_from_dlm_error(lockres, 1);
		lockres_remove_mask_waiter(lockres, &mw);
		goto out;
	}

	ret = ocfs2_wait_for_mask_interruptible(&mw, lockres);
	if (ret == -ERESTARTSYS) {
		/*
		 * Userspace can cause deadlock itself with
		 * flock(). Current behavior locally is to allow the
		 * deadlock, but abort the system call if a signal is
		 * received. We follow this example, otherwise a
		 * poorly written program could sit in kernel until
		 * reboot.
		 *
		 * Handling this is a bit more complicated for Ocfs2
		 * though. We can't exit this function with an
		 * outstanding lock request, so a cancel convert is
		 * required. We intentionally overwrite 'ret' - if the
		 * cancel fails and the lock was granted, it's easier
		 * to just bubble success back up to the user.
		 */
		ret = ocfs2_flock_handle_signal(lockres, level);
	}

out:

	mlog(0, "Lock: \"%s\" ex: %d, trylock: %d, returns: %d\n",
	     lockres->l_name, ex, trylock, ret);
	return ret;
}

/*
 * Release an flock lock by downconverting its lockres to LKM_NLMODE.
 *
 * Returns immediately if the lockres is not attached or is already at
 * LKM_NLMODE. Failures of the downconvert or of the wait for it to
 * complete are only logged; nothing is reported to the caller.
 */
void ocfs2_file_unlock(struct file *file)
{
	int ret;
	unsigned long flags;
	struct ocfs2_file_private *fp = file->private_data;
	struct ocfs2_lock_res *lockres = &fp->fp_flock;
	struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb);
	struct ocfs2_mask_waiter mw;

	ocfs2_init_mask_waiter(&mw);

	if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED))
		return;

	if (lockres->l_level == LKM_NLMODE)
		return;

	mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n",
	     lockres->l_name, lockres->l_flags, lockres->l_level,
	     lockres->l_action);

	spin_lock_irqsave(&lockres->l_lock, flags);
	/*
	 * Fake a blocking ast for the downconvert code.
	 */
	lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
	lockres->l_blocking = LKM_EXMODE;

	ocfs2_prepare_downconvert(lockres, LKM_NLMODE);
	lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	ret = ocfs2_downconvert_lock(osb, lockres, LKM_NLMODE, 0);
	if (ret) {
		mlog_errno(ret);
		return;
	}

	ret = ocfs2_wait_for_mask(&mw);
	if (ret)
		mlog_errno(ret);
}

/*
 * Wake the downconvert thread if this lockres is blocked and the
 * remaining holder counts no longer conflict with the blocking level:
 * no holders at all for LKM_EXMODE, no exclusive holders for LKM_PRMODE.
 * Any other blocking level on a blocked lockres hits BUG().
 */
static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
					struct ocfs2_lock_res *lockres)
{
	int kick = 0;

	mlog_entry_void();

	/* If we know that another node is waiting on our lock, kick
	 * the downconvert thread pre-emptively when we reach a release
	 * condition. */
	if (lockres->l_flags & OCFS2_LOCK_BLOCKED) {
		switch(lockres->l_blocking) {
		case LKM_EXMODE:
			if (!lockres->l_ex_holders && !lockres->l_ro_holders)
				kick = 1;
			break;
		case LKM_PRMODE:
			if (!lockres->l_ex_holders)
				kick = 1;
			break;
		default:
			BUG();
		}
	}

	if (kick)
		ocfs2_wake_downconvert_thread(osb);

	mlog_exit_void();
}

/*
 * Layout of a packed 64-bit timestamp: the seconds occupy the upper
 * OCFS2_SEC_BITS (34) bits and the nanoseconds the lower
 * OCFS2_SEC_SHIFT (30) bits.
 * NOTE(review): OCFS2_SEC_SHIFT repeats the literal 34 instead of using
 * OCFS2_SEC_BITS, so the two must be kept in sync by hand.
 */
#define OCFS2_SEC_BITS   34
#define OCFS2_SEC_SHIFT  (64 - 34)
#define OCFS2_NSEC_MASK  ((1ULL << OCFS2_SEC_SHIFT) - 1)

/* LVB only has room for 64 bits of time here so we pack it for
 * now.
 *
 * Seconds are shifted left by OCFS2_SEC_SHIFT and OR-ed with the
 * nanoseconds masked by OCFS2_NSEC_MASK. Bits of tv_sec above bit 33
 * are shifted out and therefore not preserved. */
static u64 ocfs2_pack_timespec(struct timespec *spec)
{
	u64 res;
	u64 sec = spec->tv_sec;
	u32 nsec = spec->tv_nsec;

	res = (sec << OCFS2_SEC_SHIFT) | (nsec & OCFS2_NSEC_MASK);

	return res;
}

/* Call this with the lockres locked. I am reasonably sure we don't
 * need ip_lock in this function as anyone who would be changing those
 * values is supposed to be blocked in ocfs2_inode_lock right now.
 *
 * Copies the inode's size, cluster count, ownership, mode, link count,
 * packed timestamps, attributes, dynamic features and generation into
 * the LVB of the inode lockres, converting each field to big-endian. */
static void __ocfs2_stuff_meta_lvb(struct inode *inode)
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
	struct ocfs2_meta_lvb *lvb;

	mlog_entry_void();

	lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb;

	/*
	 * Invalidate the LVB of a deleted inode - this way other
	 * nodes are forced to go to disk and discover the new inode
	 * status.
	 */
	if (oi->ip_flags & OCFS2_INODE_DELETED) {
		lvb->lvb_version = 0;
		goto out;
	}

	lvb->lvb_version   = OCFS2_LVB_VERSION;
	lvb->lvb_isize	   = cpu_to_be64(i_size_read(inode));
	lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters);
	lvb->lvb_iuid      = cpu_to_be32(inode->i_uid);
	lvb->lvb_igid      = cpu_to_be32(inode->i_gid);
	lvb->lvb_imode     = cpu_to_be16(inode->i_mode);
	lvb->lvb_inlink    = cpu_to_be16(inode->i_nlink);
	lvb->lvb_iatime_packed  =
		cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime));
	lvb->lvb_ictime_packed =
		cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime));
	lvb->lvb_imtime_packed =
		cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime));
	lvb->lvb_iattr    = cpu_to_be32(oi->ip_attr);
	lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features);
	lvb->lvb_igeneration = cpu_to_be32(inode->i_generation);

out:
	mlog_meta_lvb(0, lockres);

	mlog_exit_void();
}

/*
 * Split a packed 64-bit timestamp into *spec: seconds come from the
 * bits above OCFS2_SEC_SHIFT, nanoseconds from the bits selected by
 * OCFS2_NSEC_MASK.
 */
static void ocfs2_unpack_timespec(struct timespec *spec,
				  u64 packed_time)
{
	spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT;
	spec->tv_nsec = packed_time & OCFS2_NSEC_MASK;
}

/*
 * Refresh the in-memory inode from the LVB of its inode lockres.
 *
 * All fields are converted from big-endian and written while holding
 * oi->ip_lock. i_blocks is recomputed from the refreshed cluster count
 * and size, except that a symlink with no clusters gets i_blocks = 0.
 */
static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
	struct ocfs2_meta_lvb *lvb;

	mlog_entry_void();

	mlog_meta_lvb(0, lockres);

	lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb;

	/* We're safe here without the lockres lock... */
	spin_lock(&oi->ip_lock);
	oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters);
	i_size_write(inode, be64_to_cpu(lvb->lvb_isize));

	oi->ip_attr = be32_to_cpu(lvb->lvb_iattr);
	oi->ip_dyn_features = be16_to_cpu(lvb->lvb_idynfeatures);
	ocfs2_set_inode_flags(inode);

	/* fast-symlinks are a special case */
	if (S_ISLNK(inode->i_mode) && !oi->ip_clusters)
		inode->i_blocks = 0;
	else
		inode->i_blocks = ocfs2_inode_sector_count(inode);

	inode->i_uid     = be32_to_cpu(lvb->lvb_iuid);
	inode->i_gid     = be32_to_cpu(lvb->lvb_igid);
	inode->i_mode    = be16_to_cpu(lvb->lvb_imode);
	inode->i_nlink   = be16_to_cpu(lvb->lvb_inlink);
	ocfs2_unpack_timespec(&inode->i_atime,
			      be64_to_cpu(lvb->lvb_iatime_packed));
	ocfs2_unpack_timespec(&inode->i_mtime,
			      be64_to_cpu(lvb->lvb_imtime_packed));
	ocfs2_unpack_timespec(&inode->i_ctime,
			      be64_to_cpu(lvb->lvb_ictime_packed));
	spin_unlock(&oi->ip_lock);

	mlog_exit_void();
}

/*
 * Return 1 when the LVB attached to 'lockres' carries the current
 * OCFS2_LVB_VERSION and its generation matches inode->i_generation;
 * return 0 otherwise.
 */
static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode,
					      struct ocfs2_lock_res *lockres)
{
	struct ocfs2_meta_lvb *lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb;

	if (lvb->lvb_version == OCFS2_LVB_VERSION
	    && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation)
		return 1;
	return 0;
}

/* Determine whether a lock resource needs to be refreshed, and
 * arbitrate who gets to refresh it.
 *
 *   0 means no refresh needed.
 *
 *   > 0 means you need to refresh this and you MUST call
 *   ocfs2_complete_lock_res_refresh afterwards. */
static int ocfs2_should_refresh_lock_res(struct ocfs2_lock_res *lockres)
{
	unsigned long flags;
	int status = 0;

	mlog_entry_void();

refresh_check:
	spin_lock_irqsave(&lockres->l_lock, flags);
	if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) {
		spin_unlock_irqrestore(&lockres->l_lock, flags);
		goto bail;
	}

	/* Someone else is refreshing: wait for them, then re-check. */
	if (lockres->l_flags & OCFS2_LOCK_REFRESHING) {
		spin_unlock_irqrestore(&lockres->l_lock, flags);

		ocfs2_wait_on_refreshing_lock(lockres);
		goto refresh_check;
	}

	/* Ok, I'll be the one to refresh this lock. */
	lockres_or_flags(lockres, OCFS2_LOCK_REFRESHING);
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	status = 1;
bail:
	mlog_exit(status);
	return status;
}

/* If status is non zero, I'll mark it as not being in refresh
 * anymore, but I won't clear the needs refresh flag.
 *
 * In both cases waiters on lockres->l_event are woken afterwards. */
static inline void ocfs2_complete_lock_res_refresh(struct ocfs2_lock_res *lockres,
						   int status)
{
	unsigned long flags;
	mlog_entry_void();

	spin_lock_irqsave(&lockres->l_lock, flags);
	lockres_clear_flags(lockres, OCFS2_LOCK_REFRESHING);
	if (!status)
		lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	wake_up(&lockres->l_event);

	mlog_exit_void();
}

/* may or may not return a bh if it went to disk. */
static int ocfs2_inode_lock_update(struct inode *inode,
				  struct buffer_head **bh)
{
	int status = 0;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
	struct ocfs2_dinode *fe;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
💿 文件大小 441 K
👤 上传用户 jxsflq
📂 所属分类网络
🏷️ 相关标签

#oracle #ocfs #分布式 #文件系统
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -