/* dlmglue.c — OCFS2 DLM glue (excerpt; captured from a web code viewer) */
	/* NOTE(review): continuation of a function whose opening lines fall
	 * before this excerpt — it packs in-memory inode metadata into the
	 * lock value block (lvb) in big-endian form; confirm against the
	 * full file. */
		cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime));
	lvb->lvb_ictime_packed =
		cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime));
	lvb->lvb_imtime_packed =
		cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime));
	lvb->lvb_iattr = cpu_to_be32(oi->ip_attr);
	lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features);
	lvb->lvb_igeneration = cpu_to_be32(inode->i_generation);

out:
	mlog_meta_lvb(0, lockres);

	mlog_exit_void();
}

/* Split a packed u64 time back into seconds/nanoseconds: seconds live in
 * the high bits (>> OCFS2_SEC_SHIFT), nanoseconds in the masked low bits. */
static void ocfs2_unpack_timespec(struct timespec *spec,
				  u64 packed_time)
{
	spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT;
	spec->tv_nsec = packed_time & OCFS2_NSEC_MASK;
}

/* Copy the inode metadata cached in the lock value block (size, uid/gid,
 * mode, link count, timestamps, ...) into the in-memory inode, under
 * ip_lock. */
static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_lock_res *lockres = &oi->ip_meta_lockres;
	struct ocfs2_meta_lvb *lvb;

	mlog_entry_void();

	mlog_meta_lvb(0, lockres);

	lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb;

	/* We're safe here without the lockres lock... */
	spin_lock(&oi->ip_lock);
	oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters);
	i_size_write(inode, be64_to_cpu(lvb->lvb_isize));

	oi->ip_attr = be32_to_cpu(lvb->lvb_iattr);
	oi->ip_dyn_features = be16_to_cpu(lvb->lvb_idynfeatures);
	ocfs2_set_inode_flags(inode);

	/* fast-symlinks are a special case */
	if (S_ISLNK(inode->i_mode) && !oi->ip_clusters)
		inode->i_blocks = 0;
	else
		inode->i_blocks = ocfs2_inode_sector_count(inode);

	inode->i_uid = be32_to_cpu(lvb->lvb_iuid);
	inode->i_gid = be32_to_cpu(lvb->lvb_igid);
	inode->i_mode = be16_to_cpu(lvb->lvb_imode);
	inode->i_nlink = be16_to_cpu(lvb->lvb_inlink);
	ocfs2_unpack_timespec(&inode->i_atime,
			      be64_to_cpu(lvb->lvb_iatime_packed));
	ocfs2_unpack_timespec(&inode->i_mtime,
			      be64_to_cpu(lvb->lvb_imtime_packed));
	ocfs2_unpack_timespec(&inode->i_ctime,
			      be64_to_cpu(lvb->lvb_ictime_packed));
	spin_unlock(&oi->ip_lock);

	mlog_exit_void();
}

/* An LVB is only trusted when its version matches OCFS2_LVB_VERSION and
 * its stored generation matches the inode's — otherwise the caller must
 * refresh from disk. Returns 1 if trustable, 0 if not. */
static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode,
					      struct ocfs2_lock_res *lockres)
{
	struct ocfs2_meta_lvb *lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb;

	if (lvb->lvb_version == OCFS2_LVB_VERSION &&
	    be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation)
		return 1;
	return 0;
}

/* Determine whether a lock resource needs to be refreshed, and
 * arbitrate who gets to refresh it.
 *
 * 0 means no refresh needed.
 *
 * > 0 means you need to refresh this and you MUST call
 * ocfs2_complete_lock_res_refresh afterwards. */
static int ocfs2_should_refresh_lock_res(struct ocfs2_lock_res *lockres)
{
	unsigned long flags;
	int status = 0;

	mlog_entry_void();

refresh_check:
	spin_lock_irqsave(&lockres->l_lock, flags);
	if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) {
		spin_unlock_irqrestore(&lockres->l_lock, flags);
		goto bail;
	}

	if (lockres->l_flags & OCFS2_LOCK_REFRESHING) {
		/* Someone else is refreshing; wait for them to finish,
		 * then re-check the flags from the top. */
		spin_unlock_irqrestore(&lockres->l_lock, flags);

		ocfs2_wait_on_refreshing_lock(lockres);
		goto refresh_check;
	}

	/* Ok, I'll be the one to refresh this lock. */
	lockres_or_flags(lockres, OCFS2_LOCK_REFRESHING);
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	status = 1;
bail:
	mlog_exit(status);
	return status;
}

/* If status is non zero, I'll mark it as not being in refresh
 * anymore, but i won't clear the needs refresh flag. */
static inline void ocfs2_complete_lock_res_refresh(struct ocfs2_lock_res *lockres,
						   int status)
{
	unsigned long flags;
	mlog_entry_void();

	spin_lock_irqsave(&lockres->l_lock, flags);
	lockres_clear_flags(lockres, OCFS2_LOCK_REFRESHING);
	if (!status)
		lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	/* Wake anyone parked in ocfs2_wait_on_refreshing_lock(). */
	wake_up(&lockres->l_event);

	mlog_exit_void();
}

/* may or may not return a bh if it went to disk.
*/
static int ocfs2_meta_lock_update(struct inode *inode,
				  struct buffer_head **bh)
{
	int status = 0;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_lock_res *lockres = &oi->ip_meta_lockres;
	struct ocfs2_dinode *fe;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	mlog_entry_void();

	/* Local (non-clustered) mounts never need an LVB/disk refresh. */
	if (ocfs2_mount_local(osb))
		goto bail;

	spin_lock(&oi->ip_lock);
	if (oi->ip_flags & OCFS2_INODE_DELETED) {
		mlog(0, "Orphaned inode %llu was deleted while we "
		     "were waiting on a lock. ip_flags = 0x%x\n",
		     (unsigned long long)oi->ip_blkno, oi->ip_flags);
		spin_unlock(&oi->ip_lock);
		status = -ENOENT;
		goto bail;
	}
	spin_unlock(&oi->ip_lock);

	if (!ocfs2_should_refresh_lock_res(lockres))
		goto bail;

	/* This will discard any caching information we might have had
	 * for the inode metadata. */
	ocfs2_metadata_cache_purge(inode);

	ocfs2_extent_map_trunc(inode, 0);

	if (ocfs2_meta_lvb_is_trustable(inode, lockres)) {
		mlog(0, "Trusting LVB on inode %llu\n",
		     (unsigned long long)oi->ip_blkno);
		ocfs2_refresh_inode_from_lvb(inode);
	} else {
		/* Boo, we have to go to disk. */
		/* read bh, cast, ocfs2_refresh_inode */
		status = ocfs2_read_block(OCFS2_SB(inode->i_sb), oi->ip_blkno,
					  bh, OCFS2_BH_CACHED, inode);
		if (status < 0) {
			mlog_errno(status);
			goto bail_refresh;
		}
		fe = (struct ocfs2_dinode *) (*bh)->b_data;

		/* This is a good chance to make sure we're not
		 * locking an invalid object.
		 *
		 * We bug on a stale inode here because we checked
		 * above whether it was wiped from disk. The wiping
		 * node provides a guarantee that we receive that
		 * message and can mark the inode before dropping any
		 * locks associated with it. */
		if (!OCFS2_IS_VALID_DINODE(fe)) {
			OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
			status = -EIO;
			goto bail_refresh;
		}
		mlog_bug_on_msg(inode->i_generation !=
				le32_to_cpu(fe->i_generation),
				"Invalid dinode %llu disk generation: %u "
				"inode->i_generation: %u\n",
				(unsigned long long)oi->ip_blkno,
				le32_to_cpu(fe->i_generation),
				inode->i_generation);
		mlog_bug_on_msg(le64_to_cpu(fe->i_dtime) ||
				!(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL)),
				"Stale dinode %llu dtime: %llu flags: 0x%x\n",
				(unsigned long long)oi->ip_blkno,
				(unsigned long long)le64_to_cpu(fe->i_dtime),
				le32_to_cpu(fe->i_flags));

		ocfs2_refresh_inode(inode, fe);
	}

	status = 0;
bail_refresh:
	/* Must always pair with ocfs2_should_refresh_lock_res() above. */
	ocfs2_complete_lock_res_refresh(lockres, status);
bail:
	mlog_exit(status);
	return status;
}

/* Hand the caller a buffer_head for the inode block: reuse (and ref) the
 * one the update path already read, otherwise read it from disk/cache. */
static int ocfs2_assign_bh(struct inode *inode,
			   struct buffer_head **ret_bh,
			   struct buffer_head *passed_bh)
{
	int status;

	if (passed_bh) {
		/* Ok, the update went to disk for us, use the
		 * returned bh. */
		*ret_bh = passed_bh;
		get_bh(*ret_bh);

		return 0;
	}

	status = ocfs2_read_block(OCFS2_SB(inode->i_sb),
				  OCFS2_I(inode)->ip_blkno,
				  ret_bh, OCFS2_BH_CACHED, inode);
	if (status < 0)
		mlog_errno(status);

	return status;
}

/*
 * returns < 0 error if the callback will never be called, otherwise
 * the result of the lock will be communicated via the callback.
 */
int ocfs2_meta_lock_full(struct inode *inode,
			 struct buffer_head **ret_bh,
			 int ex,
			 int arg_flags)
{
	int status, level, dlm_flags, acquired;
	struct ocfs2_lock_res *lockres = NULL;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct buffer_head *local_bh = NULL;

	BUG_ON(!inode);

	mlog_entry_void();

	mlog(0, "inode %llu, take %s META lock\n",
	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
	     ex ? "EXMODE" : "PRMODE");

	status = 0;
	acquired = 0;
	/* We'll allow faking a readonly metadata lock for
	 * rodevices. */
	if (ocfs2_is_hard_readonly(osb)) {
		if (ex)
			status = -EROFS;
		goto bail;
	}

	if (ocfs2_mount_local(osb))
		goto local;

	if (!(arg_flags & OCFS2_META_LOCK_RECOVERY))
		wait_event(osb->recovery_event,
			   ocfs2_node_map_is_empty(osb, &osb->recovery_map));

	lockres = &OCFS2_I(inode)->ip_meta_lockres;
	level = ex ? LKM_EXMODE : LKM_PRMODE;
	dlm_flags = 0;
	if (arg_flags & OCFS2_META_LOCK_NOQUEUE)
		dlm_flags |= LKM_NOQUEUE;

	status = ocfs2_cluster_lock(osb, lockres, level, dlm_flags, arg_flags);
	if (status < 0) {
		/* -EAGAIN/-EIOCBRETRY are expected retry paths, not errors. */
		if (status != -EAGAIN && status != -EIOCBRETRY)
			mlog_errno(status);
		goto bail;
	}

	/* Notify the error cleanup path to drop the cluster lock. */
	acquired = 1;

	/* We wait twice because a node may have died while we were in
	 * the lower dlm layers. The second time though, we've
	 * committed to owning this lock so we don't allow signals to
	 * abort the operation. */
	if (!(arg_flags & OCFS2_META_LOCK_RECOVERY))
		wait_event(osb->recovery_event,
			   ocfs2_node_map_is_empty(osb, &osb->recovery_map));

local:
	/*
	 * We only see this flag if we're being called from
	 * ocfs2_read_locked_inode(). It means we're locking an inode
	 * which hasn't been populated yet, so clear the refresh flag
	 * and let the caller handle it.
	 */
	if (inode->i_state & I_NEW) {
		status = 0;
		if (lockres)
			ocfs2_complete_lock_res_refresh(lockres, 0);
		goto bail;
	}

	/* This is fun. The caller may want a bh back, or it may
	 * not. ocfs2_meta_lock_update definitely wants one in, but
	 * may or may not read one, depending on what's in the
	 * LVB. The result of all of this is that we've *only* gone to
	 * disk if we have to, so the complexity is worthwhile. */
	status = ocfs2_meta_lock_update(inode, &local_bh);
	if (status < 0) {
		if (status != -ENOENT)
			mlog_errno(status);
		goto bail;
	}

	if (ret_bh) {
		status = ocfs2_assign_bh(inode, ret_bh, local_bh);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}
	}

bail:
	if (status < 0) {
		/* Error path: release any bh we handed out and drop the
		 * cluster lock if we got as far as taking it. */
		if (ret_bh && (*ret_bh)) {
			brelse(*ret_bh);
			*ret_bh = NULL;
		}
		if (acquired)
			ocfs2_meta_unlock(inode, ex);
	}

	if (local_bh)
		brelse(local_bh);

	mlog_exit(status);
	return status;
}

/*
 * This is working around a lock inversion between tasks acquiring DLM locks
 * while holding a page lock and the vote thread which blocks dlm lock acquiry
 * while acquiring page locks.
 *
 * ** These _with_page variants are only intended to be called from aop
 * methods that hold page locks and return a very specific *positive* error
 * code that aop methods pass up to the VFS -- test for errors with != 0. **
 *
 * The DLM is called such that it returns -EAGAIN if it would have blocked
 * waiting for the vote thread. In that case we unlock our page so the vote
 * thread can make progress. Once we've done this we have to return
 * AOP_TRUNCATED_PAGE so the aop method that called us can bubble that back up
 * into the VFS who will then immediately retry the aop call.
 *
 * We do a blocking lock and immediate unlock before returning, though, so that
 * the lock has a great chance of being cached on this node by the time the VFS
 * calls back to retry the aop. This has a potential to livelock as nodes
 * ping locks back and forth, but that's a risk we're willing to take to avoid
 * the lock inversion simply.
*/
int ocfs2_meta_lock_with_page(struct inode *inode,
			      struct buffer_head **ret_bh,
			      int ex,
			      struct page *page)
{
	int ret;

	ret = ocfs2_meta_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK);
	if (ret == -EAGAIN) {
		/* Would have blocked: drop the page lock, take/drop the
		 * meta lock so it gets cached locally, and let the VFS
		 * retry the aop. */
		unlock_page(page);
		if (ocfs2_meta_lock(inode, ret_bh, ex) == 0)
			ocfs2_meta_unlock(inode, ex);
		ret = AOP_TRUNCATED_PAGE;
	}

	return ret;
}

/* Take the meta lock at the level needed for an atime update: PR if no
 * update is due, upgraded to EX (with the atime written) if one is.
 * *level reports which level (1 = EX, 0 = PR) the caller now holds. */
int ocfs2_meta_lock_atime(struct inode *inode,
			  struct vfsmount *vfsmnt,
			  int *level)
{
	int ret;

	mlog_entry_void();
	ret = ocfs2_meta_lock(inode, NULL, 0);
	if (ret < 0) {
		mlog_errno(ret);
		return ret;
	}

	/*
	 * If we should update atime, we will get EX lock,
	 * otherwise we just get PR lock.
	 */
	if (ocfs2_should_update_atime(inode, vfsmnt)) {
		struct buffer_head *bh = NULL;

		/* Drop PR and re-take EX; re-check atime afterwards since
		 * another node may have updated it in the window. */
		ocfs2_meta_unlock(inode, 0);
		ret = ocfs2_meta_lock(inode, &bh, 1);
		if (ret < 0) {
			mlog_errno(ret);
			return ret;
		}
		*level = 1;
		if (ocfs2_should_update_atime(inode, vfsmnt))
			ocfs2_update_inode_atime(inode, bh);
		if (bh)
			brelse(bh);
	} else
		*level = 0;

	mlog_exit(ret);
	return ret;
}

/* Drop the inode meta cluster lock taken at EX (ex != 0) or PR level.
 * No-op on hard-readonly or locally-mounted filesystems, matching the
 * fake/skipped acquisition in ocfs2_meta_lock_full(). */
void ocfs2_meta_unlock(struct inode *inode,
		       int ex)
{
	int level = ex ? LKM_EXMODE : LKM_PRMODE;
	struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_meta_lockres;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	mlog_entry_void();

	mlog(0, "inode %llu drop %s META lock\n",
	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
	     ex ? "EXMODE" : "PRMODE");

	if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) &&
	    !ocfs2_mount_local(osb))
		ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);

	mlog_exit_void();
}

/* Take the superblock cluster lock. NOTE(review): this function is cut
 * off by the excerpt below — the refresh path is incomplete here. */
int ocfs2_super_lock(struct ocfs2_super *osb,
		     int ex)
{
	int status = 0;
	int level = ex ? LKM_EXMODE : LKM_PRMODE;
	struct ocfs2_lock_res *lockres = &osb->osb_super_lockres;
	struct buffer_head *bh;
	struct ocfs2_slot_info *si = osb->slot_info;

	mlog_entry_void();

	if (ocfs2_is_hard_readonly(osb))
		return -EROFS;

	if (ocfs2_mount_local(osb))
		goto bail;

	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	/* The super block lock path is really in the best position to
	 * know when resources covered by the lock need to be
	 * refreshed, so we do it here. Of course, making sense of
	 * everything is up to the caller :) */
	status = ocfs2_should_refresh_lock_res(lockres);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}
	if (status) {
		bh = si->si_bh;
		/* NOTE(review): the remainder of ocfs2_super_lock() and the
		 * rest of the file were replaced by code-viewer UI text in
		 * this capture (keyboard-shortcut help); the excerpt is
		 * truncated here. Recover the tail from the original file. */