/* dlmglue.c — OCFS2 DLM glue (excerpt; captured from a web code viewer) */
	/* NOTE(review): continuation of a function whose opening lines fall
	 * before this excerpt — it packs in-memory inode metadata into the
	 * lock value block (lvb) in big-endian form; confirm against the
	 * full file. */
		cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime));
	lvb->lvb_ictime_packed =
		cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime));
	lvb->lvb_imtime_packed =
		cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime));
	lvb->lvb_iattr = cpu_to_be32(oi->ip_attr);
	lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features);
	lvb->lvb_igeneration = cpu_to_be32(inode->i_generation);

out:
	mlog_meta_lvb(0, lockres);

	mlog_exit_void();
}

/* Split a packed u64 time back into seconds/nanoseconds: seconds live in
 * the high bits (>> OCFS2_SEC_SHIFT), nanoseconds in the masked low bits. */
static void ocfs2_unpack_timespec(struct timespec *spec,
				  u64 packed_time)
{
	spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT;
	spec->tv_nsec = packed_time & OCFS2_NSEC_MASK;
}

/* Copy the inode metadata cached in the lock value block (size, uid/gid,
 * mode, link count, timestamps, ...) into the in-memory inode, under
 * ip_lock. */
static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_lock_res *lockres = &oi->ip_meta_lockres;
	struct ocfs2_meta_lvb *lvb;

	mlog_entry_void();

	mlog_meta_lvb(0, lockres);

	lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb;

	/* We're safe here without the lockres lock... */
	spin_lock(&oi->ip_lock);
	oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters);
	i_size_write(inode, be64_to_cpu(lvb->lvb_isize));

	oi->ip_attr = be32_to_cpu(lvb->lvb_iattr);
	oi->ip_dyn_features = be16_to_cpu(lvb->lvb_idynfeatures);
	ocfs2_set_inode_flags(inode);

	/* fast-symlinks are a special case */
	if (S_ISLNK(inode->i_mode) && !oi->ip_clusters)
		inode->i_blocks = 0;
	else
		inode->i_blocks = ocfs2_inode_sector_count(inode);

	inode->i_uid = be32_to_cpu(lvb->lvb_iuid);
	inode->i_gid = be32_to_cpu(lvb->lvb_igid);
	inode->i_mode = be16_to_cpu(lvb->lvb_imode);
	inode->i_nlink = be16_to_cpu(lvb->lvb_inlink);
	ocfs2_unpack_timespec(&inode->i_atime,
			      be64_to_cpu(lvb->lvb_iatime_packed));
	ocfs2_unpack_timespec(&inode->i_mtime,
			      be64_to_cpu(lvb->lvb_imtime_packed));
	ocfs2_unpack_timespec(&inode->i_ctime,
			      be64_to_cpu(lvb->lvb_ictime_packed));
	spin_unlock(&oi->ip_lock);

	mlog_exit_void();
}

/* An LVB is only trusted when its version matches OCFS2_LVB_VERSION and
 * its stored generation matches the inode's — otherwise the caller must
 * refresh from disk. Returns 1 if trustable, 0 if not. */
static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode,
					      struct ocfs2_lock_res *lockres)
{
	struct ocfs2_meta_lvb *lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb;

	if (lvb->lvb_version == OCFS2_LVB_VERSION &&
	    be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation)
		return 1;
	return 0;
}

/* Determine whether a lock resource needs to be refreshed, and
 * arbitrate who gets to refresh it.
 *
 * 0 means no refresh needed.
 *
 * > 0 means you need to refresh this and you MUST call
 * ocfs2_complete_lock_res_refresh afterwards. */
static int ocfs2_should_refresh_lock_res(struct ocfs2_lock_res *lockres)
{
	unsigned long flags;
	int status = 0;

	mlog_entry_void();

refresh_check:
	spin_lock_irqsave(&lockres->l_lock, flags);
	if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) {
		spin_unlock_irqrestore(&lockres->l_lock, flags);
		goto bail;
	}

	if (lockres->l_flags & OCFS2_LOCK_REFRESHING) {
		/* Someone else is refreshing; wait for them to finish,
		 * then re-check the flags from the top. */
		spin_unlock_irqrestore(&lockres->l_lock, flags);

		ocfs2_wait_on_refreshing_lock(lockres);
		goto refresh_check;
	}

	/* Ok, I'll be the one to refresh this lock. */
	lockres_or_flags(lockres, OCFS2_LOCK_REFRESHING);
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	status = 1;
bail:
	mlog_exit(status);
	return status;
}

/* If status is non zero, I'll mark it as not being in refresh
 * anymore, but i won't clear the needs refresh flag. */
static inline void ocfs2_complete_lock_res_refresh(struct ocfs2_lock_res *lockres,
						   int status)
{
	unsigned long flags;
	mlog_entry_void();

	spin_lock_irqsave(&lockres->l_lock, flags);
	lockres_clear_flags(lockres, OCFS2_LOCK_REFRESHING);
	if (!status)
		lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	/* Wake anyone parked in ocfs2_wait_on_refreshing_lock(). */
	wake_up(&lockres->l_event);

	mlog_exit_void();
}

/* may or may not return a bh if it went to disk.
*/
static int ocfs2_meta_lock_update(struct inode *inode,
				  struct buffer_head **bh)
{
	int status = 0;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_lock_res *lockres = &oi->ip_meta_lockres;
	struct ocfs2_dinode *fe;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	mlog_entry_void();

	/* Local (non-clustered) mounts never need an LVB/disk refresh. */
	if (ocfs2_mount_local(osb))
		goto bail;

	spin_lock(&oi->ip_lock);
	if (oi->ip_flags & OCFS2_INODE_DELETED) {
		mlog(0, "Orphaned inode %llu was deleted while we "
		     "were waiting on a lock. ip_flags = 0x%x\n",
		     (unsigned long long)oi->ip_blkno, oi->ip_flags);
		spin_unlock(&oi->ip_lock);
		status = -ENOENT;
		goto bail;
	}
	spin_unlock(&oi->ip_lock);

	if (!ocfs2_should_refresh_lock_res(lockres))
		goto bail;

	/* This will discard any caching information we might have had
	 * for the inode metadata. */
	ocfs2_metadata_cache_purge(inode);

	ocfs2_extent_map_trunc(inode, 0);

	if (ocfs2_meta_lvb_is_trustable(inode, lockres)) {
		mlog(0, "Trusting LVB on inode %llu\n",
		     (unsigned long long)oi->ip_blkno);
		ocfs2_refresh_inode_from_lvb(inode);
	} else {
		/* Boo, we have to go to disk. */
		/* read bh, cast, ocfs2_refresh_inode */
		status = ocfs2_read_block(OCFS2_SB(inode->i_sb), oi->ip_blkno,
					  bh, OCFS2_BH_CACHED, inode);
		if (status < 0) {
			mlog_errno(status);
			goto bail_refresh;
		}
		fe = (struct ocfs2_dinode *) (*bh)->b_data;

		/* This is a good chance to make sure we're not
		 * locking an invalid object.
		 *
		 * We bug on a stale inode here because we checked
		 * above whether it was wiped from disk. The wiping
		 * node provides a guarantee that we receive that
		 * message and can mark the inode before dropping any
		 * locks associated with it. */
		if (!OCFS2_IS_VALID_DINODE(fe)) {
			OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
			status = -EIO;
			goto bail_refresh;
		}
		mlog_bug_on_msg(inode->i_generation !=
				le32_to_cpu(fe->i_generation),
				"Invalid dinode %llu disk generation: %u "
				"inode->i_generation: %u\n",
				(unsigned long long)oi->ip_blkno,
				le32_to_cpu(fe->i_generation),
				inode->i_generation);
		mlog_bug_on_msg(le64_to_cpu(fe->i_dtime) ||
				!(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL)),
				"Stale dinode %llu dtime: %llu flags: 0x%x\n",
				(unsigned long long)oi->ip_blkno,
				(unsigned long long)le64_to_cpu(fe->i_dtime),
				le32_to_cpu(fe->i_flags));

		ocfs2_refresh_inode(inode, fe);
	}

	status = 0;
bail_refresh:
	/* Must always pair with ocfs2_should_refresh_lock_res() above. */
	ocfs2_complete_lock_res_refresh(lockres, status);
bail:
	mlog_exit(status);
	return status;
}

/* Hand the caller a buffer_head for the inode block: reuse (and ref) the
 * one the update path already read, otherwise read it from disk/cache. */
static int ocfs2_assign_bh(struct inode *inode,
			   struct buffer_head **ret_bh,
			   struct buffer_head *passed_bh)
{
	int status;

	if (passed_bh) {
		/* Ok, the update went to disk for us, use the
		 * returned bh. */
		*ret_bh = passed_bh;
		get_bh(*ret_bh);

		return 0;
	}

	status = ocfs2_read_block(OCFS2_SB(inode->i_sb),
				  OCFS2_I(inode)->ip_blkno,
				  ret_bh, OCFS2_BH_CACHED, inode);
	if (status < 0)
		mlog_errno(status);

	return status;
}

/*
 * returns < 0 error if the callback will never be called, otherwise
 * the result of the lock will be communicated via the callback.
 */
int ocfs2_meta_lock_full(struct inode *inode,
			 struct buffer_head **ret_bh,
			 int ex,
			 int arg_flags)
{
	int status, level, dlm_flags, acquired;
	struct ocfs2_lock_res *lockres = NULL;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct buffer_head *local_bh = NULL;

	BUG_ON(!inode);

	mlog_entry_void();

	mlog(0, "inode %llu, take %s META lock\n",
	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
	     ex ? "EXMODE" : "PRMODE");

	status = 0;
	acquired = 0;
	/* We'll allow faking a readonly metadata lock for
	 * rodevices. */
	if (ocfs2_is_hard_readonly(osb)) {
		if (ex)
			status = -EROFS;
		goto bail;
	}

	if (ocfs2_mount_local(osb))
		goto local;

	if (!(arg_flags & OCFS2_META_LOCK_RECOVERY))
		wait_event(osb->recovery_event,
			   ocfs2_node_map_is_empty(osb, &osb->recovery_map));

	lockres = &OCFS2_I(inode)->ip_meta_lockres;
	level = ex ? LKM_EXMODE : LKM_PRMODE;
	dlm_flags = 0;
	if (arg_flags & OCFS2_META_LOCK_NOQUEUE)
		dlm_flags |= LKM_NOQUEUE;

	status = ocfs2_cluster_lock(osb, lockres, level, dlm_flags, arg_flags);
	if (status < 0) {
		/* -EAGAIN/-EIOCBRETRY are expected retry paths, not errors. */
		if (status != -EAGAIN && status != -EIOCBRETRY)
			mlog_errno(status);
		goto bail;
	}

	/* Notify the error cleanup path to drop the cluster lock. */
	acquired = 1;

	/* We wait twice because a node may have died while we were in
	 * the lower dlm layers. The second time though, we've
	 * committed to owning this lock so we don't allow signals to
	 * abort the operation. */
	if (!(arg_flags & OCFS2_META_LOCK_RECOVERY))
		wait_event(osb->recovery_event,
			   ocfs2_node_map_is_empty(osb, &osb->recovery_map));

local:
	/*
	 * We only see this flag if we're being called from
	 * ocfs2_read_locked_inode(). It means we're locking an inode
	 * which hasn't been populated yet, so clear the refresh flag
	 * and let the caller handle it.
	 */
	if (inode->i_state & I_NEW) {
		status = 0;
		if (lockres)
			ocfs2_complete_lock_res_refresh(lockres, 0);
		goto bail;
	}

	/* This is fun. The caller may want a bh back, or it may
	 * not. ocfs2_meta_lock_update definitely wants one in, but
	 * may or may not read one, depending on what's in the
	 * LVB. The result of all of this is that we've *only* gone to
	 * disk if we have to, so the complexity is worthwhile. */
	status = ocfs2_meta_lock_update(inode, &local_bh);
	if (status < 0) {
		if (status != -ENOENT)
			mlog_errno(status);
		goto bail;
	}

	if (ret_bh) {
		status = ocfs2_assign_bh(inode, ret_bh, local_bh);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}
	}

bail:
	if (status < 0) {
		/* Error path: release any bh we handed out and drop the
		 * cluster lock if we got as far as taking it. */
		if (ret_bh && (*ret_bh)) {
			brelse(*ret_bh);
			*ret_bh = NULL;
		}
		if (acquired)
			ocfs2_meta_unlock(inode, ex);
	}

	if (local_bh)
		brelse(local_bh);

	mlog_exit(status);
	return status;
}

/*
 * This is working around a lock inversion between tasks acquiring DLM locks
 * while holding a page lock and the vote thread which blocks dlm lock acquiry
 * while acquiring page locks.
 *
 * ** These _with_page variants are only intended to be called from aop
 * methods that hold page locks and return a very specific *positive* error
 * code that aop methods pass up to the VFS -- test for errors with != 0. **
 *
 * The DLM is called such that it returns -EAGAIN if it would have blocked
 * waiting for the vote thread. In that case we unlock our page so the vote
 * thread can make progress. Once we've done this we have to return
 * AOP_TRUNCATED_PAGE so the aop method that called us can bubble that back up
 * into the VFS who will then immediately retry the aop call.
 *
 * We do a blocking lock and immediate unlock before returning, though, so that
 * the lock has a great chance of being cached on this node by the time the VFS
 * calls back to retry the aop. This has a potential to livelock as nodes
 * ping locks back and forth, but that's a risk we're willing to take to avoid
 * the lock inversion simply.
*/
int ocfs2_meta_lock_with_page(struct inode *inode,
			      struct buffer_head **ret_bh,
			      int ex,
			      struct page *page)
{
	int ret;

	ret = ocfs2_meta_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK);
	if (ret == -EAGAIN) {
		/* Would have blocked: drop the page lock, take/drop the
		 * meta lock so it gets cached locally, and let the VFS
		 * retry the aop. */
		unlock_page(page);
		if (ocfs2_meta_lock(inode, ret_bh, ex) == 0)
			ocfs2_meta_unlock(inode, ex);
		ret = AOP_TRUNCATED_PAGE;
	}

	return ret;
}

/* Take the meta lock at the level needed for an atime update: PR if no
 * update is due, upgraded to EX (with the atime written) if one is.
 * *level reports which level (1 = EX, 0 = PR) the caller now holds. */
int ocfs2_meta_lock_atime(struct inode *inode,
			  struct vfsmount *vfsmnt,
			  int *level)
{
	int ret;

	mlog_entry_void();
	ret = ocfs2_meta_lock(inode, NULL, 0);
	if (ret < 0) {
		mlog_errno(ret);
		return ret;
	}

	/*
	 * If we should update atime, we will get EX lock,
	 * otherwise we just get PR lock.
	 */
	if (ocfs2_should_update_atime(inode, vfsmnt)) {
		struct buffer_head *bh = NULL;

		/* Drop PR and re-take EX; re-check atime afterwards since
		 * another node may have updated it in the window. */
		ocfs2_meta_unlock(inode, 0);
		ret = ocfs2_meta_lock(inode, &bh, 1);
		if (ret < 0) {
			mlog_errno(ret);
			return ret;
		}
		*level = 1;
		if (ocfs2_should_update_atime(inode, vfsmnt))
			ocfs2_update_inode_atime(inode, bh);
		if (bh)
			brelse(bh);
	} else
		*level = 0;

	mlog_exit(ret);
	return ret;
}

/* Drop the inode meta cluster lock taken at EX (ex != 0) or PR level.
 * No-op on hard-readonly or locally-mounted filesystems, matching the
 * fake/skipped acquisition in ocfs2_meta_lock_full(). */
void ocfs2_meta_unlock(struct inode *inode,
		       int ex)
{
	int level = ex ? LKM_EXMODE : LKM_PRMODE;
	struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_meta_lockres;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	mlog_entry_void();

	mlog(0, "inode %llu drop %s META lock\n",
	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
	     ex ? "EXMODE" : "PRMODE");

	if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) &&
	    !ocfs2_mount_local(osb))
		ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);

	mlog_exit_void();
}

/* Take the superblock cluster lock. NOTE(review): this function is cut
 * off by the excerpt below — the refresh path is incomplete here. */
int ocfs2_super_lock(struct ocfs2_super *osb,
		     int ex)
{
	int status = 0;
	int level = ex ? LKM_EXMODE : LKM_PRMODE;
	struct ocfs2_lock_res *lockres = &osb->osb_super_lockres;
	struct buffer_head *bh;
	struct ocfs2_slot_info *si = osb->slot_info;

	mlog_entry_void();

	if (ocfs2_is_hard_readonly(osb))
		return -EROFS;

	if (ocfs2_mount_local(osb))
		goto bail;

	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	/* The super block lock path is really in the best position to
	 * know when resources covered by the lock need to be
	 * refreshed, so we do it here. Of course, making sense of
	 * everything is up to the caller :) */
	status = ocfs2_should_refresh_lock_res(lockres);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}
	if (status) {
		bh = si->si_bh;
		/* NOTE(review): the remainder of ocfs2_super_lock() and the
		 * rest of the file were replaced by code-viewer UI text in
		 * this capture (keyboard-shortcut help); the excerpt is
		 * truncated here. Recover the tail from the original file. */