📄 dlmglue.c
字号:
	/*
	 * NOTE(review): this span begins mid-function -- the enclosing
	 * function's signature is above this view (presumably
	 * ocfs2_inode_lock_update, judging by the call below; verify against
	 * the full file). Locals status/oi/lockres/osb/bh/fe are declared in
	 * the unseen prologue.
	 */
	mlog_entry_void();

	/* A locally mounted (single node) volume has no remote writers, so
	 * there is never anything to refresh. */
	if (ocfs2_mount_local(osb))
		goto bail;

	/* The inode may have been wiped by another node while we slept
	 * waiting for the cluster lock -- bail with -ENOENT if so. */
	spin_lock(&oi->ip_lock);
	if (oi->ip_flags & OCFS2_INODE_DELETED) {
		mlog(0, "Orphaned inode %llu was deleted while we "
		     "were waiting on a lock. ip_flags = 0x%x\n",
		     (unsigned long long)oi->ip_blkno, oi->ip_flags);
		spin_unlock(&oi->ip_lock);
		status = -ENOENT;
		goto bail;
	}
	spin_unlock(&oi->ip_lock);

	if (!ocfs2_should_refresh_lock_res(lockres))
		goto bail;

	/* This will discard any caching information we might have had
	 * for the inode metadata. */
	ocfs2_metadata_cache_purge(inode);

	ocfs2_extent_map_trunc(inode, 0);

	if (ocfs2_meta_lvb_is_trustable(inode, lockres)) {
		/* Cheap path: rebuild the in-core inode from the lock
		 * value block, avoiding a disk read entirely. */
		mlog(0, "Trusting LVB on inode %llu\n",
		     (unsigned long long)oi->ip_blkno);
		ocfs2_refresh_inode_from_lvb(inode);
	} else {
		/* Boo, we have to go to disk. */
		/* read bh, cast, ocfs2_refresh_inode */
		status = ocfs2_read_block(OCFS2_SB(inode->i_sb), oi->ip_blkno,
					  bh, OCFS2_BH_CACHED, inode);
		if (status < 0) {
			mlog_errno(status);
			goto bail_refresh;
		}
		fe = (struct ocfs2_dinode *) (*bh)->b_data;

		/* This is a good chance to make sure we're not
		 * locking an invalid object.
		 *
		 * We bug on a stale inode here because we checked
		 * above whether it was wiped from disk. The wiping
		 * node provides a guarantee that we receive that
		 * message and can mark the inode before dropping any
		 * locks associated with it. */
		if (!OCFS2_IS_VALID_DINODE(fe)) {
			OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
			status = -EIO;
			goto bail_refresh;
		}
		mlog_bug_on_msg(inode->i_generation !=
				le32_to_cpu(fe->i_generation),
				"Invalid dinode %llu disk generation: %u "
				"inode->i_generation: %u\n",
				(unsigned long long)oi->ip_blkno,
				le32_to_cpu(fe->i_generation),
				inode->i_generation);
		mlog_bug_on_msg(le64_to_cpu(fe->i_dtime) ||
				!(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL)),
				"Stale dinode %llu dtime: %llu flags: 0x%x\n",
				(unsigned long long)oi->ip_blkno,
				(unsigned long long)le64_to_cpu(fe->i_dtime),
				le32_to_cpu(fe->i_flags));

		ocfs2_refresh_inode(inode, fe);
		ocfs2_track_lock_refresh(lockres);
	}

	status = 0;
bail_refresh:
	/* Always complete the refresh, even on error, so waiters on the
	 * lock resource are released. */
	ocfs2_complete_lock_res_refresh(lockres, status);
bail:
	mlog_exit(status);
	return status;
}

/*
 * Hand the caller a referenced buffer_head for the inode's disk block:
 * if the lock-update path already read one (passed_bh), reuse it and
 * take an extra reference; otherwise read the block ourselves.
 *
 * Returns 0 on success or a negative errno from the read. On success
 * *ret_bh holds a reference the caller must brelse().
 */
static int ocfs2_assign_bh(struct inode *inode,
			   struct buffer_head **ret_bh,
			   struct buffer_head *passed_bh)
{
	int status;

	if (passed_bh) {
		/* Ok, the update went to disk for us, use the
		 * returned bh. */
		*ret_bh = passed_bh;
		get_bh(*ret_bh);

		return 0;
	}

	status = ocfs2_read_block(OCFS2_SB(inode->i_sb),
				  OCFS2_I(inode)->ip_blkno,
				  ret_bh,
				  OCFS2_BH_CACHED,
				  inode);
	if (status < 0)
		mlog_errno(status);

	return status;
}

/*
 * returns < 0 error if the callback will never be called, otherwise
 * the result of the lock will be communicated via the callback.
 */
int ocfs2_inode_lock_full(struct inode *inode,
			  struct buffer_head **ret_bh,
			  int ex,
			  int arg_flags)
{
	int status, level, dlm_flags, acquired;
	struct ocfs2_lock_res *lockres = NULL;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct buffer_head *local_bh = NULL;

	BUG_ON(!inode);

	mlog_entry_void();

	mlog(0, "inode %llu, take %s META lock\n",
	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
	     ex ? "EXMODE" : "PRMODE");

	status = 0;
	acquired = 0;
	/* We'll allow faking a readonly metadata lock for
	 * rodevices. */
	if (ocfs2_is_hard_readonly(osb)) {
		if (ex)
			status = -EROFS;
		goto bail;
	}

	if (ocfs2_mount_local(osb))
		goto local;

	/* Don't take a cluster lock while recovery is in flight, unless we
	 * ARE the recovery path (OCFS2_META_LOCK_RECOVERY). */
	if (!(arg_flags & OCFS2_META_LOCK_RECOVERY))
		wait_event(osb->recovery_event,
			   ocfs2_node_map_is_empty(osb, &osb->recovery_map));

	lockres = &OCFS2_I(inode)->ip_inode_lockres;
	level = ex ? LKM_EXMODE : LKM_PRMODE;
	dlm_flags = 0;
	if (arg_flags & OCFS2_META_LOCK_NOQUEUE)
		dlm_flags |= LKM_NOQUEUE;

	status = ocfs2_cluster_lock(osb, lockres, level, dlm_flags, arg_flags);
	if (status < 0) {
		/* -EAGAIN (NOQUEUE would block) and -EIOCBRETRY are
		 * expected outcomes, not errors worth logging. */
		if (status != -EAGAIN && status != -EIOCBRETRY)
			mlog_errno(status);
		goto bail;
	}

	/* Notify the error cleanup path to drop the cluster lock. */
	acquired = 1;

	/* We wait twice because a node may have died while we were in
	 * the lower dlm layers. The second time though, we've
	 * committed to owning this lock so we don't allow signals to
	 * abort the operation. */
	if (!(arg_flags & OCFS2_META_LOCK_RECOVERY))
		wait_event(osb->recovery_event,
			   ocfs2_node_map_is_empty(osb, &osb->recovery_map));

local:
	/*
	 * We only see this flag if we're being called from
	 * ocfs2_read_locked_inode(). It means we're locking an inode
	 * which hasn't been populated yet, so clear the refresh flag
	 * and let the caller handle it.
	 */
	if (inode->i_state & I_NEW) {
		status = 0;
		if (lockres)
			ocfs2_complete_lock_res_refresh(lockres, 0);
		goto bail;
	}

	/* This is fun. The caller may want a bh back, or it may
	 * not. ocfs2_inode_lock_update definitely wants one in, but
	 * may or may not read one, depending on what's in the
	 * LVB. The result of all of this is that we've *only* gone to
	 * disk if we have to, so the complexity is worthwhile. */
	status = ocfs2_inode_lock_update(inode, &local_bh);
	if (status < 0) {
		/* -ENOENT means the inode was deleted while we waited --
		 * an expected race, not a loggable error. */
		if (status != -ENOENT)
			mlog_errno(status);
		goto bail;
	}

	if (ret_bh) {
		status = ocfs2_assign_bh(inode, ret_bh, local_bh);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}
	}

bail:
	if (status < 0) {
		/* Error path: drop any bh reference we handed out and, if
		 * the cluster lock was taken, release it too. */
		if (ret_bh && (*ret_bh)) {
			brelse(*ret_bh);
			*ret_bh = NULL;
		}
		if (acquired)
			ocfs2_inode_unlock(inode, ex);
	}

	if (local_bh)
		brelse(local_bh);

	mlog_exit(status);
	return status;
}

/*
 * This is working around a lock inversion between tasks acquiring DLM
 * locks while holding a page lock and the downconvert thread which
 * blocks dlm lock acquiry while acquiring page locks.
 *
 * ** These _with_page variants are only intended to be called from aop
 * methods that hold page locks and return a very specific *positive* error
 * code that aop methods pass up to the VFS -- test for errors with != 0. **
 *
 * The DLM is called such that it returns -EAGAIN if it would have
 * blocked waiting for the downconvert thread. In that case we unlock
 * our page so the downconvert thread can make progress. Once we've
 * done this we have to return AOP_TRUNCATED_PAGE so the aop method
 * that called us can bubble that back up into the VFS who will then
 * immediately retry the aop call.
 *
 * We do a blocking lock and immediate unlock before returning, though, so that
 * the lock has a great chance of being cached on this node by the time the VFS
 * calls back to retry the aop. This has a potential to livelock as nodes
 * ping locks back and forth, but that's a risk we're willing to take to avoid
 * the lock inversion simply.
 */
int ocfs2_inode_lock_with_page(struct inode *inode,
			       struct buffer_head **ret_bh,
			       int ex,
			       struct page *page)
{
	int ret;

	ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK);
	if (ret == -EAGAIN) {
		unlock_page(page);
		/* Blocking lock + immediate unlock: warms the lock on this
		 * node so the VFS's retry of the aop is likely to succeed. */
		if (ocfs2_inode_lock(inode, ret_bh, ex) == 0)
			ocfs2_inode_unlock(inode, ex);
		ret = AOP_TRUNCATED_PAGE;
	}

	return ret;
}

/*
 * Take the inode meta lock at the level needed for an atime check:
 * PR if no update is due, upgraded to EX (drop + re-take) if atime
 * should be written. *level reports which level (1 == EX) is held on
 * successful return so the caller can unlock correctly.
 */
int ocfs2_inode_lock_atime(struct inode *inode,
			   struct vfsmount *vfsmnt,
			   int *level)
{
	int ret;

	mlog_entry_void();
	ret = ocfs2_inode_lock(inode, NULL, 0);
	if (ret < 0) {
		mlog_errno(ret);
		return ret;
	}

	/*
	 * If we should update atime, we will get EX lock,
	 * otherwise we just get PR lock.
	 */
	if (ocfs2_should_update_atime(inode, vfsmnt)) {
		struct buffer_head *bh = NULL;

		ocfs2_inode_unlock(inode, 0);
		ret = ocfs2_inode_lock(inode, &bh, 1);
		if (ret < 0) {
			mlog_errno(ret);
			return ret;
		}
		*level = 1;
		/* Re-check under EX: another node may have updated atime
		 * in the window where we held no lock. */
		if (ocfs2_should_update_atime(inode, vfsmnt))
			ocfs2_update_inode_atime(inode, bh);
		if (bh)
			brelse(bh);
	} else
		*level = 0;

	mlog_exit(ret);
	return ret;
}

/* Drop the inode meta lock taken at the given level (ex selects EX vs PR).
 * Mirrors the fake-lock cases in the lock path: hard-readonly and local
 * mounts never took a real cluster lock, so nothing is dropped. */
void ocfs2_inode_unlock(struct inode *inode,
			int ex)
{
	int level = ex ? LKM_EXMODE : LKM_PRMODE;
	struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	mlog_entry_void();

	mlog(0, "inode %llu drop %s META lock\n",
	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
	     ex ? "EXMODE" : "PRMODE");

	if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) &&
	    !ocfs2_mount_local(osb))
		ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);

	mlog_exit_void();
}

int ocfs2_super_lock(struct ocfs2_super *osb,
		     int ex)
{
	int status = 0;
	int level = ex ? LKM_EXMODE : LKM_PRMODE;
	struct ocfs2_lock_res *lockres = &osb->osb_super_lockres;
	struct buffer_head *bh;
	struct ocfs2_slot_info *si = osb->slot_info;

	mlog_entry_void();

	if (ocfs2_is_hard_readonly(osb))
		return -EROFS;

	if (ocfs2_mount_local(osb))
		goto bail;

	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	/* The super block lock path is really in the best position to
	 * know when resources covered by the lock need to be
	 * refreshed, so we do it here. Of course, making sense of
	 * everything is up to the caller :) */
	status = ocfs2_should_refresh_lock_res(lockres);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}
	if (status) {
		/* Re-read the slot map from disk and, success or failure,
		 * complete the refresh so waiters are released. */
		bh = si->si_bh;
		status = ocfs2_read_block(osb, bh->b_blocknr, &bh, 0,
					  si->si_inode);
		if (status == 0)
			ocfs2_update_slot_info(si);

		ocfs2_complete_lock_res_refresh(lockres, status);

		if (status < 0)
			mlog_errno(status);
		ocfs2_track_lock_refresh(lockres);
	}
bail:
	mlog_exit(status);
	return status;
}

/* Drop the superblock cluster lock (no-op on local mounts). */
void ocfs2_super_unlock(struct ocfs2_super *osb,
			int ex)
{
	int level = ex ? LKM_EXMODE : LKM_PRMODE;
	struct ocfs2_lock_res *lockres = &osb->osb_super_lockres;

	if (!ocfs2_mount_local(osb))
		ocfs2_cluster_unlock(osb, lockres, level);
}

/* Take the global rename lock (always EX). Faked as success on local
 * mounts; refused with -EROFS on hard-readonly devices. */
int ocfs2_rename_lock(struct ocfs2_super *osb)
{
	int status;
	struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres;

	if (ocfs2_is_hard_readonly(osb))
		return -EROFS;

	if (ocfs2_mount_local(osb))
		return 0;

	status = ocfs2_cluster_lock(osb, lockres, LKM_EXMODE, 0, 0);
	if (status < 0)
		mlog_errno(status);

	return status;
}

/* Drop the global rename lock (no-op on local mounts). */
void ocfs2_rename_unlock(struct ocfs2_super *osb)
{
	struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres;

	if (!ocfs2_mount_local(osb))
		ocfs2_cluster_unlock(osb, lockres, LKM_EXMODE);
}

/* Take the cluster lock protecting a dentry (via its d_fsdata lock).
 * The dentry must already have an ocfs2_dentry_lock attached. */
int ocfs2_dentry_lock(struct dentry *dentry, int ex)
{
	int ret;
	int level = ex ? LKM_EXMODE : LKM_PRMODE;
	struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);

	BUG_ON(!dl);

	if (ocfs2_is_hard_readonly(osb))
		return -EROFS;

	if (ocfs2_mount_local(osb))
		return 0;

	ret = ocfs2_cluster_lock(osb, &dl->dl_lockres, level, 0, 0);
	if (ret < 0)
		mlog_errno(ret);

	return ret;
}

/* Drop the dentry cluster lock (no-op on local mounts). */
void ocfs2_dentry_unlock(struct dentry *dentry, int ex)
{
	int level = ex ? LKM_EXMODE : LKM_PRMODE;
	struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);

	if (!ocfs2_mount_local(osb))
		ocfs2_cluster_unlock(osb, &dl->dl_lockres, level);
}

/* Reference counting of the dlm debug structure. We want this because
 * open references on the debug inodes can live on after a mount, so
 * we can't rely on the ocfs2_super to always exist. */
static void ocfs2_dlm_debug_free(struct kref *kref)
{
	struct ocfs2_dlm_debug *dlm_debug;

	dlm_debug = container_of(kref, struct ocfs2_dlm_debug, d_refcnt);

	kfree(dlm_debug);
}

/* Release a reference; frees the structure when the count hits zero. */
void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug)
{
	if (dlm_debug)
		kref_put(&dlm_debug->d_refcnt, ocfs2_dlm_debug_free);
}

/* Take an additional reference on the debug structure. */
static void ocfs2_get_dlm_debug(struct ocfs2_dlm_debug *debug)
{
	kref_get(&debug->d_refcnt);
}

/* NOTE(review): SOURCE is truncated mid-statement below -- the remainder
 * of this constructor is outside the visible chunk and is preserved
 * exactly as cut off. */
struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void)
{
	struct ocfs2_dlm_debug *dlm_debug;

	dlm_debug = kmalloc(sizeof(struct ocfs2_dlm_debug), GFP_KERNEL);
	if (!dlm_debug) {
		mlog_errno(-ENOMEM);
		goto out;
	}

	kref_init(&dlm_debug->d_refcnt);
	INIT_LIST_HEAD(&dlm_debu
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -