📄 dlmglue.c
字号:
lockres = &OCFS2_I(inode)->ip_open_lockres; level = write ? LKM_EXMODE : LKM_PRMODE; /* * The file system may already holding a PRMODE/EXMODE open lock. * Since we pass LKM_NOQUEUE, the request won't block waiting on * other nodes and the -EAGAIN will indicate to the caller that * this inode is still in use. */ status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, LKM_NOQUEUE, 0);out: mlog_exit(status); return status;}/* * ocfs2_open_unlock unlock PR and EX mode open locks. */void ocfs2_open_unlock(struct inode *inode){ struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_open_lockres; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); mlog_entry_void(); mlog(0, "inode %llu drop open lock\n", (unsigned long long)OCFS2_I(inode)->ip_blkno); if (ocfs2_mount_local(osb)) goto out; if(lockres->l_ro_holders) ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, LKM_PRMODE); if(lockres->l_ex_holders) ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, LKM_EXMODE);out: mlog_exit_void();}static int ocfs2_flock_handle_signal(struct ocfs2_lock_res *lockres, int level){ int ret; struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); unsigned long flags; struct ocfs2_mask_waiter mw; ocfs2_init_mask_waiter(&mw);retry_cancel: spin_lock_irqsave(&lockres->l_lock, flags); if (lockres->l_flags & OCFS2_LOCK_BUSY) { ret = ocfs2_prepare_cancel_convert(osb, lockres); if (ret) { spin_unlock_irqrestore(&lockres->l_lock, flags); ret = ocfs2_cancel_convert(osb, lockres); if (ret < 0) { mlog_errno(ret); goto out; } goto retry_cancel; } lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); spin_unlock_irqrestore(&lockres->l_lock, flags); ocfs2_wait_for_mask(&mw); goto retry_cancel; } ret = -ERESTARTSYS; /* * We may still have gotten the lock, in which case there's no * point to restarting the syscall. */ if (lockres->l_level == level) ret = 0; mlog(0, "Cancel returning %d. flags: 0x%lx, level: %d, act: %d\n", ret, lockres->l_flags, lockres->l_level, lockres->l_action); spin_unlock_irqrestore(&lockres->l_lock, flags);out: return ret;}/* * ocfs2_file_lock() and ocfs2_file_unlock() map to a single pair of * flock() calls. The locking approach this requires is sufficiently * different from all other cluster lock types that we implement a * seperate path to the "low-level" dlm calls. In particular: * * - No optimization of lock levels is done - we take at exactly * what's been requested. * * - No lock caching is employed. We immediately downconvert to * no-lock at unlock time. This also means flock locks never go on * the blocking list). * * - Since userspace can trivially deadlock itself with flock, we make * sure to allow cancellation of a misbehaving applications flock() * request. * * - Access to any flock lockres doesn't require concurrency, so we * can simplify the code by requiring the caller to guarantee * serialization of dlmglue flock calls. */int ocfs2_file_lock(struct file *file, int ex, int trylock){ int ret, level = ex ? LKM_EXMODE : LKM_PRMODE; unsigned int lkm_flags = trylock ? LKM_NOQUEUE : 0; unsigned long flags; struct ocfs2_file_private *fp = file->private_data; struct ocfs2_lock_res *lockres = &fp->fp_flock; struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb); struct ocfs2_mask_waiter mw; ocfs2_init_mask_waiter(&mw); if ((lockres->l_flags & OCFS2_LOCK_BUSY) || (lockres->l_level > LKM_NLMODE)) { mlog(ML_ERROR, "File lock \"%s\" has busy or locked state: flags: 0x%lx, " "level: %u\n", lockres->l_name, lockres->l_flags, lockres->l_level); return -EINVAL; } spin_lock_irqsave(&lockres->l_lock, flags); if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); spin_unlock_irqrestore(&lockres->l_lock, flags); /* * Get the lock at NLMODE to start - that way we * can cancel the upconvert request if need be. */ ret = ocfs2_lock_create(osb, lockres, LKM_NLMODE, 0); if (ret < 0) { mlog_errno(ret); goto out; } ret = ocfs2_wait_for_mask(&mw); if (ret) { mlog_errno(ret); goto out; } spin_lock_irqsave(&lockres->l_lock, flags); } lockres->l_action = OCFS2_AST_CONVERT; lkm_flags |= LKM_CONVERT; lockres->l_requested = level; lockres_or_flags(lockres, OCFS2_LOCK_BUSY); lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); spin_unlock_irqrestore(&lockres->l_lock, flags); ret = dlmlock(osb->dlm, level, &lockres->l_lksb, lkm_flags, lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1, ocfs2_locking_ast, lockres, ocfs2_blocking_ast); if (ret != DLM_NORMAL) { if (trylock && ret == DLM_NOTQUEUED) ret = -EAGAIN; else { ocfs2_log_dlm_error("dlmlock", ret, lockres); ret = -EINVAL; } ocfs2_recover_from_dlm_error(lockres, 1); lockres_remove_mask_waiter(lockres, &mw); goto out; } ret = ocfs2_wait_for_mask_interruptible(&mw, lockres); if (ret == -ERESTARTSYS) { /* * Userspace can cause deadlock itself with * flock(). Current behavior locally is to allow the * deadlock, but abort the system call if a signal is * received. We follow this example, otherwise a * poorly written program could sit in kernel until * reboot. * * Handling this is a bit more complicated for Ocfs2 * though. We can't exit this function with an * outstanding lock request, so a cancel convert is * required. We intentionally overwrite 'ret' - if the * cancel fails and the lock was granted, it's easier * to just bubble sucess back up to the user. */ ret = ocfs2_flock_handle_signal(lockres, level); }out: mlog(0, "Lock: \"%s\" ex: %d, trylock: %d, returns: %d\n", lockres->l_name, ex, trylock, ret); return ret;}void ocfs2_file_unlock(struct file *file){ int ret; unsigned long flags; struct ocfs2_file_private *fp = file->private_data; struct ocfs2_lock_res *lockres = &fp->fp_flock; struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb); struct ocfs2_mask_waiter mw; ocfs2_init_mask_waiter(&mw); if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) return; if (lockres->l_level == LKM_NLMODE) return; mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n", lockres->l_name, lockres->l_flags, lockres->l_level, lockres->l_action); spin_lock_irqsave(&lockres->l_lock, flags); /* * Fake a blocking ast for the downconvert code. */ lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); lockres->l_blocking = LKM_EXMODE; ocfs2_prepare_downconvert(lockres, LKM_NLMODE); lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); spin_unlock_irqrestore(&lockres->l_lock, flags); ret = ocfs2_downconvert_lock(osb, lockres, LKM_NLMODE, 0); if (ret) { mlog_errno(ret); return; } ret = ocfs2_wait_for_mask(&mw); if (ret) mlog_errno(ret);}static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, struct ocfs2_lock_res *lockres){ int kick = 0; mlog_entry_void(); /* If we know that another node is waiting on our lock, kick * the downconvert thread * pre-emptively when we reach a release * condition. */ if (lockres->l_flags & OCFS2_LOCK_BLOCKED) { switch(lockres->l_blocking) { case LKM_EXMODE: if (!lockres->l_ex_holders && !lockres->l_ro_holders) kick = 1; break; case LKM_PRMODE: if (!lockres->l_ex_holders) kick = 1; break; default: BUG(); } } if (kick) ocfs2_wake_downconvert_thread(osb); mlog_exit_void();}#define OCFS2_SEC_BITS 34#define OCFS2_SEC_SHIFT (64 - 34)#define OCFS2_NSEC_MASK ((1ULL << OCFS2_SEC_SHIFT) - 1)/* LVB only has room for 64 bits of time here so we pack it for * now. */static u64 ocfs2_pack_timespec(struct timespec *spec){ u64 res; u64 sec = spec->tv_sec; u32 nsec = spec->tv_nsec; res = (sec << OCFS2_SEC_SHIFT) | (nsec & OCFS2_NSEC_MASK); return res;}/* Call this with the lockres locked. I am reasonably sure we don't * need ip_lock in this function as anyone who would be changing those * values is supposed to be blocked in ocfs2_inode_lock right now. */static void __ocfs2_stuff_meta_lvb(struct inode *inode){ struct ocfs2_inode_info *oi = OCFS2_I(inode); struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; struct ocfs2_meta_lvb *lvb; mlog_entry_void(); lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; /* * Invalidate the LVB of a deleted inode - this way other * nodes are forced to go to disk and discover the new inode * status. */ if (oi->ip_flags & OCFS2_INODE_DELETED) { lvb->lvb_version = 0; goto out; } lvb->lvb_version = OCFS2_LVB_VERSION; lvb->lvb_isize = cpu_to_be64(i_size_read(inode)); lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters); lvb->lvb_iuid = cpu_to_be32(inode->i_uid); lvb->lvb_igid = cpu_to_be32(inode->i_gid); lvb->lvb_imode = cpu_to_be16(inode->i_mode); lvb->lvb_inlink = cpu_to_be16(inode->i_nlink); lvb->lvb_iatime_packed = cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime)); lvb->lvb_ictime_packed = cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime)); lvb->lvb_imtime_packed = cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime)); lvb->lvb_iattr = cpu_to_be32(oi->ip_attr); lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features); lvb->lvb_igeneration = cpu_to_be32(inode->i_generation);out: mlog_meta_lvb(0, lockres); mlog_exit_void();}static void ocfs2_unpack_timespec(struct timespec *spec, u64 packed_time){ spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT; spec->tv_nsec = packed_time & OCFS2_NSEC_MASK;}static void ocfs2_refresh_inode_from_lvb(struct inode *inode){ struct ocfs2_inode_info *oi = OCFS2_I(inode); struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; struct ocfs2_meta_lvb *lvb; mlog_entry_void(); mlog_meta_lvb(0, lockres); lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; /* We're safe here without the lockres lock... */ spin_lock(&oi->ip_lock); oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters); i_size_write(inode, be64_to_cpu(lvb->lvb_isize)); oi->ip_attr = be32_to_cpu(lvb->lvb_iattr); oi->ip_dyn_features = be16_to_cpu(lvb->lvb_idynfeatures); ocfs2_set_inode_flags(inode); /* fast-symlinks are a special case */ if (S_ISLNK(inode->i_mode) && !oi->ip_clusters) inode->i_blocks = 0; else inode->i_blocks = ocfs2_inode_sector_count(inode); inode->i_uid = be32_to_cpu(lvb->lvb_iuid); inode->i_gid = be32_to_cpu(lvb->lvb_igid); inode->i_mode = be16_to_cpu(lvb->lvb_imode); inode->i_nlink = be16_to_cpu(lvb->lvb_inlink); ocfs2_unpack_timespec(&inode->i_atime, be64_to_cpu(lvb->lvb_iatime_packed)); ocfs2_unpack_timespec(&inode->i_mtime, be64_to_cpu(lvb->lvb_imtime_packed)); ocfs2_unpack_timespec(&inode->i_ctime, be64_to_cpu(lvb->lvb_ictime_packed)); spin_unlock(&oi->ip_lock); mlog_exit_void();}static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode, struct ocfs2_lock_res *lockres){ struct ocfs2_meta_lvb *lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; if (lvb->lvb_version == OCFS2_LVB_VERSION && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation) return 1; return 0;}/* Determine whether a lock resource needs to be refreshed, and * arbitrate who gets to refresh it. * * 0 means no refresh needed. * * > 0 means you need to refresh this and you MUST call * ocfs2_complete_lock_res_refresh afterwards. */static int ocfs2_should_refresh_lock_res(struct ocfs2_lock_res *lockres){ unsigned long flags; int status = 0; mlog_entry_void();refresh_check: spin_lock_irqsave(&lockres->l_lock, flags); if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) { spin_unlock_irqrestore(&lockres->l_lock, flags); goto bail; } if (lockres->l_flags & OCFS2_LOCK_REFRESHING) { spin_unlock_irqrestore(&lockres->l_lock, flags); ocfs2_wait_on_refreshing_lock(lockres); goto refresh_check; } /* Ok, I'll be the one to refresh this lock. */ lockres_or_flags(lockres, OCFS2_LOCK_REFRESHING); spin_unlock_irqrestore(&lockres->l_lock, flags); status = 1;bail: mlog_exit(status); return status;}/* If status is non zero, I'll mark it as not being in refresh * anymroe, but i won't clear the needs refresh flag. */static inline void ocfs2_complete_lock_res_refresh(struct ocfs2_lock_res *lockres, int status){ unsigned long flags; mlog_entry_void(); spin_lock_irqsave(&lockres->l_lock, flags); lockres_clear_flags(lockres, OCFS2_LOCK_REFRESHING); if (!status) lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); spin_unlock_irqrestore(&lockres->l_lock, flags); wake_up(&lockres->l_event); mlog_exit_void();}/* may or may not return a bh if it went to disk. */static int ocfs2_inode_lock_update(struct inode *inode, struct buffer_head **bh){ int status = 0; struct ocfs2_inode_info *oi = OCFS2_I(inode); struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; struct ocfs2_dinode *fe; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -