mmap.c
				binode->ba_meta_locked = 0;
				mlog_errno(status);
				goto bail;
			}
			binode->ba_data_locked = 1;
		}
		ocfs2_add_io_marker(inode, &binode->ba_task);
	}

	status = 0;
bail:
	return status;
}

void ocfs2_unlock_buffer_inodes(struct ocfs2_buffer_lock_ctxt *ctxt)
{
	struct ocfs2_backing_inode *binode;
	struct rb_node *node;

	/* dlm locks don't mask ints.. this should be lower down */
	BUG_ON(in_interrupt());

	/* unlock in reverse order to minimize waking forward lockers */
	while ((node = rb_last(&ctxt->b_inodes)) != NULL) {
		binode = rb_entry(node, struct ocfs2_backing_inode, ba_node);

		ocfs2_del_io_marker(binode->ba_inode, &binode->ba_task);

		if (binode->ba_data_locked)
			ocfs2_data_unlock(binode->ba_inode,
					  binode->ba_lock_data_level);
		if (binode->ba_meta_locked)
			ocfs2_meta_unlock(binode->ba_inode,
					  binode->ba_lock_meta_level);

		rb_erase(node, &ctxt->b_inodes);
		kfree(binode);
	}

	ctxt->b_next_unlocked = NULL;
}

static int ocfs2_write_remove_suid(struct inode *inode)
{
	int ret;
	struct buffer_head *bh = NULL;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_journal_handle *handle;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_dinode *di;

	mlog_entry("(Inode %"MLFu64", mode 0%o)\n", oi->ip_blkno,
		   inode->i_mode);

	handle = ocfs2_start_trans(osb, NULL, OCFS2_INODE_UPDATE_CREDITS);
	if (handle == NULL) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_read_block(osb, oi->ip_blkno, &bh, OCFS2_BH_CACHED, inode);
	if (ret < 0) {
		mlog_errno(ret);
		goto out_trans;
	}

	ocfs2_set_inode_lock_trans(osb->journal, inode);

	ret = ocfs2_journal_access(handle, inode, bh,
				   OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret < 0) {
		mlog_errno(ret);
		goto out_bh;
	}

	inode->i_mode &= ~S_ISUID;
	if ((inode->i_mode & S_ISGID) && (inode->i_mode & S_IXGRP))
		inode->i_mode &= ~S_ISGID;

	di = (struct ocfs2_dinode *) bh->b_data;
	di->i_mode = cpu_to_le16(inode->i_mode);

	ret = ocfs2_journal_dirty(handle, bh);
	if (ret < 0)
		mlog_errno(ret);
out_bh:
	brelse(bh);
out_trans:
	ocfs2_commit_trans(handle);
out:
	mlog_exit(ret);
	return ret;
}

static inline int ocfs2_write_should_remove_suid(struct inode *inode)
{
	mode_t mode = inode->i_mode;

	if (!capable(CAP_FSETID)) {
		if (unlikely(mode & S_ISUID))
			return 1;

		if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
			return 1;
	}
	return 0;
}

/*
 * This builds up the locking state that will be used by a write. Both normal
 * file writes and AIO writes come in through here. This function does no
 * teardown on its own. The caller must examine the info struct to see if it
 * needs to release locks or i_mutex, etc. This function is also restartable
 * in that it can return -EIOCBRETRY if it would have blocked in the dlm. It
 * stores its partial progress in the info struct so the caller can call back
 * in when it thinks the dlm won't block any more. Thus, the caller must zero
 * the info struct before calling in the first time.
 */
ssize_t ocfs2_write_lock_maybe_extend(struct file *filp,
				      const char __user *buf,
				      size_t count,
				      loff_t *ppos,
				      struct ocfs2_write_lock_info *info,
				      struct ocfs2_buffer_lock_ctxt *ctxt)
{
	int ret = 0;
	struct dentry *dentry = filp->f_dentry;
	struct inode *inode = dentry->d_inode;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_backing_inode *ba;
	int status;
	loff_t saved_ppos;
	u64 bytes_added = 0;

	/*
	 * The target inode is different from the other inodes. In O_DIRECT
	 * it gets a PR data lock (see below) and when appending it gets an
	 * EX meta lock. It's locked manually here, though the backing_inode
	 * fields are maintained while doing so, so that unlock does the
	 * right thing.
	 */
	if (info->wl_target_binode == NULL) {
		ret = ocfs2_setup_io_locks(inode->i_sb, inode,
					   (char __user *) buf, count, ctxt,
					   &info->wl_target_binode);
		if (ret < 0) {
			BUG_ON(ret == -EIOCBRETRY);
			mlog_errno(ret);
			goto bail;
		}
	}

	ba = info->wl_target_binode;

	/* This will lock everyone in the context whose order puts
	 * them before us. */
	if (!info->wl_have_before) {
		info->wl_unlock_ctxt = 1;
		ret = ocfs2_lock_buffer_inodes(ctxt, inode);
		if (ret < 0) {
			if (ret != -EIOCBRETRY)
				mlog_errno(ret);
			goto bail;
		}
		info->wl_have_before = 1;
	}

	if (!info->wl_have_i_mutex) {
		mutex_lock(&inode->i_mutex);
		info->wl_have_i_mutex = 1;
	}

	ba->ba_lock_data_level = 1;
	if (filp->f_flags & O_APPEND)
		ba->ba_lock_meta_level = 1;

retry_meta_lock:
	if (!ba->ba_meta_locked) {
		status = ocfs2_meta_lock(inode, NULL, NULL,
					 ba->ba_lock_meta_level);
		if (status < 0) {
			mlog_errno(status);
			ret = status;
			goto bail;
		}
		ba->ba_meta_locked = 1;
	}

	/* Clear suid / sgid if necessary. We do this here instead of
	 * later in the write path because remove_suid() calls
	 * ->setattr without any hint that we may have already done
	 * our cluster locking. Since ocfs2_setattr() *must* take
	 * cluster locks to proceed, this will lead us to recursively
	 * lock the inode. There's also the dinode i_size state which
	 * can be lost via setattr during extending writes (we set
	 * inode->i_size at the end of a write). */
	if (ocfs2_write_should_remove_suid(inode)) {
		if (ba->ba_lock_meta_level == 0) {
			mlog(0, "inode %"MLFu64", had a PR, looping back for "
			     "EX so we can remove SUID\n",
			     OCFS2_I(inode)->ip_blkno);
			ocfs2_meta_unlock(inode, ba->ba_lock_meta_level);
			ba->ba_meta_locked = 0;
			ba->ba_lock_meta_level = 1;
			goto retry_meta_lock;
		}

		status = ocfs2_write_remove_suid(inode);
		if (status < 0) {
			mlog_errno(status);
			ret = status;
			goto bail;
		}
	}

	/* work on a copy of ppos until we're sure that we won't have
	 * to recalculate it due to relocking. */
	saved_ppos = *ppos;

	if (filp->f_flags & O_APPEND) {
		saved_ppos = i_size_read(inode);
		mlog(0, "O_APPEND: inode->i_size=%llu\n", saved_ppos);

#ifdef OCFS2_ORACORE_WORKAROUNDS
		if (osb->s_mount_opt & OCFS2_MOUNT_COMPAT_OCFS) {
			/* ugh, work around some applications which open
			 * everything O_DIRECT + O_APPEND and really don't
			 * mean to use O_DIRECT. */
			filp->f_flags &= ~O_DIRECT;
		}
#endif
	}

	if (filp->f_flags & O_DIRECT) {
#ifdef OCFS2_ORACORE_WORKAROUNDS
		if (osb->s_mount_opt & OCFS2_MOUNT_COMPAT_OCFS) {
			int sector_size = 1 << osb->s_sectsize_bits;

			if ((saved_ppos & (sector_size - 1)) ||
			    (count & (sector_size - 1)) ||
			    ((unsigned long)buf & (sector_size - 1))) {
				info->wl_do_direct_io = 0;
				filp->f_flags |= O_SYNC;
			} else {
				info->wl_do_direct_io = 1;
			}
		} else
#endif
			info->wl_do_direct_io = 1;

		mlog(0, "O_DIRECT\n");
	}

	/*
	 * We get PR data locks even for O_DIRECT. This allows concurrent
	 * O_DIRECT writes, but doesn't let O_DIRECT writes race with
	 * extending and buffered zeroing writes. If they did race, the
	 * buffered zeroing could be written back after the O_DIRECT write
	 * and overwrite it. It's one thing to tell people not to mix
	 * buffered and O_DIRECT writes, but expecting them to understand
	 * that file extension is also an implicit buffered write is too
	 * much. By getting the PR we force writeback of the buffered
	 * zeroing before proceeding.
	 */
	if (info->wl_do_direct_io && !(filp->f_flags & O_APPEND))
		ba->ba_lock_data_level = 0;

	info->wl_newsize = count + saved_ppos;
	if (filp->f_flags & O_APPEND)
		info->wl_newsize = count + i_size_read(inode);

	/* get the locking straight for the extending case */
	if (info->wl_newsize > i_size_read(inode)) {
		if (ba->ba_lock_meta_level == 0) {
			mlog(0, "inode %"MLFu64", had a PR meta, looping back "
			     "for EX\n", OCFS2_I(inode)->ip_blkno);
			ocfs2_meta_unlock(inode, ba->ba_lock_meta_level);
			ba->ba_meta_locked = 0;
			ba->ba_lock_meta_level = 1;
			goto retry_meta_lock;
		}
		ba->ba_lock_data_level = 1;
	}

	/*
	 * get the data lock before extending so that we can be sure
	 * that we'll be able to zero under lock coverage. This does
	 * get an EX data lock for O_DIRECT but as long as zeroing is
	 * buffered we really must hold the lock while manipulating the
	 * page cache.
	 */
	if (!ba->ba_data_locked) {
		status = ocfs2_data_lock(inode, ba->ba_lock_data_level);
		if (status < 0) {
			mlog_errno(status);
			ret = status;
			goto bail;
		}
		ba->ba_data_locked = 1;
	}

	mlog(0, "ppos=%lld newsize=%"MLFu64" cursize=%lld\n",
	     saved_ppos, info->wl_newsize, i_size_read(inode));

	if (info->wl_newsize > i_size_read(inode)) {
		mlog(0, "Writing at EOF, will need more allocation: "
		     "i_size=%lld, need=%"MLFu64"\n",
		     i_size_read(inode), info->wl_newsize);

		/* If we extend AT ALL here then we update our state
		 * and continue the write call, regardless of error --
		 * this is basically a short write. */
		status = ocfs2_extend_file(osb, inode, info->wl_newsize,
					   &bytes_added);
		if (status < 0 && (!bytes_added)) {
			if (status != -ERESTARTSYS && status != -EINTR &&
			    status != -ENOSPC) {
				mlog_errno(status);
				mlog(ML_ERROR, "Failed to extend inode "
				     "%"MLFu64" from %lld to %"MLFu64,
				     OCFS2_I(inode)->ip_blkno, *ppos,
				     info->wl_newsize);
			}
			ret = status;
			goto bail;
		}

		info->wl_extended = 1;

		/* We need to recalculate newsize and count according
		 * to what extend could give us. If we got the whole
		 * extend then this doesn't wind up changing the
		 * values. */
		info->wl_newsize = i_size_read(inode) + bytes_added;
		count = info->wl_newsize - saved_ppos;

		if (status < 0 && status != -ENOSPC && status != -EINTR &&
		    status != -ERESTARTSYS)
			mlog(ML_ERROR, "status return of %d extending inode "
			     "%"MLFu64"\n", status,
			     OCFS2_I(inode)->ip_blkno);
		status = 0;
	}

	/* we've got whatever cluster lock is appropriate now, so we
	 * can stuff *ppos back. */
	*ppos = saved_ppos;

	/* This will lock everyone whose order puts them *after* our
	 * inode. */
	ret = ocfs2_lock_buffer_inodes(ctxt, NULL);
	if (ret < 0) {
		if (ret != -EIOCBRETRY)
			mlog_errno(ret);
		goto bail;
	}

bail:
	mlog_exit(ret);
	return ret;
}

#if 0
static void ocfs2_buffer_ctxt_debug(struct ocfs2_buffer_lock_ctxt *ctxt)
{
	struct ocfs2_backing_inode *binode;
	struct inode *inode;
	struct rb_node *node;

	printk("(%u) ocfs2: buffer lock ctxt: direct io = %d\n",
	       current->pid, ctxt->b_lock_direct);

	node = rb_first(&ctxt->b_inodes);
	while (node) {
		binode = rb_entry(node, struct ocfs2_backing_inode, ba_node);
		inode = binode->ba_inode;

		printk("(%u) ocfs2: inode %llu, locked %d, is target? %s\n",
		       current->pid, OCFS2_I(inode)->ip_blkno,
		       binode->ba_locked,
		       ocfs2_buffer_lock_is_target(ctxt, inode) ?
		       "yes" : "no");

		node = rb_next(node);
	}
}
#endif
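
Editor's note: the comment above ocfs2_write_lock_maybe_extend() describes a restartable protocol (zero the info struct once, call in, handle -EIOCBRETRY by calling back in, then tear down based on the partial progress recorded in info), but the callers are not part of this excerpt. The sketch below is not from mmap.c; it is a hypothetical synchronous caller (example_file_write) written only to illustrate that protocol, assuming the same ocfs2 headers as mmap.c are included. Zeroing both structs with memset and retrying -EIOCBRETRY in a tight loop are simplifying assumptions; a real AIO caller would instead return -EIOCBRETRY to the aio core and be re-driven when the dlm lock completes.

/* Hypothetical illustration only -- not part of the original file. */
static ssize_t example_file_write(struct file *filp, const char __user *buf,
				  size_t count, loff_t *ppos)
{
	ssize_t ret;
	struct ocfs2_write_lock_info info;
	struct ocfs2_buffer_lock_ctxt ctxt;

	/* the function requires a zeroed info struct on first entry;
	 * zeroing the ctxt the same way is an assumption of this sketch */
	memset(&info, 0, sizeof(info));
	memset(&ctxt, 0, sizeof(ctxt));

	do {
		ret = ocfs2_write_lock_maybe_extend(filp, buf, count, ppos,
						    &info, &ctxt);
		/* a synchronous caller can simply call back in; an AIO
		 * caller would propagate -EIOCBRETRY instead of looping */
	} while (ret == -EIOCBRETRY);

	if (ret >= 0) {
		/* ... perform the buffered or O_DIRECT write here,
		 * honouring info.wl_do_direct_io and info.wl_newsize ... */
	}

	/* teardown mirrors the partial progress recorded in info */
	if (info.wl_unlock_ctxt)
		ocfs2_unlock_buffer_inodes(&ctxt);
	if (info.wl_have_i_mutex)
		mutex_unlock(&filp->f_dentry->d_inode->i_mutex);

	return ret;
}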