📄 file.c

📁 linux 内核源代码
💻 C
📖 第 1 页 / 共 4 页
字号:
上一页 1 2 34
	}	if (file && should_remove_suid(file->f_path.dentry)) {		ret = __ocfs2_write_remove_suid(inode, di_bh);		if (ret) {			mlog_errno(ret);			goto out_meta_unlock;		}	}	down_write(&OCFS2_I(inode)->ip_alloc_sem);	switch (cmd) {	case OCFS2_IOC_RESVSP:	case OCFS2_IOC_RESVSP64:		/*		 * This takes unsigned offsets, but the signed ones we		 * pass have been checked against overflow above.		 */		ret = ocfs2_allocate_unwritten_extents(inode, sr->l_start,						       sr->l_len);		break;	case OCFS2_IOC_UNRESVSP:	case OCFS2_IOC_UNRESVSP64:		ret = ocfs2_remove_inode_range(inode, di_bh, sr->l_start,					       sr->l_len);		break;	default:		ret = -EINVAL;	}	up_write(&OCFS2_I(inode)->ip_alloc_sem);	if (ret) {		mlog_errno(ret);		goto out_meta_unlock;	}	/*	 * We update c/mtime for these changes	 */	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);	if (IS_ERR(handle)) {		ret = PTR_ERR(handle);		mlog_errno(ret);		goto out_meta_unlock;	}	if (change_size && i_size_read(inode) < size)		i_size_write(inode, size);	inode->i_ctime = inode->i_mtime = CURRENT_TIME;	ret = ocfs2_mark_inode_dirty(handle, inode, di_bh);	if (ret < 0)		mlog_errno(ret);	ocfs2_commit_trans(osb, handle);out_meta_unlock:	brelse(di_bh);	ocfs2_meta_unlock(inode, 1);out_rw_unlock:	ocfs2_rw_unlock(inode, 1);	mutex_unlock(&inode->i_mutex);out:	return ret;}int ocfs2_change_file_space(struct file *file, unsigned int cmd,			    struct ocfs2_space_resv *sr){	struct inode *inode = file->f_path.dentry->d_inode;	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);;	if ((cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) &&	    !ocfs2_writes_unwritten_extents(osb))		return -ENOTTY;	else if ((cmd == OCFS2_IOC_UNRESVSP || cmd == OCFS2_IOC_UNRESVSP64) &&		 !ocfs2_sparse_alloc(osb))		return -ENOTTY;	if (!S_ISREG(inode->i_mode))		return -EINVAL;	if (!(file->f_mode & FMODE_WRITE))		return -EBADF;	return __ocfs2_change_file_space(file, inode, file->f_pos, cmd, sr, 0);}static long ocfs2_fallocate(struct inode *inode, int mode, loff_t offset,			    loff_t len){	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);	struct ocfs2_space_resv sr;	int change_size = 1;	if (!ocfs2_writes_unwritten_extents(osb))		return -EOPNOTSUPP;	if (S_ISDIR(inode->i_mode))		return -ENODEV;	if (mode & FALLOC_FL_KEEP_SIZE)		change_size = 0;	sr.l_whence = 0;	sr.l_start = (s64)offset;	sr.l_len = (s64)len;	return __ocfs2_change_file_space(NULL, inode, offset,					 OCFS2_IOC_RESVSP64, &sr, change_size);}static int ocfs2_prepare_inode_for_write(struct dentry *dentry,					 loff_t *ppos,					 size_t count,					 int appending,					 int *direct_io){	int ret = 0, meta_level = 0;	struct inode *inode = dentry->d_inode;	loff_t saved_pos, end;	/* 	 * We start with a read level meta lock and only jump to an ex	 * if we need to make modifications here.	 */	for(;;) {		ret = ocfs2_meta_lock(inode, NULL, meta_level);		if (ret < 0) {			meta_level = -1;			mlog_errno(ret);			goto out;		}		/* Clear suid / sgid if necessary. We do this here		 * instead of later in the write path because		 * remove_suid() calls ->setattr without any hint that		 * we may have already done our cluster locking. Since		 * ocfs2_setattr() *must* take cluster locks to		 * proceeed, this will lead us to recursively lock the		 * inode. There's also the dinode i_size state which		 * can be lost via setattr during extending writes (we		 * set inode->i_size at the end of a write. */		if (should_remove_suid(dentry)) {			if (meta_level == 0) {				ocfs2_meta_unlock(inode, meta_level);				meta_level = 1;				continue;			}			ret = ocfs2_write_remove_suid(inode);			if (ret < 0) {				mlog_errno(ret);				goto out_unlock;			}		}		/* work on a copy of ppos until we're sure that we won't have		 * to recalculate it due to relocking. */		if (appending) {			saved_pos = i_size_read(inode);			mlog(0, "O_APPEND: inode->i_size=%llu\n", saved_pos);		} else {			saved_pos = *ppos;		}		end = saved_pos + count;		/*		 * Skip the O_DIRECT checks if we don't need		 * them.		 */		if (!direct_io || !(*direct_io))			break;		/*		 * There's no sane way to do direct writes to an inode		 * with inline data.		 */		if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {			*direct_io = 0;			break;		}		/*		 * Allowing concurrent direct writes means		 * i_size changes wouldn't be synchronized, so		 * one node could wind up truncating another		 * nodes writes.		 */		if (end > i_size_read(inode)) {			*direct_io = 0;			break;		}		/*		 * We don't fill holes during direct io, so		 * check for them here. If any are found, the		 * caller will have to retake some cluster		 * locks and initiate the io as buffered.		 */		ret = ocfs2_check_range_for_holes(inode, saved_pos, count);		if (ret == 1) {			*direct_io = 0;			ret = 0;		} else if (ret < 0)			mlog_errno(ret);		break;	}	if (appending)		*ppos = saved_pos;out_unlock:	ocfs2_meta_unlock(inode, meta_level);out:	return ret;}static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,				    const struct iovec *iov,				    unsigned long nr_segs,				    loff_t pos){	int ret, direct_io, appending, rw_level, have_alloc_sem  = 0;	int can_do_direct;	ssize_t written = 0;	size_t ocount;		/* original count */	size_t count;		/* after file limit checks */	loff_t old_size, *ppos = &iocb->ki_pos;	u32 old_clusters;	struct file *file = iocb->ki_filp;	struct inode *inode = file->f_path.dentry->d_inode;	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);	mlog_entry("(0x%p, %u, '%.*s')\n", file,		   (unsigned int)nr_segs,		   file->f_path.dentry->d_name.len,		   file->f_path.dentry->d_name.name);	if (iocb->ki_left == 0)		return 0;	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);	appending = file->f_flags & O_APPEND ? 1 : 0;	direct_io = file->f_flags & O_DIRECT ? 1 : 0;	mutex_lock(&inode->i_mutex);relock:	/* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */	if (direct_io) {		down_read(&inode->i_alloc_sem);		have_alloc_sem = 1;	}	/* concurrent O_DIRECT writes are allowed */	rw_level = !direct_io;	ret = ocfs2_rw_lock(inode, rw_level);	if (ret < 0) {		mlog_errno(ret);		goto out_sems;	}	can_do_direct = direct_io;	ret = ocfs2_prepare_inode_for_write(file->f_path.dentry, ppos,					    iocb->ki_left, appending,					    &can_do_direct);	if (ret < 0) {		mlog_errno(ret);		goto out;	}	/*	 * We can't complete the direct I/O as requested, fall back to	 * buffered I/O.	 */	if (direct_io && !can_do_direct) {		ocfs2_rw_unlock(inode, rw_level);		up_read(&inode->i_alloc_sem);		have_alloc_sem = 0;		rw_level = -1;		direct_io = 0;		goto relock;	}	/*	 * To later detect whether a journal commit for sync writes is	 * necessary, we sample i_size, and cluster count here.	 */	old_size = i_size_read(inode);	old_clusters = OCFS2_I(inode)->ip_clusters;	/* communicate with ocfs2_dio_end_io */	ocfs2_iocb_set_rw_locked(iocb, rw_level);	if (direct_io) {		ret = generic_segment_checks(iov, &nr_segs, &ocount,					     VERIFY_READ);		if (ret)			goto out_dio;		ret = generic_write_checks(file, ppos, &count,					   S_ISBLK(inode->i_mode));		if (ret)			goto out_dio;		written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos,						    ppos, count, ocount);		if (written < 0) {			ret = written;			goto out_dio;		}	} else {		written = generic_file_aio_write_nolock(iocb, iov, nr_segs,							*ppos);	}out_dio:	/* buffered aio wouldn't have proper lock coverage today */	BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT));	if ((file->f_flags & O_SYNC && !direct_io) || IS_SYNC(inode)) {		/*		 * The generic write paths have handled getting data		 * to disk, but since we don't make use of the dirty		 * inode list, a manual journal commit is necessary		 * here.		 */		if (old_size != i_size_read(inode) ||		    old_clusters != OCFS2_I(inode)->ip_clusters) {			ret = journal_force_commit(osb->journal->j_journal);			if (ret < 0)				written = ret;		}	}	/* 	 * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io	 * function pointer which is called when o_direct io completes so that	 * it can unlock our rw lock.  (it's the clustered equivalent of	 * i_alloc_sem; protects truncate from racing with pending ios).	 * Unfortunately there are error cases which call end_io and others	 * that don't.  so we don't have to unlock the rw_lock if either an	 * async dio is going to do it in the future or an end_io after an	 * error has already done it.	 */	if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) {		rw_level = -1;		have_alloc_sem = 0;	}out:	if (rw_level != -1)		ocfs2_rw_unlock(inode, rw_level);out_sems:	if (have_alloc_sem)		up_read(&inode->i_alloc_sem);	mutex_unlock(&inode->i_mutex);	mlog_exit(ret);	return written ? written : ret;}static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,				       struct file *out,				       loff_t *ppos,				       size_t len,				       unsigned int flags){	int ret;	struct inode *inode = out->f_path.dentry->d_inode;	mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", out, pipe,		   (unsigned int)len,		   out->f_path.dentry->d_name.len,		   out->f_path.dentry->d_name.name);	inode_double_lock(inode, pipe->inode);	ret = ocfs2_rw_lock(inode, 1);	if (ret < 0) {		mlog_errno(ret);		goto out;	}	ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, ppos, len, 0,					    NULL);	if (ret < 0) {		mlog_errno(ret);		goto out_unlock;	}	ret = generic_file_splice_write_nolock(pipe, out, ppos, len, flags);out_unlock:	ocfs2_rw_unlock(inode, 1);out:	inode_double_unlock(inode, pipe->inode);	mlog_exit(ret);	return ret;}static ssize_t ocfs2_file_splice_read(struct file *in,				      loff_t *ppos,				      struct pipe_inode_info *pipe,				      size_t len,				      unsigned int flags){	int ret = 0;	struct inode *inode = in->f_path.dentry->d_inode;	mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", in, pipe,		   (unsigned int)len,		   in->f_path.dentry->d_name.len,		   in->f_path.dentry->d_name.name);	/*	 * See the comment in ocfs2_file_aio_read()	 */	ret = ocfs2_meta_lock(inode, NULL, 0);	if (ret < 0) {		mlog_errno(ret);		goto bail;	}	ocfs2_meta_unlock(inode, 0);	ret = generic_file_splice_read(in, ppos, pipe, len, flags);bail:	mlog_exit(ret);	return ret;}static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,				   const struct iovec *iov,				   unsigned long nr_segs,				   loff_t pos){	int ret = 0, rw_level = -1, have_alloc_sem = 0, lock_level = 0;	struct file *filp = iocb->ki_filp;	struct inode *inode = filp->f_path.dentry->d_inode;	mlog_entry("(0x%p, %u, '%.*s')\n", filp,		   (unsigned int)nr_segs,		   filp->f_path.dentry->d_name.len,		   filp->f_path.dentry->d_name.name);	if (!inode) {		ret = -EINVAL;		mlog_errno(ret);		goto bail;	}	/* 	 * buffered reads protect themselves in ->readpage().  O_DIRECT reads	 * need locks to protect pending reads from racing with truncate.	 */	if (filp->f_flags & O_DIRECT) {		down_read(&inode->i_alloc_sem);		have_alloc_sem = 1;		ret = ocfs2_rw_lock(inode, 0);		if (ret < 0) {			mlog_errno(ret);			goto bail;		}		rw_level = 0;		/* communicate with ocfs2_dio_end_io */		ocfs2_iocb_set_rw_locked(iocb, rw_level);	}	/*	 * We're fine letting folks race truncates and extending	 * writes with read across the cluster, just like they can	 * locally. Hence no rw_lock during read.	 * 	 * Take and drop the meta data lock to update inode fields	 * like i_size. This allows the checks down below	 * generic_file_aio_read() a chance of actually working. 	 */	ret = ocfs2_meta_lock_atime(inode, filp->f_vfsmnt, &lock_level);	if (ret < 0) {		mlog_errno(ret);		goto bail;	}	ocfs2_meta_unlock(inode, lock_level);	ret = generic_file_aio_read(iocb, iov, nr_segs, iocb->ki_pos);	if (ret == -EINVAL)		mlog(ML_ERROR, "generic_file_aio_read returned -EINVAL\n");	/* buffered aio wouldn't have proper lock coverage today */	BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT));	/* see ocfs2_file_aio_write */	if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) {		rw_level = -1;		have_alloc_sem = 0;	}bail:	if (have_alloc_sem)		up_read(&inode->i_alloc_sem);	if (rw_level != -1) 		ocfs2_rw_unlock(inode, rw_level);	mlog_exit(ret);	return ret;}const struct inode_operations ocfs2_file_iops = {	.setattr	= ocfs2_setattr,	.getattr	= ocfs2_getattr,	.permission	= ocfs2_permission,	.fallocate	= ocfs2_fallocate,};const struct inode_operations ocfs2_special_file_iops = {	.setattr	= ocfs2_setattr,	.getattr	= ocfs2_getattr,	.permission	= ocfs2_permission,};const struct file_operations ocfs2_fops = {	.read		= do_sync_read,	.write		= do_sync_write,	.mmap		= ocfs2_mmap,	.fsync		= ocfs2_sync_file,	.release	= ocfs2_file_release,	.open		= ocfs2_file_open,	.aio_read	= ocfs2_file_aio_read,	.aio_write	= ocfs2_file_aio_write,	.ioctl		= ocfs2_ioctl,#ifdef CONFIG_COMPAT	.compat_ioctl   = ocfs2_compat_ioctl,#endif	.splice_read	= ocfs2_file_splice_read,	.splice_write	= ocfs2_file_splice_write,};const struct file_operations ocfs2_dops = {	.read		= generic_read_dir,	.readdir	= ocfs2_readdir,	.fsync		= ocfs2_sync_file,	.ioctl		= ocfs2_ioctl,#ifdef CONFIG_COMPAT	.compat_ioctl   = ocfs2_compat_ioctl,#endif};
上一页 1 2 34
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -