/*
 * file.c — OCFS2 file I/O paths (aio write/read, splice, sendfile) and
 * the file/inode operations tables.
 * (Code-viewer page header from the original scrape removed.)
 */
loff_t pos){ int ret, direct_io, appending, rw_level, have_alloc_sem = 0; int can_do_direct, sync = 0; ssize_t written = 0; size_t ocount; /* original count */ size_t count; /* after file limit checks */ loff_t old_size, *ppos = &iocb->ki_pos; u32 old_clusters; struct file *file = iocb->ki_filp; struct inode *inode = filp_dentry(file)->d_inode; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); mlog_entry("(0x%p, %u, '%.*s')\n", file, (unsigned int)nr_segs, filp_dentry(file)->d_name.len, filp_dentry(file)->d_name.name); if (iocb->ki_left == 0) return 0; ret = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); if (ret) return ret; count = ocount; vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); appending = file->f_flags & O_APPEND ? 1 : 0; direct_io = file->f_flags & O_DIRECT ? 1 : 0; mutex_lock(&inode->i_mutex);relock: /* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */ if (direct_io) { down_read(&inode->i_alloc_sem); have_alloc_sem = 1; } /* concurrent O_DIRECT writes are allowed */ rw_level = !direct_io; ret = ocfs2_rw_lock(inode, rw_level); if (ret < 0) { mlog_errno(ret); goto out_sems; } can_do_direct = direct_io; ret = ocfs2_prepare_inode_for_write(filp_dentry(file), ppos, iocb->ki_left, appending, &can_do_direct); if (ret < 0) { mlog_errno(ret); goto out; } /* * We can't complete the direct I/O as requested, fall back to * buffered I/O. */ if (direct_io && !can_do_direct) { ocfs2_rw_unlock(inode, rw_level); up_read(&inode->i_alloc_sem); have_alloc_sem = 0; rw_level = -1; direct_io = 0; sync = 1; goto relock; } if (!sync && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) sync = 1; /* * XXX: Is it ok to execute these checks a second time? */ ret = generic_write_checks(file, ppos, &count, S_ISBLK(inode->i_mode)); if (ret) goto out; /* * Set pos so that sync_page_range_nolock() below understands * where to start from. We might've moved it around via the * calls above. The range we want to actually sync starts from * *ppos here. 
* */ pos = *ppos; /* * To later detect whether a journal commit for sync writes is * necessary, we sample i_size, and cluster count here. */ old_size = i_size_read(inode); old_clusters = OCFS2_I(inode)->ip_clusters; /* communicate with ocfs2_dio_end_io */ ocfs2_iocb_set_rw_locked(iocb, rw_level); if (direct_io) { written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos, ppos, count, ocount); if (written < 0) { ret = written; goto out_dio; } } else { written = ocfs2_file_buffered_write(file, ppos, iov, nr_segs, count, written); if (written < 0) { ret = written; if (ret != -EFAULT || ret != -ENOSPC) mlog_errno(ret); goto out; } }out_dio: /* buffered aio wouldn't have proper lock coverage today */ BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); if ((file->f_flags & O_SYNC && !direct_io) || IS_SYNC(inode)) { /* * The generic write paths have handled getting data * to disk, but since we don't make use of the dirty * inode list, a manual journal commit is necessary * here. */ if (old_size != i_size_read(inode) || old_clusters != OCFS2_I(inode)->ip_clusters) { ret = journal_force_commit(osb->journal->j_journal); if (ret < 0) written = ret; } } /* * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io * function pointer which is called when o_direct io completes so that * it can unlock our rw lock. (it's the clustered equivalent of * i_alloc_sem; protects truncate from racing with pending ios). * Unfortunately there are error cases which call end_io and others * that don't. so we don't have to unlock the rw_lock if either an * async dio is going to do it in the future or an end_io after an * error has already done it. 
*/ if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) { rw_level = -1; have_alloc_sem = 0; }out: if (rw_level != -1) ocfs2_rw_unlock(inode, rw_level);out_sems: if (have_alloc_sem) up_read(&inode->i_alloc_sem); if (written > 0 && sync) { ssize_t err; err = sync_page_range_nolock(inode, file->f_mapping, pos, count); if (err < 0) written = err; } mutex_unlock(&inode->i_mutex); mlog_exit(ret); return written ? written : ret;}#ifndef NO_SPLICE_SUPPORTstatic int ocfs2_splice_write_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf, struct splice_desc *sd){ int ret, count; ssize_t copied = 0; struct file *file = sd_file(sd); unsigned int offset; struct page *page = NULL; void *fsdata; char *src, *dst; ret = buf->ops->kapi_confirm(pipe, buf); if (ret) goto out; offset = sd->pos & ~PAGE_CACHE_MASK; count = sd->len; if (count + offset > PAGE_CACHE_SIZE) count = PAGE_CACHE_SIZE - offset; ret = ocfs2_write_begin(file, file->f_mapping, sd->pos, count, 0, &page, &fsdata); if (ret) { mlog_errno(ret); goto out; } src = buf->ops->map(pipe, buf, 1); dst = kmap_atomic(page, KM_USER1); memcpy(dst + offset, src + buf->offset, count); kunmap_atomic(dst, KM_USER1); buf->ops->unmap(pipe, buf, src); copied = ocfs2_write_end(file, file->f_mapping, sd->pos, count, count, page, fsdata); if (copied < 0) { mlog_errno(copied); ret = copied; goto out; }out: return copied ? 
copied : ret;}static ssize_t __ocfs2_file_splice_write(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, size_t len, unsigned int flags){ int ret, err; struct address_space *mapping = out->f_mapping; struct inode *inode = mapping->host; struct splice_desc sd = { .total_len = len, .flags = flags, .pos = *ppos, }; sd_file(&sd) = out; ret = __splice_from_pipe(pipe, &sd, ocfs2_splice_write_actor); if (ret > 0) { *ppos += ret; if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { err = generic_osync_inode(inode, mapping, OSYNC_METADATA|OSYNC_DATA); if (err) ret = err; } } return ret;}static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, size_t len, unsigned int flags){ int ret; struct inode *inode = filp_dentry(out)->d_inode; mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", out, pipe, (unsigned int)len, filp_dentry(out)->d_name.len, filp_dentry(out)->d_name.name); inode_double_lock(inode, pipe->inode); ret = ocfs2_rw_lock(inode, 1); if (ret < 0) { mlog_errno(ret); goto out; } ret = ocfs2_prepare_inode_for_write(filp_dentry(out), ppos, len, 0, NULL); if (ret < 0) { mlog_errno(ret); goto out_unlock; } /* ok, we're done with i_size and alloc work */ ret = __ocfs2_file_splice_write(pipe, out, ppos, len, flags);out_unlock: ocfs2_rw_unlock(inode, 1);out: inode_double_unlock(inode, pipe->inode); mlog_exit(ret); return ret;}static ssize_t ocfs2_file_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags){ int ret = 0; struct inode *inode = filp_dentry(in)->d_inode; mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", in, pipe, (unsigned int)len, filp_dentry(in)->d_name.len, filp_dentry(in)->d_name.name); /* * See the comment in ocfs2_file_aio_read() */ ret = ocfs2_inode_lock(inode, NULL, 0); if (ret < 0) { mlog_errno(ret); goto bail; } ocfs2_inode_unlock(inode, 0); ret = generic_file_splice_read(in, ppos, pipe, len, flags);bail: mlog_exit(ret); return ret;}#endifstatic ssize_t 
__ocfs2_file_aio_read(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos){ int ret = 0, rw_level = -1, have_alloc_sem = 0, lock_level = 0; struct file *filp = iocb->ki_filp; struct inode *inode = filp_dentry(filp)->d_inode; mlog_entry("(0x%p, %u, '%.*s')\n", filp, (unsigned int)nr_segs, filp_dentry(filp)->d_name.len, filp_dentry(filp)->d_name.name); if (!inode) { ret = -EINVAL; mlog_errno(ret); goto bail; } /* * buffered reads protect themselves in ->readpage(). O_DIRECT reads * need locks to protect pending reads from racing with truncate. */ if (filp->f_flags & O_DIRECT) { down_read(&inode->i_alloc_sem); have_alloc_sem = 1; ret = ocfs2_rw_lock(inode, 0); if (ret < 0) { mlog_errno(ret); goto bail; } rw_level = 0; /* communicate with ocfs2_dio_end_io */ ocfs2_iocb_set_rw_locked(iocb, rw_level); } /* * We're fine letting folks race truncates and extending * writes with read across the cluster, just like they can * locally. Hence no rw_lock during read. * * Take and drop the meta data lock to update inode fields * like i_size. This allows the checks down below * generic_file_aio_read() a chance of actually working. 
*/ ret = ocfs2_inode_lock_atime(inode, filp->f_vfsmnt, &lock_level); if (ret < 0) { mlog_errno(ret); goto bail; } ocfs2_inode_unlock(inode, lock_level); ret = kapi_generic_file_aio_read(iocb, iov, nr_segs, iocb->ki_pos); if (ret == -EINVAL) mlog(0, "generic_file_aio_read returned -EINVAL\n"); /* buffered aio wouldn't have proper lock coverage today */ BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT)); /* see ocfs2_file_aio_write */ if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) { rw_level = -1; have_alloc_sem = 0; }bail: if (have_alloc_sem) up_read(&inode->i_alloc_sem); if (rw_level != -1) ocfs2_rw_unlock(inode, rw_level); mlog_exit(ret); return ret;}#ifdef HAS_FOPS_SENDFILEstatic ssize_t ocfs2_file_sendfile(struct file *in_file, loff_t *ppos, size_t count, read_actor_t actor, void *target){ int ret; struct inode *inode = in_file->f_mapping->host; mlog_entry("inode %llu, ppos %lld, count = %u\n", (unsigned long long)OCFS2_I(inode)->ip_blkno, (long long) *ppos, (unsigned int) count); /* Obviously, there is no user buffer to worry about here -- * this simplifies locking, so no need to walk vmas a la * read/write. We take the cluster lock against the inode * and call generic_file_sendfile. 
*/ ret = ocfs2_inode_lock(inode, NULL, 0); if (ret < 0) { mlog_errno(ret); goto bail; } down_read(&OCFS2_I(inode)->ip_alloc_sem); ret = generic_file_sendfile(in_file, ppos, count, actor, target); up_read(&OCFS2_I(inode)->ip_alloc_sem); ocfs2_inode_unlock(inode, 0);bail: mlog_exit(ret); return ret;}#endif#ifdef IOP_IS_NOT_CONSTstruct inode_operations ocfs2_file_iops = {#elseconst struct inode_operations ocfs2_file_iops = {#endif .setattr = ocfs2_setattr, .getattr = ocfs2_getattr, .permission = ocfs2_permission,#ifndef NO_FALLOCATE .fallocate = ocfs2_fallocate,#endif};#ifdef IOP_IS_NOT_CONSTstruct inode_operations ocfs2_special_file_iops = {#elseconst struct inode_operations ocfs2_special_file_iops = {#endif .setattr = ocfs2_setattr, .getattr = ocfs2_getattr, .permission = ocfs2_permission,};#ifdef FOP_IS_NOT_CONSTstruct file_operations ocfs2_fops = {#elseconst struct file_operations ocfs2_fops = {#endif .llseek = generic_file_llseek, .read = do_sync_read, .write = do_sync_write, .mmap = ocfs2_mmap, .fsync = ocfs2_sync_file, .release = ocfs2_file_release, .open = ocfs2_file_open, .aio_read = ocfs2_file_aio_read, .aio_write = ocfs2_file_aio_write, .ioctl = ocfs2_ioctl,#ifdef CONFIG_COMPAT .compat_ioctl = ocfs2_compat_ioctl,#endif .flock = ocfs2_flock,#ifndef NO_SPLICE_SUPPORT .splice_read = ocfs2_file_splice_read, .splice_write = ocfs2_file_splice_write,#endif#ifdef HAS_FOPS_SENDFILE .sendfile = ocfs2_file_sendfile,#endif};#ifdef FOP_IS_NOT_CONSTstruct file_operations ocfs2_dops = {#elseconst struct file_operations ocfs2_dops = {#endif .llseek = generic_file_llseek, .read = generic_read_dir, .readdir = ocfs2_readdir, .fsync = ocfs2_sync_file, .release = ocfs2_dir_release, .open = ocfs2_dir_open, .ioctl = ocfs2_ioctl,#ifdef CONFIG_COMPAT .compat_ioctl = ocfs2_compat_ioctl,#endif .flock = ocfs2_flock,};
/*
 * (Trailing code-viewer UI text removed from the scraped page:
 * keyboard-shortcut help — copy, search, fullscreen, theme, font size.)
 */