📄 file.c

📁 ocfs1.2.7 源码
💻 C
📖 第 1 页 / 共 2 页
字号:
12 下一页
/* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * * file.c * * File open, close, extend, truncate * * Copyright (C) 2002, 2004 Oracle.  All rights reserved. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this program; if not, write to the * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 021110-1307, USA. */#include <linux/fs.h>#include <linux/types.h>#include <linux/slab.h>#include <linux/highmem.h>#include <linux/pagemap.h>#include <linux/uio.h>#define MLOG_MASK_PREFIX ML_INODE#include <cluster/masklog.h>#include "ocfs2.h"#include "aio.h"#include "alloc.h"#include "dir.h"#include "dlmglue.h"#include "extent_map.h"#include "file.h"#include "sysfile.h"#include "inode.h"#include "journal.h"#include "mmap.h"#include "suballoc.h"#include "super.h"#include "buffer_head_io.h"static int ocfs2_zero_extend(struct inode *inode);static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,				     struct inode *inode,				     struct buffer_head *fe_bh,				     u64 new_i_size);int ocfs2_sync_inode(struct inode *inode){	filemap_fdatawrite(inode->i_mapping);	return sync_mapping_buffers(inode->i_mapping);}static int ocfs2_file_open(struct inode *inode, struct file *file){	int status;	int mode = file->f_flags;	struct ocfs2_inode_info *oi = OCFS2_I(inode);	mlog_entry("(0x%p, 0x%p, '%.*s')\n", inode, file,		   file->f_dentry->d_name.len, file->f_dentry->d_name.name);	spin_lock(&oi->ip_lock);	/* Check that the inode hasn't been wiped from disk by another	 * node. If it hasn't then we're safe as long as we hold the	 * spin lock until our increment of open count. */	if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) {		spin_unlock(&oi->ip_lock);		status = -ENOENT;		goto leave;	}	if (mode & O_DIRECT)		oi->ip_flags |= OCFS2_INODE_OPEN_DIRECT;	oi->ip_open_count++;	spin_unlock(&oi->ip_lock);	status = 0;leave:	mlog_exit(status);	return status;}static int ocfs2_file_release(struct inode *inode, struct file *file){	struct ocfs2_inode_info *oi = OCFS2_I(inode);	mlog_entry("(0x%p, 0x%p, '%.*s')\n", inode, file,		       file->f_dentry->d_name.len,		       file->f_dentry->d_name.name);	spin_lock(&oi->ip_lock);#ifdef OCFS2_DELETE_INODE_WORKAROUND	/* Do the sync *before* decrementing ip_open_count as	 * otherwise the voting code might allow this inode to be	 * wiped. */	if (oi->ip_open_count == 1 &&	    oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED) {		spin_unlock(&oi->ip_lock);		write_inode_now(inode, 1);		spin_lock(&oi->ip_lock);	}#endif	if (!--oi->ip_open_count)		oi->ip_flags &= ~OCFS2_INODE_OPEN_DIRECT;	spin_unlock(&oi->ip_lock);	mlog_exit(0);	return 0;}static int ocfs2_sync_file(struct file *file,			   struct dentry *dentry,			   int datasync){	int err = 0;	journal_t *journal;	struct inode *inode = dentry->d_inode;	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);	mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", file, dentry, datasync,		   dentry->d_name.len, dentry->d_name.name);	err = ocfs2_sync_inode(dentry->d_inode);	if (err)		goto bail;	journal = osb->journal->j_journal;	err = journal_force_commit(journal);bail:	mlog_exit(err);	return (err < 0) ? -EIO : 0;}static void ocfs2_update_inode_size(struct inode *inode,				    u64 new_size){	i_size_write(inode, new_size);	inode->i_blocks = ocfs2_align_bytes_to_sectors(new_size);}void ocfs2_file_finish_extension(struct inode *inode,				 loff_t newsize,				 unsigned direct_extend){	int status;	mlog(0, "inode %"MLFu64", newsize = %lld, direct_extend = %u\n",	     OCFS2_I(inode)->ip_blkno, (long long)newsize, direct_extend);	ocfs2_update_inode_size(inode, newsize);#ifdef OCFS2_ORACORE_WORKAROUNDS	if (direct_extend) {		/*		 * This leaves dirty data in holes.		 * Caveat Emptor.		 */		OCFS2_I(inode)->ip_mmu_private = newsize;		return;	}#endif	status = ocfs2_zero_extend(inode);	/*	 * Don't overwrite the result of	 * generic_file_write	 */	if (status)		mlog(ML_ERROR, "Unable to pre-zero extension of inode "		     "(%d)\n", status);}static ssize_t ocfs2_file_write(struct file *filp,				const char __user *buf,				size_t count,				loff_t *ppos){	struct iovec local_iov = { .iov_base = (void __user *)buf,				   .iov_len = count };	int ret = 0;	struct ocfs2_super *osb = NULL;	struct dentry *dentry = filp->f_dentry;	struct inode *inode = dentry->d_inode;	struct ocfs2_write_lock_info info = {0, };	DECLARE_BUFFER_LOCK_CTXT(ctxt);	mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", filp, buf,		   (unsigned int)count,		   filp->f_dentry->d_name.len,		   filp->f_dentry->d_name.name);	/* happy write of zero bytes */	if (count == 0) {		ret = 0;		goto bail;	}	if (!inode) {		mlog(0, "bad inode\n");		ret = -EIO;		goto bail;	}	osb = OCFS2_SB(inode->i_sb);	ret = ocfs2_write_lock_maybe_extend(filp, buf, count, ppos, &info,					    &ctxt);	if (ret)		goto bail;	down_read(&OCFS2_I(inode)->ip_alloc_sem);#ifdef OCFS2_ORACORE_WORKAROUNDS	if (osb->s_mount_opt & OCFS2_MOUNT_COMPAT_OCFS) {		unsigned int saved_flags = filp->f_flags;		if (info.wl_do_direct_io)			filp->f_flags |= O_DIRECT;		else			filp->f_flags &= ~O_DIRECT;		ret = generic_file_write_nolock(filp, &local_iov, 1, ppos);		filp->f_flags = saved_flags;	} else#endif		ret = generic_file_write_nolock(filp, &local_iov, 1, ppos);	up_read(&OCFS2_I(inode)->ip_alloc_sem);bail:	/* 	 * if this write created a hole then write zeros into it.. wl_extended	 * is only set if we got the data lock so the buffered zero writing	 * will have lock coverage.  This must be done before unlocking.	 */	if (info.wl_extended)		ocfs2_file_finish_extension(inode, info.wl_newsize,					    info.wl_do_direct_io);	if (info.wl_unlock_ctxt)		ocfs2_unlock_buffer_inodes(&ctxt);	if (info.wl_have_i_mutex)		mutex_unlock(&inode->i_mutex);	mlog_exit(ret);	return ret;}static ssize_t ocfs2_file_read(struct file *filp,			       char __user *buf,			       size_t count,			       loff_t *ppos){	int ret = 0;	struct ocfs2_super *osb = NULL;	struct dentry *dentry = filp->f_dentry;	struct inode *inode = dentry->d_inode;	struct ocfs2_backing_inode *target_binode;	DECLARE_BUFFER_LOCK_CTXT(ctxt);	mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", filp, buf,		   (unsigned int)count,		   filp->f_dentry->d_name.len,		   filp->f_dentry->d_name.name);	if (!inode) {		ret = -EINVAL;		mlog_errno(ret);		goto bail;	}	osb = OCFS2_SB(inode->i_sb);#ifdef OCFS2_ORACORE_WORKAROUNDS	if (osb->s_mount_opt & OCFS2_MOUNT_COMPAT_OCFS) {		if (filp->f_flags & O_DIRECT) {			int sector_size = 1 << osb->s_sectsize_bits;			if (((*ppos) & (sector_size - 1)) ||			    (count & (sector_size - 1)) ||			    ((unsigned long)buf & (sector_size - 1)) ||			    (i_size_read(inode) & (sector_size -1))) {				filp->f_flags &= ~O_DIRECT;			}		}	}#endif	ret = ocfs2_setup_io_locks(inode->i_sb, inode, buf, count, &ctxt,				   &target_binode);	if (ret < 0) {		mlog_errno(ret);		goto bail;	}	target_binode->ba_lock_data_level = 0;	ret = ocfs2_lock_buffer_inodes(&ctxt, NULL);	if (ret < 0) {		mlog_errno(ret);		goto bail_unlock;	}	down_read(&OCFS2_I(inode)->ip_alloc_sem);	ret = generic_file_read(filp, buf, count, ppos);	up_read(&OCFS2_I(inode)->ip_alloc_sem);	if (ret == -EINVAL)		mlog(ML_ERROR, "Generic_file_read returned -EINVAL\n");bail_unlock:	ocfs2_unlock_buffer_inodes(&ctxt);bail:	mlog_exit(ret);	return ret;}static ssize_t ocfs2_file_sendfile(struct file *in_file,				   loff_t *ppos,				   size_t count,				   read_actor_t actor,				   void *target){	int ret;	struct inode *inode = in_file->f_mapping->host;	DECLARE_IO_MARKER(io_marker);	mlog_entry("inode %"MLFu64", ppos %lld, count = %u\n",		   OCFS2_I(inode)->ip_blkno, (long long) *ppos,		   (unsigned int) count);	/* Obviously, there is no user buffer to worry about here --	 * this simplifies locking, so no need to walk vmas a la	 * read/write. We take a simple set of cluster locks against	 * the inode and call generic_file_sendfile. */	ret = ocfs2_meta_lock(inode, NULL, NULL, 0);	if (ret < 0) {		mlog_errno(ret);		goto bail;	}	ret = ocfs2_data_lock(inode, 0);	if (ret < 0) {		mlog_errno(ret);		goto bail_unlock_meta;	}	down_read(&OCFS2_I(inode)->ip_alloc_sem);	/*	 * We still need this so readpage doesn't throw an error.	 */	ocfs2_add_io_marker(inode, &io_marker);	ret = generic_file_sendfile(in_file, ppos, count, actor, target);	ocfs2_del_io_marker(inode, &io_marker);	up_read(&OCFS2_I(inode)->ip_alloc_sem);	ocfs2_data_unlock(inode, 0);bail_unlock_meta:	ocfs2_meta_unlock(inode, 0);bail:	mlog_exit(ret);	return ret;}struct file_operations ocfs2_fops = {	.read		= ocfs2_file_read,	.write		= ocfs2_file_write,	.sendfile	= ocfs2_file_sendfile,	.mmap		= ocfs2_mmap,	.fsync		= ocfs2_sync_file,	.release	= ocfs2_file_release,	.open		= ocfs2_file_open,	.aio_read	= ocfs2_file_aio_read,	.aio_write	= ocfs2_file_aio_write,};struct file_operations ocfs2_dops = {	.read		= generic_read_dir,	.readdir	= ocfs2_readdir,	.fsync		= ocfs2_sync_file,};int ocfs2_set_inode_size(struct ocfs2_journal_handle *handle,			 struct inode *inode,			 struct buffer_head *fe_bh,			 u64 new_i_size){	int status, grow;	mlog_entry_void();	grow = new_i_size > inode->i_size;	i_size_write(inode, new_i_size);	inode->i_blocks = ocfs2_align_bytes_to_sectors(new_i_size);	inode->i_ctime = inode->i_mtime = CURRENT_TIME;	status = ocfs2_mark_inode_dirty(handle, inode, fe_bh);	if (status < 0) {		mlog_errno(status);		goto bail;	}	/* FIXME: I think this should all be in the caller */	spin_lock(&OCFS2_I(inode)->ip_lock);	if (!grow)		OCFS2_I(inode)->ip_mmu_private = i_size_read(inode);	spin_unlock(&OCFS2_I(inode)->ip_lock);bail:	mlog_exit(status);	return status;}static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,				     struct inode *inode,				     struct buffer_head *fe_bh,				     u64 new_i_size){	int status;	struct ocfs2_journal_handle *handle;	mlog_entry_void();	/* TODO: This needs to actually orphan the inode in this	 * transaction. */	handle = ocfs2_start_trans(osb, NULL, OCFS2_INODE_UPDATE_CREDITS);	if (IS_ERR(handle)) {		status = PTR_ERR(handle);		mlog_errno(status);		goto out;	}	status = ocfs2_set_inode_size(handle, inode, fe_bh, new_i_size);	if (status < 0)		mlog_errno(status);	ocfs2_commit_trans(handle);out:	mlog_exit(status);	return status;}static int ocfs2_truncate_file(struct ocfs2_super *osb,			       u64 new_i_size,			       struct inode *inode){	int status = 0;	struct ocfs2_dinode *fe = NULL;	struct buffer_head *fe_bh = NULL;	struct ocfs2_journal_handle *handle = NULL;	struct ocfs2_truncate_context *tc = NULL;	mlog_entry("(inode = %"MLFu64", new_i_size = %"MLFu64"\n",		   OCFS2_I(inode)->ip_blkno, new_i_size);	truncate_inode_pages(inode->i_mapping, new_i_size);	status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &fe_bh,				  OCFS2_BH_CACHED, inode);	if (status < 0) {		mlog_errno(status);		goto bail;	}	fe = (struct ocfs2_dinode *) fe_bh->b_data;	if (!OCFS2_IS_VALID_DINODE(fe)) {		OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);		status = -EIO;		goto bail;	}	mlog_bug_on_msg(le64_to_cpu(fe->i_size) != i_size_read(inode),			"Inode %"MLFu64", inode i_size = %lld != di "			"i_size = %"MLFu64", i_flags = 0x%x\n",			OCFS2_I(inode)->ip_blkno,			i_size_read(inode),			le64_to_cpu(fe->i_size), le32_to_cpu(fe->i_flags));	if (new_i_size > le64_to_cpu(fe->i_size)) {		mlog(0, "asked to truncate file with size (%"MLFu64") "		     "to size (%"MLFu64")!\n",		     le64_to_cpu(fe->i_size), new_i_size);		status = -EINVAL;		mlog_errno(status);		goto bail;	}	mlog(0, "inode %"MLFu64", i_size = %"MLFu64", new_i_size = %"MLFu64"\n",	     le64_to_cpu(fe->i_blkno), le64_to_cpu(fe->i_size), new_i_size);	/* lets handle the simple truncate cases before doing any more	 * cluster locking. */	if (new_i_size == le64_to_cpu(fe->i_size))		goto bail;	/* This forces other nodes to sync and drop their pages. Do	 * this even if we have a truncate without allocation change -	 * ocfs2 cluster sizes can be much greater than page size, so	 * we have to truncate them anyway.  */	status = ocfs2_data_lock(inode, 1);	if (status < 0) {		mlog_errno(status);		goto bail;	}	ocfs2_data_unlock(inode, 1);	if (le32_to_cpu(fe->i_clusters) ==	    ocfs2_clusters_for_bytes(osb->sb, new_i_size)) {		mlog(0, "fe->i_clusters = %u, so we do a simple truncate\n",		     fe->i_clusters);		/* No allocation change is required, so lets fast path		 * this truncate. */		handle = ocfs2_start_trans(osb, NULL,					  OCFS2_INODE_UPDATE_CREDITS);		if (IS_ERR(handle)) {			status = PTR_ERR(handle);			handle = NULL;			mlog_errno(status);			goto bail;		}		status = ocfs2_set_inode_size(handle, inode, fe_bh,					      new_i_size);		if (status < 0)			mlog_errno(status);		goto bail;	}	/* alright, we're going to need to do a full blown alloc size	 * change. Orphan the inode so that recovery can complete the	 * truncate if necessary. This does the task of marking	 * i_size. */	status = ocfs2_orphan_for_truncate(osb, inode, fe_bh, new_i_size);	if (status < 0) {		mlog_errno(status);		goto bail;	}	status = ocfs2_prepare_truncate(osb, inode, fe_bh, &tc);	if (status < 0) {		mlog_errno(status);		goto bail;	}	status = ocfs2_commit_truncate(osb, inode, fe_bh, tc);	if (status < 0) {		mlog_errno(status);		goto bail;	}	/* TODO: orphan dir cleanup here. */bail:	if (handle)		ocfs2_commit_trans(handle);	if (fe_bh)		brelse(fe_bh);	mlog_exit(status);	return status;}static int ocfs2_zero_extend(struct inode *inode){	struct address_space *mapping = inode->i_mapping;	struct page *page;	u64 size = i_size_read(inode) - 1;	unsigned int offset;	int res = 0;	/* Start the zeroing of blocks */	if (i_size_read(inode) > OCFS2_I(inode)->ip_mmu_private) {		page = grab_cache_page(mapping,
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -