xfs_lrw.c

From the “YouLong 2410 Linux 2.6.8 kernel source” · C code · 1,029 lines total · page 1 of 2

C
1,029
字号
/* * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. * * This program is distributed in the hope that it would be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * Further, this software is distributed without any warranty that it is * free of the rightful claim of any third person regarding infringement * or the like.  Any license provided herein, whether implied or * otherwise, applies only to this software file.  Patent licenses, if * any, provided herein do not apply to combinations of this program with * other software, or any other product whatsoever. * * You should have received a copy of the GNU General Public License along * with this program; if not, write the Free Software Foundation, Inc., 59 * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
 * Mountain View, CA  94043, or:
 *
 * http://www.sgi.com
 *
 * For further information regarding this notice, see:
 *
 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
 */

/*
 *  fs/xfs/linux/xfs_lrw.c (Linux Read Write stuff)
 */

#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_inum.h"
#include "xfs_log.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_dir.h"
#include "xfs_dir2.h"
#include "xfs_alloc.h"
#include "xfs_dmapi.h"
#include "xfs_quota.h"
#include "xfs_mount.h"
#include "xfs_alloc_btree.h"
#include "xfs_bmap_btree.h"
#include "xfs_ialloc_btree.h"
#include "xfs_btree.h"
#include "xfs_ialloc.h"
#include "xfs_attr_sf.h"
#include "xfs_dir_sf.h"
#include "xfs_dir2_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_bmap.h"
#include "xfs_bit.h"
#include "xfs_rtalloc.h"
#include "xfs_error.h"
#include "xfs_itable.h"
#include "xfs_rw.h"
#include "xfs_acl.h"
#include "xfs_cap.h"
#include "xfs_mac.h"
#include "xfs_attr.h"
#include "xfs_inode_item.h"
#include "xfs_buf_item.h"
#include "xfs_utils.h"
#include "xfs_iomap.h"

#include <linux/capability.h>

#if defined(XFS_RW_TRACE)
/*
 * Record a read/write event for this inode in its ktrace ring buffer
 * (debug builds only).  The 64-bit size/offset values are split into
 * two 32-bit halves because ktrace slots are pointer-sized.
 */
void
xfs_rw_enter_trace(
	int			tag,
	xfs_iocore_t		*io,
	const struct iovec	*iovp,
	size_t			segs,
	loff_t			offset,
	int			ioflags)
{
	xfs_inode_t	*ip = XFS_IO_INODE(io);

	/* Tracing is disabled for this inode. */
	if (ip->i_rwtrace == NULL)
		return;
	ktrace_enter(ip->i_rwtrace,
		(void *)(unsigned long)tag,
		(void *)ip,
		(void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)),
		(void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)),
		(void *)(__psint_t)iovp,
		(void *)((unsigned long)segs),
		(void *)((unsigned long)((offset >> 32) & 0xffffffff)),
		(void *)((unsigned long)(offset & 0xffffffff)),
		(void *)((unsigned long)ioflags),
		(void *)((unsigned long)((io->io_new_size >> 32) & 0xffffffff)),
		(void *)((unsigned long)(io->io_new_size & 0xffffffff)),
		(void *)NULL,
		(void *)NULL,
		(void *)NULL,
		(void *)NULL,
		(void *)NULL);
}

/*
 * Record a page-cache invalidation event (offset/len and the first/last
 * byte range flushed) in the inode's ktrace ring buffer.
 */
void
xfs_inval_cached_trace(
	xfs_iocore_t	*io,
	xfs_off_t	offset,
	xfs_off_t	len,
	xfs_off_t	first,
	xfs_off_t	last)
{
	xfs_inode_t	*ip = XFS_IO_INODE(io);

	if (ip->i_rwtrace == NULL)
		return;
	ktrace_enter(ip->i_rwtrace,
		(void *)(__psint_t)XFS_INVAL_CACHED,
		(void *)ip,
		(void *)((unsigned long)((offset >> 32) & 0xffffffff)),
		(void *)((unsigned long)(offset & 0xffffffff)),
		(void *)((unsigned long)((len >> 32) & 0xffffffff)),
		(void *)((unsigned long)(len & 0xffffffff)),
		(void *)((unsigned long)((first >> 32) & 0xffffffff)),
		(void *)((unsigned long)(first & 0xffffffff)),
		(void *)((unsigned long)((last >> 32) & 0xffffffff)),
		(void *)((unsigned long)(last & 0xffffffff)),
		(void *)NULL,
		(void *)NULL,
		(void *)NULL,
		(void *)NULL,
		(void *)NULL,
		(void *)NULL);
}
#endif

/*
 *	xfs_iozero
 *
 *	xfs_iozero clears the specified range of buffer supplied,
 *	and marks all the affected blocks as valid and modified.  If
 *	an affected block is not allocated, it will be allocated.  If
 *	an affected block is not completely overwritten, and is not
 *	valid before the operation, it will be read from disk before
 *	being partially zeroed.
 */
STATIC int
xfs_iozero(
	struct inode		*ip,	/* inode			*/
	loff_t			pos,	/* offset in file		*/
	size_t			count,	/* size of data to zero		*/
	loff_t			end_size)	/* max file size to set */
{
	unsigned		bytes;
	struct page		*page;
	struct address_space	*mapping;
	char			*kaddr;
	int			status;

	mapping = ip->i_mapping;
	do {
		unsigned long index, offset;

		/* Split pos into a page index and an offset within the page. */
		offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
		index = pos >> PAGE_CACHE_SHIFT;
		bytes = PAGE_CACHE_SIZE - offset;
		if (bytes > count)
			bytes = count;

		status = -ENOMEM;
		page = grab_cache_page(mapping, index);
		if (!page)
			break;

		kaddr = kmap(page);
		/* prepare_write reads the page in from disk if needed. */
		status = mapping->a_ops->prepare_write(NULL, page, offset,
							offset + bytes);
		if (status) {
			goto unlock;
		}

		memset((void *) (kaddr + offset), 0, bytes);
		flush_dcache_page(page);
		status = mapping->a_ops->commit_write(NULL, page, offset,
							offset + bytes);
		if (!status) {
			pos += bytes;
			count -= bytes;
			/* Grow the in-core size, but never past end_size. */
			if (pos > i_size_read(ip))
				i_size_write(ip, pos < end_size ? pos : end_size);
		}

unlock:
		kunmap(page);
		unlock_page(page);
		page_cache_release(page);
		if (status)
			break;
	} while (count);

	/* status is 0 or a negative errno; return a positive error code. */
	return (-status);
}

/*
 * xfs_inval_cached_pages
 *
 * This routine is responsible for keeping direct I/O and buffered I/O
 * somewhat coherent.  From here we make sure that we're at least
 * temporarily holding the inode I/O lock exclusively and then call
 * the page cache to flush and invalidate any cached pages.  If there
 * are no cached pages this routine will be very quick.
 */
void
xfs_inval_cached_pages(
	vnode_t		*vp,
	xfs_iocore_t	*io,
	xfs_off_t	offset,
	int		write,
	int		relock)
{
	xfs_mount_t	*mp;

	/* Nothing cached for this vnode: nothing to flush. */
	if (!VN_CACHED(vp)) {
		return;
	}

	mp = io->io_mount;

	/*
	 * We need to get the I/O lock exclusively in order
	 * to safely invalidate pages and mappings.
*/
	if (relock) {
		/* Upgrade the shared iolock to exclusive. */
		XFS_IUNLOCK(mp, io, XFS_IOLOCK_SHARED);
		XFS_ILOCK(mp, io, XFS_IOLOCK_EXCL);
	}

	/* Writing beyond EOF creates a hole that must be zeroed */
	if (write && (offset > XFS_SIZE(mp, io))) {
		xfs_fsize_t	isize;

		XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
		/* Re-check the size now that we hold the ilock. */
		isize = XFS_SIZE(mp, io);
		if (offset > isize) {
			xfs_zero_eof(vp, io, offset, isize, offset);
		}
		XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
	}

	/* Flush and invalidate everything from the containing chunk onward. */
	xfs_inval_cached_trace(io, offset, -1, ctooff(offtoct(offset)), -1);
	VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(offset)), -1, FI_REMAPF_LOCKED);
	if (relock) {
		XFS_ILOCK_DEMOTE(mp, io, XFS_IOLOCK_EXCL);
	}
}

/*
 * xfs_read
 *
 * Behavior-layer read entry point: validates the iovec, enforces
 * direct-I/O alignment, sends a DMAPI read event if enabled, then hands
 * off to the generic aio read path under the shared iolock.
 */
ssize_t			/* bytes read, or (-)  error */
xfs_read(
	bhv_desc_t		*bdp,
	struct kiocb		*iocb,
	const struct iovec	*iovp,
	unsigned int		segs,
	loff_t			*offset,
	int			ioflags,
	cred_t			*credp)
{
	struct file		*file = iocb->ki_filp;
	size_t			size = 0;
	ssize_t			ret;
	xfs_fsize_t		n;
	xfs_inode_t		*ip;
	xfs_mount_t		*mp;
	vnode_t			*vp;
	unsigned long		seg;

	ip = XFS_BHVTOI(bdp);
	vp = BHV_TO_VNODE(bdp);
	mp = ip->i_mount;

	XFS_STATS_INC(xs_read_calls);

	/* START copy & waste from filemap.c */
	for (seg = 0; seg < segs; seg++) {
		const struct iovec *iv = &iovp[seg];

		/*
		 * If any segment has a negative length, or the cumulative
		 * length ever wraps negative then return -EINVAL.
		 */
		size += iv->iov_len;
		if (unlikely((ssize_t)(size|iv->iov_len) < 0))
			/*
			 * NOTE(review): other error paths in this file use
			 * -XFS_ERROR(EINVAL); verify the sign convention here.
			 */
			return XFS_ERROR(-EINVAL);
	}
	/* END copy & waste from filemap.c */

	if (ioflags & IO_ISDIRECT) {
		/* Realtime files do direct I/O against the RT device. */
		xfs_buftarg_t	*target =
			(ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
mp->m_rtdev_targp : mp->m_ddev_targp;

		/* Direct I/O must be sector aligned in both offset and length. */
		if ((*offset & target->pbr_smask) ||
		    (size & target->pbr_smask)) {
			/* Unaligned read exactly at EOF: report end-of-file. */
			if (*offset == ip->i_d.di_size) {
				return (0);
			}
			return -XFS_ERROR(EINVAL);
		}
	}

	/* Clamp the request so it cannot run past the maximum file offset. */
	n = XFS_MAXIOFFSET(mp) - *offset;
	if ((n <= 0) || (size == 0))
		return 0;

	if (n < size)
		size = n;

	if (XFS_FORCED_SHUTDOWN(mp)) {
		return -EIO;
	}

	/* OK so we are holding the I/O lock for the duration
	 * of the submission, then what happens if the I/O
	 * does not really happen here, but is scheduled
	 * later?
	 */
	xfs_ilock(ip, XFS_IOLOCK_SHARED);

	/* Notify DMAPI of the read unless this is an invisible I/O. */
	if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) &&
	    !(ioflags & IO_INVIS)) {
		vrwlock_t locktype = VRWLOCK_READ;

		ret = XFS_SEND_DATA(mp, DM_EVENT_READ,
					BHV_TO_VNODE(bdp), *offset, size,
					FILP_DELAY_FLAG(file), &locktype);
		if (ret) {
			xfs_iunlock(ip, XFS_IOLOCK_SHARED);
			return -ret;
		}
	}

	xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore,
				iovp, segs, *offset, ioflags);
	ret = __generic_file_aio_read(iocb, iovp, segs, offset);
	xfs_iunlock(ip, XFS_IOLOCK_SHARED);

	if (ret > 0)
		XFS_STATS_ADD(xs_read_bytes, ret);

	/* Invisible I/O must not bump the access timestamp. */
	if (likely(!(ioflags & IO_INVIS)))
		xfs_ichgtime(ip, XFS_ICHGTIME_ACC);

	return ret;
}

/*
 * xfs_sendfile
 *
 * Behavior-layer sendfile entry point: clamps the count to the maximum
 * file offset, sends a DMAPI read event if enabled, then hands off to
 * generic_file_sendfile() under the shared iolock.
 */
ssize_t
xfs_sendfile(
	bhv_desc_t		*bdp,
	struct file		*filp,
	loff_t			*offset,
	int			ioflags,
	size_t			count,
	read_actor_t		actor,
	void			*target,
	cred_t			*credp)
{
	ssize_t			ret;
	xfs_fsize_t		n;
	xfs_inode_t		*ip;
	xfs_mount_t		*mp;
	vnode_t			*vp;

	ip = XFS_BHVTOI(bdp);
	vp = BHV_TO_VNODE(bdp);
	mp = ip->i_mount;

	XFS_STATS_INC(xs_read_calls);

	/* Clamp the request so it cannot run past the maximum file offset. */
	n = XFS_MAXIOFFSET(mp) - *offset;
	if ((n <= 0) || (count == 0))
		return 0;

	if (n < count)
		count = n;

	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		return -EIO;

	xfs_ilock(ip, XFS_IOLOCK_SHARED);

	/* Notify DMAPI of the read unless this is an invisible I/O. */
	if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) &&
	    (!(ioflags & IO_INVIS))) {
		vrwlock_t locktype = VRWLOCK_READ;
		int error;

		error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp), *offset, count,
				      FILP_DELAY_FLAG(filp), &locktype);
		if (error) {
			xfs_iunlock(ip, XFS_IOLOCK_SHARED);
		return -error;		}	}	xfs_rw_enter_trace(XFS_SENDFILE_ENTER, &ip->i_iocore,				target, count, *offset, ioflags);	ret = generic_file_sendfile(filp, offset, count, actor, target);	xfs_iunlock(ip, XFS_IOLOCK_SHARED);	XFS_STATS_ADD(xs_read_bytes, ret);	xfs_ichgtime(ip, XFS_ICHGTIME_ACC);	return ret;}/* * This routine is called to handle zeroing any space in the last * block of the file that is beyond the EOF.  We do this since the * size is being increased without writing anything to that block * and we don't want anyone to read the garbage on the disk. */STATIC int				/* error (positive) */xfs_zero_last_block(	struct inode	*ip,	xfs_iocore_t	*io,	xfs_off_t	offset,	xfs_fsize_t	isize,	xfs_fsize_t	end_size){	xfs_fileoff_t	last_fsb;	xfs_mount_t	*mp;	int		nimaps;	int		zero_offset;	int		zero_len;	int		isize_fsb_offset;	int		error = 0;	xfs_bmbt_irec_t	imap;	loff_t		loff;	size_t		lsize;	ASSERT(ismrlocked(io->io_lock, MR_UPDATE) != 0);	ASSERT(offset > isize);	mp = io->io_mount;	isize_fsb_offset = XFS_B_FSB_OFFSET(mp, isize);	if (isize_fsb_offset == 0) {		/*		 * There are no extra bytes in the last block on disk to		 * zero, so return.		 */		return 0;	}	last_fsb = XFS_B_TO_FSBT(mp, isize);	nimaps = 1;	error = XFS_BMAPI(mp, NULL, io, last_fsb, 1, 0, NULL, 0, &imap,			  &nimaps, NULL);	if (error) {		return error;	}	ASSERT(nimaps > 0);	/*	 * If the block underlying isize is just a hole, then there	 * is nothing to zero.	 */	if (imap.br_startblock == HOLESTARTBLOCK) {		return 0;	}	/*	 * Zero the part of the last block beyond the EOF, and write it	 * out sync.  We need to drop the ilock while we do this so we	 * don't deadlock when the buffer cache calls back to us.	 
*/
	XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
	loff = XFS_FSB_TO_B(mp, last_fsb);
	lsize = XFS_FSB_TO_B(mp, 1);

	/* Zero from the EOF offset within the block to the block's end. */
	zero_offset = isize_fsb_offset;
	zero_len = mp->m_sb.sb_blocksize - isize_fsb_offset;

	error = xfs_iozero(ip, loff + zero_offset, zero_len, end_size);

	XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
	ASSERT(error >= 0);
	return error;
}

/*
 * Zero any on disk space between the current EOF and the new,
 * larger EOF.  This handles the normal case of zeroing the remainder
 * of the last block in the file and the unusual case of zeroing blocks
 * out beyond the size of the file.  This second case only happens
 * with fixed size extents and when the system crashes before the inode
 * size was updated but after blocks were allocated.  If fill is set,
 * then any holes in the range are filled and zeroed.  If not, the holes
 * are left alone as holes.
 */
int					/* error (positive) */
xfs_zero_eof(
	vnode_t		*vp,
	xfs_iocore_t	*io,
	xfs_off_t	offset,		/* starting I/O offset */
	xfs_fsize_t	isize,		/* current inode size */
	xfs_fsize_t	end_size)	/* terminal inode size */
{
	struct inode	*ip = LINVFS_GET_IP(vp);
	xfs_fileoff_t	start_zero_fsb;
	xfs_fileoff_t	end_zero_fsb;
	xfs_fileoff_t	prev_zero_fsb;
	xfs_fileoff_t	zero_count_fsb;
	xfs_fileoff_t	last_fsb;
	/*
	 * NOTE(review): this capture is truncated here (page 1 of 2); the
	 * remainder of xfs_zero_eof continues on the next page.
	 */

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?