xfs_buf.c
/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include <linux/stddef.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/init.h>
#include <linux/vmalloc.h>
#include <linux/bio.h>
#include <linux/sysctl.h>
#include <linux/proc_fs.h>
#include <linux/workqueue.h>
#include <linux/percpu.h>
#include <linux/blkdev.h>
#include <linux/hash.h>
#include <linux/kthread.h>
#include "xfs_linux.h"

STATIC kmem_zone_t *xfs_buf_zone;
STATIC kmem_shaker_t xfs_buf_shake;
STATIC int xfsbufd(void *);
STATIC int xfsbufd_wakeup(int, gfp_t);
STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);

STATIC struct workqueue_struct *xfslogd_workqueue;
struct workqueue_struct *xfsdatad_workqueue;

#ifdef XFS_BUF_TRACE
void
xfs_buf_trace(
	xfs_buf_t	*bp,
	char		*id,
	void		*data,
	void		*ra)
{
	ktrace_enter(xfs_buf_trace_buf,
		bp, id,
		(void *)(unsigned long)bp->b_flags,
		(void *)(unsigned long)bp->b_hold.counter,
		(void *)(unsigned long)bp->b_sema.count.counter,
		(void *)current,
		data, ra,
		(void *)(unsigned long)((bp->b_file_offset>>32) & 0xffffffff),
		(void *)(unsigned long)(bp->b_file_offset & 0xffffffff),
		(void *)(unsigned long)bp->b_buffer_length,
		NULL, NULL, NULL, NULL, NULL);
}
ktrace_t *xfs_buf_trace_buf;
#define XFS_BUF_TRACE_SIZE	4096
#define XB_TRACE(bp, id, data)	\
	xfs_buf_trace(bp, id, (void *)data, (void *)__builtin_return_address(0))
#else
#define XB_TRACE(bp, id, data)	do { } while (0)
#endif

#ifdef XFS_BUF_LOCK_TRACKING
# define XB_SET_OWNER(bp)	((bp)->b_last_holder = current->pid)
# define XB_CLEAR_OWNER(bp)	((bp)->b_last_holder = -1)
# define XB_GET_OWNER(bp)	((bp)->b_last_holder)
#else
# define XB_SET_OWNER(bp)	do { } while (0)
# define XB_CLEAR_OWNER(bp)	do { } while (0)
# define XB_GET_OWNER(bp)	do { } while (0)
#endif

#define xb_to_gfp(flags) \
	((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : \
	  ((flags) & XBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN)

#define xb_to_km(flags) \
	 (((flags) & XBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP)

#define xfs_buf_allocate(flags) \
	kmem_zone_alloc(xfs_buf_zone, xb_to_km(flags))
#define xfs_buf_deallocate(bp) \
	kmem_zone_free(xfs_buf_zone, (bp));

/*
 *	Page Region interfaces.
 *
 *	For pages in filesystems where the blocksize is smaller than the
 *	pagesize, we use the page->private field (long) to hold a bitmap
 *	of uptodate regions within the page.
 *
 *	Each such region is "bytes per page / bits per long" bytes long.
 *
 *	NBPPR == number-of-bytes-per-page-region
 *	BTOPR == bytes-to-page-region (rounded up)
 *	BTOPRT == bytes-to-page-region-truncated (rounded down)
 */
#if (BITS_PER_LONG == 32)
#define PRSHIFT		(PAGE_CACHE_SHIFT - 5)	/* (32 == 1<<5) */
#elif (BITS_PER_LONG == 64)
#define PRSHIFT		(PAGE_CACHE_SHIFT - 6)	/* (64 == 1<<6) */
#else
#error BITS_PER_LONG must be 32 or 64
#endif
#define NBPPR		(PAGE_CACHE_SIZE/BITS_PER_LONG)
#define BTOPR(b)	(((unsigned int)(b) + (NBPPR - 1)) >> PRSHIFT)
#define BTOPRT(b)	(((unsigned int)(b) >> PRSHIFT))

STATIC unsigned long
page_region_mask(
	size_t		offset,
	size_t		length)
{
	unsigned long	mask;
	int		first, final;

	first = BTOPR(offset);
	final = BTOPRT(offset + length - 1);
	first = min(first, final);

	mask = ~0UL;
	mask <<= BITS_PER_LONG - (final - first + 1);
	mask >>= BITS_PER_LONG - (final + 1);

	ASSERT(offset + length <= PAGE_CACHE_SIZE);
	ASSERT((final - first) < BITS_PER_LONG && (final - first) >= 0);

	return mask;
}

STATIC inline void
set_page_region(
	struct page	*page,
	size_t		offset,
	size_t		length)
{
	set_page_private(page,
		page_private(page) | page_region_mask(offset, length));
	if (page_private(page) == ~0UL)
		SetPageUptodate(page);
}

STATIC inline int
test_page_region(
	struct page	*page,
	size_t		offset,
	size_t		length)
{
	unsigned long	mask = page_region_mask(offset, length);

	return (mask && (page_private(page) & mask) == mask);
}
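/*
 * Worked example (added commentary, not part of the original source),
 * assuming PAGE_CACHE_SIZE == 4096 and BITS_PER_LONG == 64: the page is
 * divided into 64 regions of NBPPR == 64 bytes each, and PRSHIFT == 6.
 * Marking a 512-byte block at offset 1024 as uptodate gives
 *
 *	first = BTOPR(1024)            = (1024 + 63) >> 6 = 16
 *	final = BTOPRT(1024 + 512 - 1) = 1535 >> 6        = 23
 *	mask  = bits 16..23            = 0x0000000000ff0000UL
 *
 * i.e. set_page_region() sets the bits for regions 16-23 (bytes
 * 1024-1535) in page->private.  Once every region of the page has been
 * marked, page_private(page) == ~0UL and the whole page is flagged
 * uptodate.
 */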
/*
 *	Mapping of multi-page buffers into contiguous virtual space
 */

typedef struct a_list {
	void		*vm_addr;
	struct a_list	*next;
} a_list_t;

STATIC a_list_t		*as_free_head;
STATIC int		as_list_len;
STATIC DEFINE_SPINLOCK(as_lock);

/*
 *	Try to batch vunmaps because they are costly.
 */
STATIC void
free_address(
	void		*addr)
{
	a_list_t	*aentry;

	aentry = kmalloc(sizeof(a_list_t), GFP_ATOMIC & ~__GFP_HIGH);
	if (likely(aentry)) {
		spin_lock(&as_lock);
		aentry->next = as_free_head;
		aentry->vm_addr = addr;
		as_free_head = aentry;
		as_list_len++;
		spin_unlock(&as_lock);
	} else {
		vunmap(addr);
	}
}

STATIC void
purge_addresses(void)
{
	a_list_t	*aentry, *old;

	if (as_free_head == NULL)
		return;

	spin_lock(&as_lock);
	aentry = as_free_head;
	as_free_head = NULL;
	as_list_len = 0;
	spin_unlock(&as_lock);

	while ((old = aentry) != NULL) {
		vunmap(aentry->vm_addr);
		aentry = aentry->next;
		kfree(old);
	}
}
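/*
 * Added note (not part of the original source): free_address() defers the
 * costly vunmap() by pushing the mapping onto as_free_head under as_lock,
 * falling back to an immediate vunmap() only if the small GFP_ATOMIC &
 * ~__GFP_HIGH allocation fails.  The list is then drained in one batch by
 * purge_addresses(); within this file that happens from
 * _xfs_buf_map_pages() once as_list_len exceeds 64, so the teardown cost
 * is paid in bulk rather than once per buffer.
 */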
/*
 *	Internal xfs_buf_t object manipulation
 */

STATIC void
_xfs_buf_initialize(
	xfs_buf_t		*bp,
	xfs_buftarg_t		*target,
	xfs_off_t		range_base,
	size_t			range_length,
	xfs_buf_flags_t		flags)
{
	/*
	 * We don't want certain flags to appear in b_flags.
	 */
	flags &= ~(XBF_LOCK|XBF_MAPPED|XBF_DONT_BLOCK|XBF_READ_AHEAD);

	memset(bp, 0, sizeof(xfs_buf_t));
	atomic_set(&bp->b_hold, 1);
	init_MUTEX_LOCKED(&bp->b_iodonesema);
	INIT_LIST_HEAD(&bp->b_list);
	INIT_LIST_HEAD(&bp->b_hash_list);
	init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */
	XB_SET_OWNER(bp);
	bp->b_target = target;
	bp->b_file_offset = range_base;
	/*
	 * Set buffer_length and count_desired to the same value initially.
	 * I/O routines should use count_desired, which will be the same in
	 * most cases but may be reset (e.g. XFS recovery).
	 */
	bp->b_buffer_length = bp->b_count_desired = range_length;
	bp->b_flags = flags;
	bp->b_bn = XFS_BUF_DADDR_NULL;
	atomic_set(&bp->b_pin_count, 0);
	init_waitqueue_head(&bp->b_waiters);

	XFS_STATS_INC(xb_create);
	XB_TRACE(bp, "initialize", target);
}

/*
 *	Allocate a page array capable of holding a specified number
 *	of pages, and point the page buf at it.
 */
STATIC int
_xfs_buf_get_pages(
	xfs_buf_t		*bp,
	int			page_count,
	xfs_buf_flags_t		flags)
{
	/* Make sure that we have a page list */
	if (bp->b_pages == NULL) {
		bp->b_offset = xfs_buf_poff(bp->b_file_offset);
		bp->b_page_count = page_count;
		if (page_count <= XB_PAGES) {
			bp->b_pages = bp->b_page_array;
		} else {
			bp->b_pages = kmem_alloc(sizeof(struct page *) *
					page_count, xb_to_km(flags));
			if (bp->b_pages == NULL)
				return -ENOMEM;
		}
		memset(bp->b_pages, 0, sizeof(struct page *) * page_count);
	}
	return 0;
}

/*
 *	Frees b_pages if it was allocated.
 */
STATIC void
_xfs_buf_free_pages(
	xfs_buf_t	*bp)
{
	if (bp->b_pages != bp->b_page_array) {
		kmem_free(bp->b_pages,
			  bp->b_page_count * sizeof(struct page *));
	}
}

/*
 *	Releases the specified buffer.
 *
 *	The modification state of any associated pages is left unchanged.
 *	The buffer must not be on any hash - use xfs_buf_rele instead for
 *	hashed and refcounted buffers.
 */
void
xfs_buf_free(
	xfs_buf_t		*bp)
{
	XB_TRACE(bp, "free", 0);

	ASSERT(list_empty(&bp->b_hash_list));

	if (bp->b_flags & _XBF_PAGE_CACHE) {
		uint		i;

		if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1))
			free_address(bp->b_addr - bp->b_offset);

		for (i = 0; i < bp->b_page_count; i++)
			page_cache_release(bp->b_pages[i]);
		_xfs_buf_free_pages(bp);
	} else if (bp->b_flags & _XBF_KMEM_ALLOC) {
		/*
		 * XXX(hch): bp->b_count_desired might be incorrect (see
		 * xfs_buf_associate_memory for details), but fortunately
		 * the Linux version of kmem_free ignores the len argument..
		 */
		kmem_free(bp->b_addr, bp->b_count_desired);
		_xfs_buf_free_pages(bp);
	}

	xfs_buf_deallocate(bp);
}
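/*
 * Worked example (added commentary, not part of the original source),
 * assuming 4 KiB pages: a buffer that starts 512 bytes into a page and
 * covers 16 KiB spans five pages, which is the page_count that
 * _xfs_buf_lookup_pages() below derives from
 * xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset).  When that count
 * fits within XB_PAGES (a small constant from the companion header, not
 * shown here), _xfs_buf_get_pages() uses the inline bp->b_page_array and
 * avoids a separate allocation; only larger buffers kmem_alloc() a page
 * pointer array, which is exactly the case _xfs_buf_free_pages() checks
 * for before freeing.
 */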
/*
 *	Finds all pages for buffer in question and builds its page list.
 */
STATIC int
_xfs_buf_lookup_pages(
	xfs_buf_t		*bp,
	uint			flags)
{
	struct address_space	*mapping = bp->b_target->bt_mapping;
	size_t			blocksize = bp->b_target->bt_bsize;
	size_t			size = bp->b_count_desired;
	size_t			nbytes, offset;
	gfp_t			gfp_mask = xb_to_gfp(flags);
	unsigned short		page_count, i;
	pgoff_t			first;
	xfs_off_t		end;
	int			error;

	end = bp->b_file_offset + bp->b_buffer_length;
	page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset);

	error = _xfs_buf_get_pages(bp, page_count, flags);
	if (unlikely(error))
		return error;
	bp->b_flags |= _XBF_PAGE_CACHE;

	offset = bp->b_offset;
	first = bp->b_file_offset >> PAGE_CACHE_SHIFT;

	for (i = 0; i < bp->b_page_count; i++) {
		struct page	*page;
		uint		retries = 0;

	      retry:
		page = find_or_create_page(mapping, first + i, gfp_mask);
		if (unlikely(page == NULL)) {
			if (flags & XBF_READ_AHEAD) {
				bp->b_page_count = i;
				for (i = 0; i < bp->b_page_count; i++)
					unlock_page(bp->b_pages[i]);
				return -ENOMEM;
			}

			/*
			 * This could deadlock.
			 *
			 * But until all the XFS lowlevel code is revamped to
			 * handle buffer allocation failures we can't do much.
			 */
			if (!(++retries % 100))
				printk(KERN_ERR
					"XFS: possible memory allocation "
					"deadlock in %s (mode:0x%x)\n",
					__FUNCTION__, gfp_mask);

			XFS_STATS_INC(xb_page_retries);
			xfsbufd_wakeup(0, gfp_mask);
			blk_congestion_wait(WRITE, HZ/50);
			goto retry;
		}

		XFS_STATS_INC(xb_page_found);

		nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset);
		size -= nbytes;

		if (!PageUptodate(page)) {
			page_count--;
			if (blocksize >= PAGE_CACHE_SIZE) {
				if (flags & XBF_READ)
					bp->b_locked = 1;
			} else if (!PagePrivate(page)) {
				if (test_page_region(page, offset, nbytes))
					page_count++;
			}
		}

		bp->b_pages[i] = page;
		offset = 0;
	}

	if (!bp->b_locked) {
		for (i = 0; i < bp->b_page_count; i++)
			unlock_page(bp->b_pages[i]);
	}

	if (page_count == bp->b_page_count)
		bp->b_flags |= XBF_DONE;

	XB_TRACE(bp, "lookup_pages", (long)page_count);
	return error;
}

/*
 *	Map buffer into kernel address-space if necessary.
 */
STATIC int
_xfs_buf_map_pages(
	xfs_buf_t		*bp,
	uint			flags)
{
	/* A single page buffer is always mappable */
	if (bp->b_page_count == 1) {
		bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
		bp->b_flags |= XBF_MAPPED;
	} else if (flags & XBF_MAPPED) {
		if (as_list_len > 64)
			purge_addresses();
		bp->b_addr = vmap(bp->b_pages, bp->b_page_count,
					VM_MAP, PAGE_KERNEL);
		if (unlikely(bp->b_addr == NULL))
			return -ENOMEM;
		bp->b_addr += bp->b_offset;
		bp->b_flags |= XBF_MAPPED;
	}

	return 0;
}

/*
 *	Finding and Reading Buffers
 */

/*
 *	Looks up, and creates if absent, a lockable buffer for
 *	a given range of an inode.  The buffer is returned
 *	locked.  If other overlapping buffers exist, they are
 *	released before the new buffer is created and locked,
 *	which may imply that this call will block until those buffers
 *	are unlocked.  No I/O is implied by this call.
 */
xfs_buf_t *
_xfs_buf_find(
	xfs_buftarg_t		*btp,	/* block device target		*/
	xfs_off_t		ioff,	/* starting offset of range	*/
	size_t			isize,	/* length of range		*/
	xfs_buf_flags_t		flags,
	xfs_buf_t		*new_bp)
{
	xfs_off_t		range_base;
	size_t			range_length;
	xfs_bufhash_t		*hash;
	xfs_buf_t		*bp, *n;

	range_base = (ioff << BBSHIFT);
	range_length = (isize << BBSHIFT);

	/* Check for IOs smaller than the sector size / not sector aligned */
	ASSERT(!(range_length < (1 << btp->bt_sshift)));
	ASSERT(!(range_base & (xfs_off_t)btp->bt_smask));

	hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)];

	spin_lock(&hash->bh_lock);

	list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) {
		ASSERT(btp == bp->b_target);
		if (bp->b_file_offset == range_base &&
		    bp->b_buffer_length == range_length) {
			/*
			 * If we look at something, bring it to the
			 * front of the list for next time.
			 */
			atomic_inc(&bp->b_hold);
			list_move(&bp->b_hash_list, &hash->bh_list);
			goto found;
		}
	}

	/* No match found */
	if (new_bp) {
		_xfs_buf_initialize(new_bp, btp, range_base,
					range_length, flags);
		new_bp->b_hash = hash;
		list_add(&new_bp->b_hash_list, &hash->bh_list);
	} else {
		XFS_STATS_INC(xb_miss_locked);
	}

	spin_unlock(&hash->bh_lock);
	return new_bp;

found:
	spin_unlock(&hash->bh_lock);

	/* Attempt to get the semaphore without sleeping,
	 * if this does not work then we need to drop the
	 * spinlock and do a hard attempt on the semaphore.
	 */
	if (down_trylock(&bp->b_sema)) {
		if (!(flags & XBF_TRYLOCK)) {
			/* wait for buffer ownership */
			XB_TRACE(bp, "get_lock", 0);
			xfs_buf_lock(bp);
			XFS_STATS_INC(xb_get_locked_waited);
		} else {
			/* We asked for a trylock and failed, no need
			 * to look at file offset and length here, we
			 * know that this buffer at least overlaps our
			 * buffer and is locked, therefore our buffer
			 * either does not exist, or is this buffer.
			 */
			xfs_buf_rele(bp);
			XFS_STATS_INC(xb_busy_locked);
			return NULL;
		}
	} else {
		/* trylock worked */
		XB_SET_OWNER(bp);
	}

	if (bp->b_flags & XBF_STALE) {
		ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
		bp->b_flags &= XBF_MAPPED;
	}
	XB_TRACE(bp, "got_lock", 0);
	XFS_STATS_INC(xb_get_locked);
	return bp;
}
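/*
 * Added usage note (not part of the original source): callers are
 * expected to pass a preallocated new_bp so the hash chain only needs to
 * be locked once for both the lookup and a possible insertion.  If the
 * range is already cached, the caller discards its new_bp and works with
 * the buffer returned here, which is locked and has an elevated hold
 * count; a NULL return with XBF_TRYLOCK set means the cached buffer was
 * busy.  xfs_buf_get_flags() below follows exactly this pattern.
 */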
/*
 *	Assembles a buffer covering the specified range.
 *	Storage in memory for all portions of the buffer will be allocated,
 *	although backing storage may not be.
 */
xfs_buf_t *
xfs_buf_get_flags(
	xfs_buftarg_t		*target,/* target for buffer		*/
	xfs_off_t		ioff,	/* starting offset of range	*/
	size_t			isize,	/* length of range		*/
	xfs_buf_flags_t		flags)
{
	xfs_buf_t		*bp, *new_bp;
	int			error = 0, i;

	new_bp = xfs_buf_allocate(flags);
	if (unlikely(!new_bp))
		return NULL;

	bp = _xfs_buf_find(target, ioff, isize, flags, new_bp);
	if (bp == new_bp) {
		error = _xfs_buf_lookup_pages(bp, flags);
		if (error)
			goto no_buffer;
	} else {
		xfs_buf_deallocate(new_bp);
		if (unlikely(bp == NULL))
			return NULL;
	}

	for (i = 0; i < bp->b_page_count; i++)
		mark_page_accessed(bp->b_pages[i]);

	if (!(bp->b_flags & XBF_MAPPED)) {
		error = _xfs_buf_map_pages(bp, flags);
		if (unlikely(error)) {
			printk(KERN_WARNING "%s: failed to map pages\n",
					__FUNCTION__);
			goto no_buffer;
		}
	}

	XFS_STATS_INC(xb_get);

	/*
	 * Always fill in the block number now, the mapped cases can do
	 * their own overlay of this later.
	 */
	bp->b_bn = ioff;
	bp->b_count_desired = bp->b_buffer_length;

	XB_TRACE(bp, "get", (unsigned long)flags);
	return bp;

 no_buffer:
	if (flags & (XBF_LOCK | XBF_TRYLOCK))
		xfs_buf_unlock(bp);
	xfs_buf_rele(bp);
	return NULL;
}

xfs_buf_t *
xfs_buf_read_flags(
	xfs_buftarg_t		*target,