xfs_buf.c
/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
#include <linux/stddef.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/init.h>
#include <linux/vmalloc.h>
#include <linux/bio.h>
#include <linux/sysctl.h>
#include <linux/proc_fs.h>
#include <linux/workqueue.h>
#include <linux/percpu.h>
#include <linux/blkdev.h>
#include <linux/hash.h>
#include <linux/kthread.h>
#include "xfs_linux.h"

STATIC kmem_cache_t *pagebuf_zone;
STATIC kmem_shaker_t pagebuf_shake;
STATIC int xfsbufd_wakeup(int, gfp_t);
STATIC void pagebuf_delwri_queue(xfs_buf_t *, int);

STATIC struct workqueue_struct *xfslogd_workqueue;
struct workqueue_struct *xfsdatad_workqueue;

#ifdef PAGEBUF_TRACE
void
pagebuf_trace(
	xfs_buf_t	*pb,
	char		*id,
	void		*data,
	void		*ra)
{
	ktrace_enter(pagebuf_trace_buf,
		pb, id,
		(void *)(unsigned long)pb->pb_flags,
		(void *)(unsigned long)pb->pb_hold.counter,
		(void *)(unsigned long)pb->pb_sema.count.counter,
		(void *)current,
		data, ra,
		(void *)(unsigned long)((pb->pb_file_offset>>32) & 0xffffffff),
		(void *)(unsigned long)(pb->pb_file_offset & 0xffffffff),
		(void *)(unsigned long)pb->pb_buffer_length,
		NULL, NULL, NULL, NULL, NULL);
}
ktrace_t *pagebuf_trace_buf;
#define PAGEBUF_TRACE_SIZE	4096
#define PB_TRACE(pb, id, data)	\
	pagebuf_trace(pb, id, (void *)data, (void *)__builtin_return_address(0))
#else
#define PB_TRACE(pb, id, data)	do { } while (0)
#endif

#ifdef PAGEBUF_LOCK_TRACKING
# define PB_SET_OWNER(pb)	((pb)->pb_last_holder = current->pid)
# define PB_CLEAR_OWNER(pb)	((pb)->pb_last_holder = -1)
# define PB_GET_OWNER(pb)	((pb)->pb_last_holder)
#else
# define PB_SET_OWNER(pb)	do { } while (0)
# define PB_CLEAR_OWNER(pb)	do { } while (0)
# define PB_GET_OWNER(pb)	do { } while (0)
#endif

#define pb_to_gfp(flags) \
	((((flags) & PBF_READ_AHEAD) ? __GFP_NORETRY : \
	  ((flags) & PBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN)

#define pb_to_km(flags) \
	 (((flags) & PBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP)

#define pagebuf_allocate(flags) \
	kmem_zone_alloc(pagebuf_zone, pb_to_km(flags))
#define pagebuf_deallocate(pb) \
	kmem_zone_free(pagebuf_zone, (pb));

/*
 * Page Region interfaces.
 *
 * For pages in filesystems where the blocksize is smaller than the
 * pagesize, we use the page->private field (long) to hold a bitmap
 * of uptodate regions within the page.
 *
 * Each such region is "bytes per page / bits per long" bytes long.
 *
 * NBPPR == number-of-bytes-per-page-region
 * BTOPR == bytes-to-page-region (rounded up)
 * BTOPRT == bytes-to-page-region-truncated (rounded down)
 */
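/*
 * Worked example (assuming 4096-byte pages and BITS_PER_LONG == 64; other
 * geometries scale the same way): NBPPR is 4096/64 = 64, so page->private
 * tracks 64 regions of 64 bytes each.  A 512-byte block at offset 0 spans
 * regions 0..7, so page_region_mask(0, 512) is 0xff; a block at offset 512
 * spans regions 8..15.  Once every block of the page has been marked, the
 * bitmap reaches ~0UL and set_page_region() flips the page to uptodate.
 */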
#if (BITS_PER_LONG == 32)
#define PRSHIFT		(PAGE_CACHE_SHIFT - 5)	/* (32 == 1<<5) */
#elif (BITS_PER_LONG == 64)
#define PRSHIFT		(PAGE_CACHE_SHIFT - 6)	/* (64 == 1<<6) */
#else
#error BITS_PER_LONG must be 32 or 64
#endif
#define NBPPR		(PAGE_CACHE_SIZE/BITS_PER_LONG)
#define BTOPR(b)	(((unsigned int)(b) + (NBPPR - 1)) >> PRSHIFT)
#define BTOPRT(b)	(((unsigned int)(b) >> PRSHIFT))

STATIC unsigned long
page_region_mask(
	size_t		offset,
	size_t		length)
{
	unsigned long	mask;
	int		first, final;

	first = BTOPR(offset);
	final = BTOPRT(offset + length - 1);
	first = min(first, final);

	/* build a mask covering region bits first..final inclusive */
	mask = ~0UL;
	mask <<= BITS_PER_LONG - (final - first + 1);
	mask >>= BITS_PER_LONG - (final + 1);

	ASSERT(offset + length <= PAGE_CACHE_SIZE);
	ASSERT((final - first) < BITS_PER_LONG && (final - first) >= 0);

	return mask;
}

STATIC inline void
set_page_region(
	struct page	*page,
	size_t		offset,
	size_t		length)
{
	set_page_private(page,
		page_private(page) | page_region_mask(offset, length));
	if (page_private(page) == ~0UL)
		SetPageUptodate(page);
}

STATIC inline int
test_page_region(
	struct page	*page,
	size_t		offset,
	size_t		length)
{
	unsigned long	mask = page_region_mask(offset, length);

	return (mask && (page_private(page) & mask) == mask);
}

/*
 * Mapping of multi-page buffers into contiguous virtual space
 */

typedef struct a_list {
	void		*vm_addr;
	struct a_list	*next;
} a_list_t;

STATIC a_list_t		*as_free_head;
STATIC int		as_list_len;
STATIC DEFINE_SPINLOCK(as_lock);

/*
 * Try to batch vunmaps because they are costly.
 */
STATIC void
free_address(
	void		*addr)
{
	a_list_t	*aentry;

	aentry = kmalloc(sizeof(a_list_t), GFP_ATOMIC & ~__GFP_HIGH);
	if (likely(aentry)) {
		spin_lock(&as_lock);
		aentry->next = as_free_head;
		aentry->vm_addr = addr;
		as_free_head = aentry;
		as_list_len++;
		spin_unlock(&as_lock);
	} else {
		vunmap(addr);
	}
}

STATIC void
purge_addresses(void)
{
	a_list_t	*aentry, *old;

	if (as_free_head == NULL)
		return;

	spin_lock(&as_lock);
	aentry = as_free_head;
	as_free_head = NULL;
	as_list_len = 0;
	spin_unlock(&as_lock);

	while ((old = aentry) != NULL) {
		vunmap(aentry->vm_addr);
		aentry = aentry->next;
		kfree(old);
	}
}

/*
 * Internal pagebuf object manipulation
 */

STATIC void
_pagebuf_initialize(
	xfs_buf_t		*pb,
	xfs_buftarg_t		*target,
	loff_t			range_base,
	size_t			range_length,
	page_buf_flags_t	flags)
{
	/*
	 * We don't want certain flags to appear in pb->pb_flags.
	 */
	flags &= ~(PBF_LOCK|PBF_MAPPED|PBF_DONT_BLOCK|PBF_READ_AHEAD);

	memset(pb, 0, sizeof(xfs_buf_t));
	atomic_set(&pb->pb_hold, 1);
	init_MUTEX_LOCKED(&pb->pb_iodonesema);
	INIT_LIST_HEAD(&pb->pb_list);
	INIT_LIST_HEAD(&pb->pb_hash_list);
	init_MUTEX_LOCKED(&pb->pb_sema); /* held, no waiters */
	PB_SET_OWNER(pb);
	pb->pb_target = target;
	pb->pb_file_offset = range_base;
	/*
	 * Set buffer_length and count_desired to the same value initially.
	 * I/O routines should use count_desired, which will be the same in
	 * most cases but may be reset (e.g. XFS recovery).
	 */
	pb->pb_buffer_length = pb->pb_count_desired = range_length;
	pb->pb_flags = flags;
	pb->pb_bn = XFS_BUF_DADDR_NULL;
	atomic_set(&pb->pb_pin_count, 0);
	init_waitqueue_head(&pb->pb_waiters);

	XFS_STATS_INC(pb_create);
	PB_TRACE(pb, "initialize", target);
}
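/*
 * In outline, the allocation path through the routines in this file:
 * xfs_buf_get_flags() takes a fresh xfs_buf_t from pagebuf_allocate() and
 * hands it to _pagebuf_find(); if no cached buffer covers the range,
 * _pagebuf_initialize() above sets it up, _pagebuf_lookup_pages() populates
 * pb_pages from the page cache (via _pagebuf_get_pages() below), and
 * _pagebuf_map_pages() gives multi-page buffers a contiguous virtual
 * mapping.  Teardown runs through pagebuf_free(), which drops the pages and
 * the mapping before freeing the buffer with pagebuf_deallocate().
 */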
/*
 * Allocate a page array capable of holding a specified number
 * of pages, and point the page buf at it.
 */
STATIC int
_pagebuf_get_pages(
	xfs_buf_t		*pb,
	int			page_count,
	page_buf_flags_t	flags)
{
	/* Make sure that we have a page list */
	if (pb->pb_pages == NULL) {
		pb->pb_offset = page_buf_poff(pb->pb_file_offset);
		pb->pb_page_count = page_count;
		if (page_count <= PB_PAGES) {
			pb->pb_pages = pb->pb_page_array;
		} else {
			pb->pb_pages = kmem_alloc(sizeof(struct page *) *
					page_count, pb_to_km(flags));
			if (pb->pb_pages == NULL)
				return -ENOMEM;
		}
		memset(pb->pb_pages, 0, sizeof(struct page *) * page_count);
	}
	return 0;
}

/*
 * Frees pb_pages if it was malloced.
 */
STATIC void
_pagebuf_free_pages(
	xfs_buf_t	*bp)
{
	if (bp->pb_pages != bp->pb_page_array) {
		kmem_free(bp->pb_pages,
			  bp->pb_page_count * sizeof(struct page *));
	}
}

/*
 * Releases the specified buffer.
 *
 * The modification state of any associated pages is left unchanged.
 * The buffer must not be on any hash - use pagebuf_rele instead for
 * hashed and refcounted buffers.
 */
void
pagebuf_free(
	xfs_buf_t		*bp)
{
	PB_TRACE(bp, "free", 0);

	ASSERT(list_empty(&bp->pb_hash_list));

	if (bp->pb_flags & _PBF_PAGE_CACHE) {
		uint		i;

		if ((bp->pb_flags & PBF_MAPPED) && (bp->pb_page_count > 1))
			free_address(bp->pb_addr - bp->pb_offset);

		for (i = 0; i < bp->pb_page_count; i++)
			page_cache_release(bp->pb_pages[i]);
		_pagebuf_free_pages(bp);
	} else if (bp->pb_flags & _PBF_KMEM_ALLOC) {
		 /*
		  * XXX(hch): bp->pb_count_desired might be incorrect (see
		  * pagebuf_associate_memory for details), but fortunately
		  * the Linux version of kmem_free ignores the len argument..
		  */
		kmem_free(bp->pb_addr, bp->pb_count_desired);
		_pagebuf_free_pages(bp);
	}

	pagebuf_deallocate(bp);
}

/*
 * Finds all pages for the buffer in question and builds its page list.
 */
STATIC int
_pagebuf_lookup_pages(
	xfs_buf_t		*bp,
	uint			flags)
{
	struct address_space	*mapping = bp->pb_target->pbr_mapping;
	size_t			blocksize = bp->pb_target->pbr_bsize;
	size_t			size = bp->pb_count_desired;
	size_t			nbytes, offset;
	gfp_t			gfp_mask = pb_to_gfp(flags);
	unsigned short		page_count, i;
	pgoff_t			first;
	loff_t			end;
	int			error;

	end = bp->pb_file_offset + bp->pb_buffer_length;
	page_count = page_buf_btoc(end) - page_buf_btoct(bp->pb_file_offset);

	error = _pagebuf_get_pages(bp, page_count, flags);
	if (unlikely(error))
		return error;
	bp->pb_flags |= _PBF_PAGE_CACHE;

	offset = bp->pb_offset;
	first = bp->pb_file_offset >> PAGE_CACHE_SHIFT;

	for (i = 0; i < bp->pb_page_count; i++) {
		struct page	*page;
		uint		retries = 0;

	      retry:
		page = find_or_create_page(mapping, first + i, gfp_mask);
		if (unlikely(page == NULL)) {
			if (flags & PBF_READ_AHEAD) {
				bp->pb_page_count = i;
				for (i = 0; i < bp->pb_page_count; i++)
					unlock_page(bp->pb_pages[i]);
				return -ENOMEM;
			}

			/*
			 * This could deadlock.
			 *
			 * But until all the XFS lowlevel code is revamped to
			 * handle buffer allocation failures we can't do much.
			 */
			if (!(++retries % 100))
				printk(KERN_ERR
					"XFS: possible memory allocation "
					"deadlock in %s (mode:0x%x)\n",
					__FUNCTION__, gfp_mask);

			XFS_STATS_INC(pb_page_retries);
			xfsbufd_wakeup(0, gfp_mask);
			blk_congestion_wait(WRITE, HZ/50);
			goto retry;
		}

		XFS_STATS_INC(pb_page_found);

		nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset);
		size -= nbytes;

		if (!PageUptodate(page)) {
			page_count--;
			if (blocksize >= PAGE_CACHE_SIZE) {
				if (flags & PBF_READ)
					bp->pb_locked = 1;
			} else if (!PagePrivate(page)) {
				if (test_page_region(page, offset, nbytes))
					page_count++;
			}
		}

		bp->pb_pages[i] = page;
		offset = 0;
	}

	if (!bp->pb_locked) {
		for (i = 0; i < bp->pb_page_count; i++)
			unlock_page(bp->pb_pages[i]);
	}

	if (page_count == bp->pb_page_count)
		bp->pb_flags |= PBF_DONE;

	PB_TRACE(bp, "lookup_pages", (long)page_count);
	return error;
}

/*
 * Map buffer into kernel address-space if necessary.
 */
STATIC int
_pagebuf_map_pages(
	xfs_buf_t		*bp,
	uint			flags)
{
	/* A single page buffer is always mappable */
	if (bp->pb_page_count == 1) {
		bp->pb_addr = page_address(bp->pb_pages[0]) + bp->pb_offset;
		bp->pb_flags |= PBF_MAPPED;
	} else if (flags & PBF_MAPPED) {
		if (as_list_len > 64)
			purge_addresses();
		bp->pb_addr = vmap(bp->pb_pages, bp->pb_page_count,
				VM_MAP, PAGE_KERNEL);
		if (unlikely(bp->pb_addr == NULL))
			return -ENOMEM;
		bp->pb_addr += bp->pb_offset;
		bp->pb_flags |= PBF_MAPPED;
	}

	return 0;
}

/*
 * Finding and Reading Buffers
 */

/*
 * _pagebuf_find
 *
 * Looks up, and creates if absent, a lockable buffer for
 * a given range of an inode.  The buffer is returned
 * locked.  If other overlapping buffers exist, they are
 * released before the new buffer is created and locked,
 * which may imply that this call will block until those buffers
 * are unlocked.  No I/O is implied by this call.
 */
xfs_buf_t *
_pagebuf_find(
	xfs_buftarg_t		*btp,	/* block device target		*/
	loff_t			ioff,	/* starting offset of range	*/
	size_t			isize,	/* length of range		*/
	page_buf_flags_t	flags,	/* PBF_TRYLOCK			*/
	xfs_buf_t		*new_pb)/* newly allocated buffer	*/
{
	loff_t			range_base;
	size_t			range_length;
	xfs_bufhash_t		*hash;
	xfs_buf_t		*pb, *n;

	range_base = (ioff << BBSHIFT);
	range_length = (isize << BBSHIFT);

	/* Check for IOs smaller than the sector size / not sector aligned */
	ASSERT(!(range_length < (1 << btp->pbr_sshift)));
	ASSERT(!(range_base & (loff_t)btp->pbr_smask));

	hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)];

	spin_lock(&hash->bh_lock);

	list_for_each_entry_safe(pb, n, &hash->bh_list, pb_hash_list) {
		ASSERT(btp == pb->pb_target);
		if (pb->pb_file_offset == range_base &&
		    pb->pb_buffer_length == range_length) {
			/*
			 * If we look at something bring it to the
			 * front of the list for next time.
			 */
			atomic_inc(&pb->pb_hold);
			list_move(&pb->pb_hash_list, &hash->bh_list);
			goto found;
		}
	}

	/* No match found */
	if (new_pb) {
		_pagebuf_initialize(new_pb, btp, range_base,
				range_length, flags);
		new_pb->pb_hash = hash;
		list_add(&new_pb->pb_hash_list, &hash->bh_list);
	} else {
		XFS_STATS_INC(pb_miss_locked);
	}

	spin_unlock(&hash->bh_lock);
	return new_pb;

found:
	spin_unlock(&hash->bh_lock);

	/* Attempt to get the semaphore without sleeping,
	 * if this does not work then we need to drop the
	 * spinlock and do a hard attempt on the semaphore.
	 */
	if (down_trylock(&pb->pb_sema)) {
		if (!(flags & PBF_TRYLOCK)) {
			/* wait for buffer ownership */
			PB_TRACE(pb, "get_lock", 0);
			pagebuf_lock(pb);
			XFS_STATS_INC(pb_get_locked_waited);
		} else {
			/* We asked for a trylock and failed, no need
			 * to look at file offset and length here, we
			 * know that this pagebuf at least overlaps our
			 * pagebuf and is locked, therefore our buffer
			 * either does not exist, or is this buffer
			 */

			pagebuf_rele(pb);
			XFS_STATS_INC(pb_busy_locked);
			return (NULL);
		}
	} else {
		/* trylock worked */
		PB_SET_OWNER(pb);
	}

	if (pb->pb_flags & PBF_STALE) {
		ASSERT((pb->pb_flags & _PBF_DELWRI_Q) == 0);
		pb->pb_flags &= PBF_MAPPED;
	}
	PB_TRACE(pb, "got_lock", 0);
	XFS_STATS_INC(pb_get_locked);
	return (pb);
}

/*
 * xfs_buf_get_flags assembles a buffer covering the specified range.
 *
 * Storage in memory for all portions of the buffer will be allocated,
 * although backing storage may not be.
 */
xfs_buf_t *
xfs_buf_get_flags(			/* allocate a buffer		*/
	xfs_buftarg_t		*target,/* target for buffer		*/
	loff_t			ioff,	/* starting offset of range	*/
	size_t			isize,	/* length of range		*/
	page_buf_flags_t	flags)	/* PBF_TRYLOCK			*/
{
	xfs_buf_t		*pb, *new_pb;
	int			error = 0, i;

	new_pb = pagebuf_allocate(flags);
	if (unlikely(!new_pb))
		return NULL;

	pb = _pagebuf_find(target, ioff, isize, flags, new_pb);
	if (pb == new_pb) {
		error = _pagebuf_lookup_pages(pb, flags);
		if (error)
			goto no_buffer;
	} else {
		pagebuf_deallocate(new_pb);
		if (unlikely(pb == NULL))
			return NULL;
	}

	for (i = 0; i < pb->pb_page_count; i++)
		mark_page_accessed(pb->pb_pages[i]);

	if (!(pb->pb_flags & PBF_MAPPED)) {
		error = _pagebuf_map_pages(pb, flags);
		if (unlikely(error)) {
			printk(KERN_WARNING "%s: failed to map pages\n",
					__FUNCTION__);
			goto no_buffer;
		}
	}

	XFS_STATS_INC(pb_get);

	/*
	 * Always fill in the block number now, the mapped cases can do
	 * their own overlay of this later.
	 */
	pb->pb_bn = ioff;
	pb->pb_count_desired = pb->pb_buffer_length;

	PB_TRACE(pb, "get", (unsigned long)flags);
	return pb;

 no_buffer:
	if (flags & (PBF_LOCK | PBF_TRYLOCK))
		pagebuf_unlock(pb);
	pagebuf_rele(pb);
	return NULL;
}

xfs_buf_t *
xfs_buf_read_flags(
	xfs_buftarg_t		*target,
	loff_t			ioff,
	size_t			isize,
	page_buf_flags_t	flags)
{
	xfs_buf_t		*pb;

	flags |= PBF_READ;

	pb = xfs_buf_get_flags(target, ioff, isize, flags);
	if (pb) {