xfs_buf.c

From the "Youlong 2410 Linux 2.6.8 kernel source code" collection · C source · 1,839 lines total · page 1 of 3

C
1,839
字号
/* * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. * * This program is distributed in the hope that it would be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * Further, this software is distributed without any warranty that it is * free of the rightful claim of any third person regarding infringement * or the like.  Any license provided herein, whether implied or * otherwise, applies only to this software file.  Patent licenses, if * any, provided herein do not apply to combinations of this program with * other software, or any other product whatsoever. * * You should have received a copy of the GNU General Public License along * with this program; if not, write the Free Software Foundation, Inc., 59 * Temple Place - Suite 330, Boston MA 02111-1307, USA. * * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, * Mountain View, CA  94043, or: * * http://www.sgi.com * * For further information regarding this notice, see: * * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ *//* *	The xfs_buf.c code provides an abstract buffer cache model on top *	of the Linux page cache.  Cached metadata blocks for a file system *	are hashed to the inode for the block device.  xfs_buf.c assembles *	buffers (xfs_buf_t) on demand to aggregate such cached pages for I/O. * *      Written by Steve Lord, Jim Mostek, Russell Cattelan *		    and Rajagopal Ananthanarayanan ("ananth") at SGI. 
* */#include <linux/stddef.h>#include <linux/errno.h>#include <linux/slab.h>#include <linux/pagemap.h>#include <linux/init.h>#include <linux/vmalloc.h>#include <linux/bio.h>#include <linux/sysctl.h>#include <linux/proc_fs.h>#include <linux/workqueue.h>#include <linux/suspend.h>#include <linux/percpu.h>#include "xfs_linux.h"#ifndef GFP_READAHEAD#define GFP_READAHEAD	(__GFP_NOWARN|__GFP_NORETRY)#endif/* * File wide globals */STATIC kmem_cache_t *pagebuf_cache;STATIC kmem_shaker_t pagebuf_shake;STATIC int pagebuf_daemon_wakeup(int, unsigned int);STATIC void pagebuf_delwri_queue(xfs_buf_t *, int);STATIC struct workqueue_struct *pagebuf_logio_workqueue;STATIC struct workqueue_struct *pagebuf_dataio_workqueue;/* * Pagebuf debugging */#ifdef PAGEBUF_TRACEvoidpagebuf_trace(	xfs_buf_t	*pb,	char		*id,	void		*data,	void		*ra){	ktrace_enter(pagebuf_trace_buf,		pb, id,		(void *)(unsigned long)pb->pb_flags,		(void *)(unsigned long)pb->pb_hold.counter,		(void *)(unsigned long)pb->pb_sema.count.counter,		(void *)current,		data, ra,		(void *)(unsigned long)((pb->pb_file_offset>>32) & 0xffffffff),		(void *)(unsigned long)(pb->pb_file_offset & 0xffffffff),		(void *)(unsigned long)pb->pb_buffer_length,		NULL, NULL, NULL, NULL, NULL);}ktrace_t *pagebuf_trace_buf;#define PAGEBUF_TRACE_SIZE	4096#define PB_TRACE(pb, id, data)	\	pagebuf_trace(pb, id, (void *)data, (void *)__builtin_return_address(0))#else#define PB_TRACE(pb, id, data)	do { } while (0)#endif#ifdef PAGEBUF_LOCK_TRACKING# define PB_SET_OWNER(pb)	((pb)->pb_last_holder = current->pid)# define PB_CLEAR_OWNER(pb)	((pb)->pb_last_holder = -1)# define PB_GET_OWNER(pb)	((pb)->pb_last_holder)#else# define PB_SET_OWNER(pb)	do { } while (0)# define PB_CLEAR_OWNER(pb)	do { } while (0)# define PB_GET_OWNER(pb)	do { } while (0)#endif/* * Pagebuf allocation / freeing. */#define pb_to_gfp(flags) \	(((flags) & PBF_READ_AHEAD) ? GFP_READAHEAD : \	 ((flags) & PBF_DONT_BLOCK) ? 
GFP_NOFS : GFP_KERNEL)#define pb_to_km(flags) \	 (((flags) & PBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP)#define pagebuf_allocate(flags) \	kmem_zone_alloc(pagebuf_cache, pb_to_km(flags))#define pagebuf_deallocate(pb) \	kmem_zone_free(pagebuf_cache, (pb));/* * Pagebuf hashing */#define NBITS	8#define NHASH	(1<<NBITS)typedef struct {	struct list_head	pb_hash;	spinlock_t		pb_hash_lock;} pb_hash_t;STATIC pb_hash_t	pbhash[NHASH];#define pb_hash(pb)	&pbhash[pb->pb_hash_index]STATIC int_bhash(	struct block_device *bdev,	loff_t		base){	int		bit, hval;	base >>= 9;	base ^= (unsigned long)bdev / L1_CACHE_BYTES;	for (bit = hval = 0; base && bit < sizeof(base) * 8; bit += NBITS) {		hval ^= (int)base & (NHASH-1);		base >>= NBITS;	}	return hval;}/* * Mapping of multi-page buffers into contiguous virtual space */typedef struct a_list {	void		*vm_addr;	struct a_list	*next;} a_list_t;STATIC a_list_t		*as_free_head;STATIC int		as_list_len;STATIC spinlock_t	as_lock = SPIN_LOCK_UNLOCKED;/* * Try to batch vunmaps because they are costly. */STATIC voidfree_address(	void		*addr){	a_list_t	*aentry;	aentry = kmalloc(sizeof(a_list_t), GFP_ATOMIC);	if (aentry) {		spin_lock(&as_lock);		aentry->next = as_free_head;		aentry->vm_addr = addr;		as_free_head = aentry;		as_list_len++;		spin_unlock(&as_lock);	} else {		vunmap(addr);	}}STATIC voidpurge_addresses(void){	a_list_t	*aentry, *old;	if (as_free_head == NULL)		return;	spin_lock(&as_lock);	aentry = as_free_head;	as_free_head = NULL;	as_list_len = 0;	spin_unlock(&as_lock);	while ((old = aentry) != NULL) {		vunmap(aentry->vm_addr);		aentry = aentry->next;		kfree(old);	}}/* *	Internal pagebuf object manipulation */STATIC void_pagebuf_initialize(	xfs_buf_t		*pb,	xfs_buftarg_t		*target,	loff_t			range_base,	size_t			range_length,	page_buf_flags_t	flags){	/*	 * We don't want certain flags to appear in pb->pb_flags.	 
*/	flags &= ~(PBF_LOCK|PBF_MAPPED|PBF_DONT_BLOCK|PBF_READ_AHEAD);	memset(pb, 0, sizeof(xfs_buf_t));	atomic_set(&pb->pb_hold, 1);	init_MUTEX_LOCKED(&pb->pb_iodonesema);	INIT_LIST_HEAD(&pb->pb_list);	INIT_LIST_HEAD(&pb->pb_hash_list);	init_MUTEX_LOCKED(&pb->pb_sema); /* held, no waiters */	PB_SET_OWNER(pb);	pb->pb_target = target;	pb->pb_file_offset = range_base;	/*	 * Set buffer_length and count_desired to the same value initially.	 * I/O routines should use count_desired, which will be the same in	 * most cases but may be reset (e.g. XFS recovery).	 */	pb->pb_buffer_length = pb->pb_count_desired = range_length;	pb->pb_flags = flags | PBF_NONE;	pb->pb_bn = XFS_BUF_DADDR_NULL;	atomic_set(&pb->pb_pin_count, 0);	init_waitqueue_head(&pb->pb_waiters);	XFS_STATS_INC(pb_create);	PB_TRACE(pb, "initialize", target);}/* * Allocate a page array capable of holding a specified number * of pages, and point the page buf at it. */STATIC int_pagebuf_get_pages(	xfs_buf_t		*pb,	int			page_count,	page_buf_flags_t	flags){	/* Make sure that we have a page list */	if (pb->pb_pages == NULL) {		pb->pb_offset = page_buf_poff(pb->pb_file_offset);		pb->pb_page_count = page_count;		if (page_count <= PB_PAGES) {			pb->pb_pages = pb->pb_page_array;		} else {			pb->pb_pages = kmem_alloc(sizeof(struct page *) *					page_count, pb_to_km(flags));			if (pb->pb_pages == NULL)				return -ENOMEM;		}		memset(pb->pb_pages, 0, sizeof(struct page *) * page_count);	}	return 0;}/* *	Frees pb_pages if it was malloced. */STATIC void_pagebuf_free_pages(	xfs_buf_t	*bp){	if (bp->pb_pages != bp->pb_page_array) {		kmem_free(bp->pb_pages,			  bp->pb_page_count * sizeof(struct page *));	}}/* *	Releases the specified buffer. * * 	The modification state of any associated pages is left unchanged. 
* 	The buffer most not be on any hash - use pagebuf_rele instead for * 	hashed and refcounted buffers */voidpagebuf_free(	xfs_buf_t		*bp){	PB_TRACE(bp, "free", 0);	ASSERT(list_empty(&bp->pb_hash_list));	if (bp->pb_flags & _PBF_PAGE_CACHE) {		uint		i;		if ((bp->pb_flags & PBF_MAPPED) && (bp->pb_page_count > 1))			free_address(bp->pb_addr - bp->pb_offset);		for (i = 0; i < bp->pb_page_count; i++)			page_cache_release(bp->pb_pages[i]);		_pagebuf_free_pages(bp);	} else if (bp->pb_flags & _PBF_KMEM_ALLOC) {		 /*		  * XXX(hch): bp->pb_count_desired might be incorrect (see		  * pagebuf_associate_memory for details), but fortunately		  * the Linux version of kmem_free ignores the len argument..		  */		kmem_free(bp->pb_addr, bp->pb_count_desired);		_pagebuf_free_pages(bp);	}	pagebuf_deallocate(bp);}/* *	Finds all pages for buffer in question and builds it's page list. */STATIC int_pagebuf_lookup_pages(	xfs_buf_t		*bp,	uint			flags){	struct address_space	*mapping = bp->pb_target->pbr_mapping;	unsigned int		sectorshift = bp->pb_target->pbr_sshift;	size_t			blocksize = bp->pb_target->pbr_bsize;	size_t			size = bp->pb_count_desired;	size_t			nbytes, offset;	int			gfp_mask = pb_to_gfp(flags);	unsigned short		page_count, i;	pgoff_t			first;	loff_t			end;	int			error;	end = bp->pb_file_offset + bp->pb_buffer_length;	page_count = page_buf_btoc(end) - page_buf_btoct(bp->pb_file_offset);	error = _pagebuf_get_pages(bp, page_count, flags);	if (unlikely(error))		return error;	bp->pb_flags |= _PBF_PAGE_CACHE;	offset = bp->pb_offset;	first = bp->pb_file_offset >> PAGE_CACHE_SHIFT;	for (i = 0; i < bp->pb_page_count; i++) {		struct page	*page;		uint		retries = 0;	      retry:		page = find_or_create_page(mapping, first + i, gfp_mask);		if (unlikely(page == NULL)) {			if (flags & PBF_READ_AHEAD) {				bp->pb_page_count = i;				for (i = 0; i < bp->pb_page_count; i++)					unlock_page(bp->pb_pages[i]);				return -ENOMEM;			}			/*			 * This could deadlock.			 
*			 * But until all the XFS lowlevel code is revamped to			 * handle buffer allocation failures we can't do much.			 */			if (!(++retries % 100))				printk(KERN_ERR					"possible deadlock in %s (mode:0x%x)\n",					__FUNCTION__, gfp_mask);			XFS_STATS_INC(pb_page_retries);			pagebuf_daemon_wakeup(0, gfp_mask);			set_current_state(TASK_UNINTERRUPTIBLE);			schedule_timeout(10);			goto retry;		}		XFS_STATS_INC(pb_page_found);		nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset);		size -= nbytes;		if (!PageUptodate(page)) {			page_count--;			if (blocksize == PAGE_CACHE_SIZE) {				if (flags & PBF_READ)					bp->pb_locked = 1;			} else if (!PagePrivate(page)) {				unsigned long	j, range;				/*				 * In this case page->private holds a bitmap				 * of uptodate sectors within the page				 */				ASSERT(blocksize < PAGE_CACHE_SIZE);				range = (offset + nbytes) >> sectorshift;				for (j = offset >> sectorshift; j < range; j++)					if (!test_bit(j, &page->private))						break;				if (j == range)					page_count++;			}		}		bp->pb_pages[i] = page;		offset = 0;	}	if (!bp->pb_locked) {		for (i = 0; i < bp->pb_page_count; i++)			unlock_page(bp->pb_pages[i]);	}	if (page_count) {		/* if we have any uptodate pages, mark that in the buffer */		bp->pb_flags &= ~PBF_NONE;		/* if some pages aren't uptodate, mark that in the buffer */		if (page_count != bp->pb_page_count)			bp->pb_flags |= PBF_PARTIAL;	}	PB_TRACE(bp, "lookup_pages", (long)page_count);	return error;}/* *	Map buffer into kernel address-space if nessecary. 
*/STATIC int_pagebuf_map_pages(	xfs_buf_t		*bp,	uint			flags){	/* A single page buffer is always mappable */	if (bp->pb_page_count == 1) {		bp->pb_addr = page_address(bp->pb_pages[0]) + bp->pb_offset;		bp->pb_flags |= PBF_MAPPED;	} else if (flags & PBF_MAPPED) {		if (as_list_len > 64)			purge_addresses();		bp->pb_addr = vmap(bp->pb_pages, bp->pb_page_count,				VM_MAP, PAGE_KERNEL);		if (unlikely(bp->pb_addr == NULL))			return -ENOMEM;		bp->pb_addr += bp->pb_offset;		bp->pb_flags |= PBF_MAPPED;	}	return 0;}/* *	Finding and Reading Buffers *//* *	_pagebuf_find * *	Looks up, and creates if absent, a lockable buffer for *	a given range of an inode.  The buffer is returned *	locked.	 If other overlapping buffers exist, they are *	released before the new buffer is created and locked, *	which may imply that this call will block until those buffers *	are unlocked.  No I/O is implied by this call. */STATIC xfs_buf_t *_pagebuf_find(				/* find buffer for block	*/	xfs_buftarg_t		*target,/* target for block		*/	loff_t			ioff,	/* starting offset of range	*/	size_t			isize,	/* length of range		*/	page_buf_flags_t	flags,	/* PBF_TRYLOCK			*/	xfs_buf_t		*new_pb)/* newly allocated buffer	*/{	loff_t			range_base;	size_t			range_length;	int			hval;	pb_hash_t		*h;	xfs_buf_t		*pb, *n;	int			not_locked;	range_base = (ioff << BBSHIFT);	range_length = (isize << BBSHIFT);	/* Ensure we never do IOs smaller than the sector size */	BUG_ON(range_length < (1 << target->pbr_sshift));	/* Ensure we never do IOs that are not sector aligned */	BUG_ON(range_base & (loff_t)target->pbr_smask);	hval = _bhash(target->pbr_bdev, range_base);	h = &pbhash[hval];	spin_lock(&h->pb_hash_lock);	list_for_each_entry_safe(pb, n, &h->pb_hash, pb_hash_list) {		if (pb->pb_target == target &&		    pb->pb_file_offset == range_base &&		    pb->pb_buffer_length == range_length) {			/* If we look at something bring it to the			 * front of the list for next time			 */			atomic_inc(&pb->pb_hold);			
list_move(&pb->pb_hash_list, &h->pb_hash);			goto found;		}	}	/* No match found */	if (new_pb) {		_pagebuf_initialize(new_pb, target, range_base,				range_length, flags);		new_pb->pb_hash_index = hval;		list_add(&new_pb->pb_hash_list, &h->pb_hash);	} else {		XFS_STATS_INC(pb_miss_locked);	}	spin_unlock(&h->pb_hash_lock);	return (new_pb);found:	spin_unlock(&h->pb_hash_lock);	/* Attempt to get the semaphore without sleeping,	 * if this does not work then we need to drop the	 * spinlock and do a hard attempt on the semaphore.	 */	not_locked = down_trylock(&pb->pb_sema);	if (not_locked) {		if (!(flags & PBF_TRYLOCK)) {			/* wait for buffer ownership */			PB_TRACE(pb, "get_lock", 0);			pagebuf_lock(pb);			XFS_STATS_INC(pb_get_locked_waited);		} else {			/* We asked for a trylock and failed, no need			 * to look at file offset and length here, we			 * know that this pagebuf at least overlaps our			 * pagebuf and is locked, therefore our buffer			 * either does not exist, or is this buffer			 */			pagebuf_rele(pb);			XFS_STATS_INC(pb_busy_locked);			return (NULL);		}	} else {		/* trylock worked */		PB_SET_OWNER(pb);	}	if (pb->pb_flags & PBF_STALE)		pb->pb_flags &= PBF_MAPPED;	PB_TRACE(pb, "got_lock", 0);	XFS_STATS_INC(pb_get_locked);	return (pb);}/* *	pagebuf_find * *	pagebuf_find returns a buffer matching the specified range of *	data for the specified target, if any of the relevant blocks *	are in memory.  The buffer may have unallocated holes, if *	some, but not all, of the blocks are in memory.  Even where *	pages are present in the buffer, not all of every page may be *	valid. 
*/xfs_buf_t *pagebuf_find(				/* find buffer for block	*/					/* if the block is in memory	*/	xfs_buftarg_t		*target,/* target for block		*/	loff_t			ioff,	/* starting offset of range	*/	size_t			isize,	/* length of range		*/	page_buf_flags_t	flags)	/* PBF_TRYLOCK			*/{	return _pagebuf_find(target, ioff, isize, flags, NULL);}/* *	pagebuf_get * *	pagebuf_get assembles a buffer covering the specified range. *	Some or all of the blocks in the range may be valid.  Storage *	in memory for all portions of the buffer will be allocated, *	although backing storage may not be.  If PBF_READ is set in *	flags, pagebuf_iostart is called also. */xfs_buf_t *pagebuf_get(				/* allocate a buffer		*/

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?