📄 xfs_buf.c
/*
 * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * Further, this software is distributed without any warranty that it is
 * free of the rightful claim of any third person regarding infringement
 * or the like.  Any license provided herein, whether implied or
 * otherwise, applies only to this software file.  Patent licenses, if
 * any, provided herein do not apply to combinations of this program with
 * other software, or any other product whatsoever.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write the Free Software Foundation, Inc., 59
 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
 *
 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
 * Mountain View, CA  94043, or:
 *
 * http://www.sgi.com
 *
 * For further information regarding this notice, see:
 *
 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
 */

/*
 * The xfs_buf.c code provides an abstract buffer cache model on top
 * of the Linux page cache.  Cached metadata blocks for a file system
 * are hashed to the inode for the block device.  xfs_buf.c assembles
 * buffers (xfs_buf_t) on demand to aggregate such cached pages for I/O.
 *
 * Written by Steve Lord, Jim Mostek, Russell Cattelan
 * and Rajagopal Ananthanarayanan ("ananth") at SGI.
 *
 */

#include <linux/stddef.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/init.h>
#include <linux/vmalloc.h>
#include <linux/blkdev.h>
#include <linux/locks.h>
#include <linux/sysctl.h>
#include <linux/proc_fs.h>

#include "xfs_linux.h"

#define BN_ALIGN_MASK	((1 << (PAGE_CACHE_SHIFT - BBSHIFT)) - 1)

#ifndef GFP_READAHEAD
#define GFP_READAHEAD	0
#endif

/*
 * A backport of the 2.5 scheduler is used by many vendors of 2.4-based
 * distributions.
 * We can only guess its presence by the lack of the SCHED_YIELD flag.
 * If the heuristic doesn't work, change this define by hand.
 */
#ifndef SCHED_YIELD
#define __HAVE_NEW_SCHEDULER	1
#endif

/*
 * cpumask_t is used for supporting NR_CPUS > BITS_PER_LONG.
 * If support for this is present, migrate_to_cpu exists and provides
 * a wrapper around the set_cpus_allowed routine.
 */
#ifdef copy_cpumask
#define __HAVE_CPUMASK_T	1
#endif

#ifndef __HAVE_CPUMASK_T
# ifndef __HAVE_NEW_SCHEDULER
# define migrate_to_cpu(cpu)	\
	do { current->cpus_allowed = 1UL << (cpu); } while (0)
# else
# define migrate_to_cpu(cpu)	\
	set_cpus_allowed(current, 1UL << (cpu))
# endif
#endif
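/*
 * Added illustrative sketch, not part of the original source: with the
 * compatibility macro above, a per-CPU I/O completion daemon can bind
 * itself to its CPU in the same way on a stock 2.4 scheduler, an
 * O(1)-scheduler backport, or a cpumask_t-aware kernel.  The daemon
 * function below is hypothetical and only shows the intended call.
 */
#if 0	/* example only, not compiled */
STATIC int
example_iodone_daemon(
	void		*__cpu)
{
	int		cpu = (int)(long)__cpu;

	daemonize();
	migrate_to_cpu(cpu);	/* pin this kernel thread to "cpu" */
	/* ... service completed I/O queued for this CPU ... */
	return 0;
}
#endif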
#ifndef VM_MAP
#define VM_MAP	VM_ALLOC
#endif

/*
 * File wide globals
 */
STATIC kmem_cache_t *pagebuf_cache;
STATIC kmem_shaker_t pagebuf_shake;

#define MAX_IO_DAEMONS		NR_CPUS
#define CPU_TO_DAEMON(cpu)	(cpu)

STATIC int pb_logio_daemons[MAX_IO_DAEMONS];
STATIC struct list_head pagebuf_logiodone_tq[MAX_IO_DAEMONS];
STATIC wait_queue_head_t pagebuf_logiodone_wait[MAX_IO_DAEMONS];

STATIC int pb_dataio_daemons[MAX_IO_DAEMONS];
STATIC struct list_head pagebuf_dataiodone_tq[MAX_IO_DAEMONS];
STATIC wait_queue_head_t pagebuf_dataiodone_wait[MAX_IO_DAEMONS];

/*
 * For pre-allocated buffer head pool
 */
#define NR_RESERVED_BH	64
static wait_queue_head_t	pb_resv_bh_wait;
static spinlock_t		pb_resv_bh_lock = SPIN_LOCK_UNLOCKED;
struct buffer_head		*pb_resv_bh = NULL;	/* list of bh */
int				pb_resv_bh_cnt = 0;	/* # of bh available */

STATIC void _pagebuf_ioapply(xfs_buf_t *);
STATIC int pagebuf_daemon_wakeup(int, unsigned int);
STATIC void pagebuf_delwri_queue(xfs_buf_t *, int);
STATIC void pagebuf_runall_queues(struct list_head[]);

/*
 * Pagebuf debugging
 */
#ifdef PAGEBUF_TRACE
void
pagebuf_trace(
	xfs_buf_t	*pb,
	char		*id,
	void		*data,
	void		*ra)
{
	ktrace_enter(pagebuf_trace_buf,
		pb, id,
		(void *)(unsigned long)pb->pb_flags,
		(void *)(unsigned long)pb->pb_hold.counter,
		(void *)(unsigned long)pb->pb_sema.count.counter,
		(void *)current,
		data, ra,
		(void *)(unsigned long)((pb->pb_file_offset>>32) & 0xffffffff),
		(void *)(unsigned long)(pb->pb_file_offset & 0xffffffff),
		(void *)(unsigned long)pb->pb_buffer_length,
		NULL, NULL, NULL, NULL, NULL);
}
ktrace_t *pagebuf_trace_buf;
#define PAGEBUF_TRACE_SIZE	4096
#define PB_TRACE(pb, id, data)	\
	pagebuf_trace(pb, id, (void *)data, (void *)__builtin_return_address(0))
#else
#define PB_TRACE(pb, id, data)	do { } while (0)
#endif

#ifdef PAGEBUF_LOCK_TRACKING
# define PB_SET_OWNER(pb)	((pb)->pb_last_holder = current->pid)
# define PB_CLEAR_OWNER(pb)	((pb)->pb_last_holder = -1)
# define PB_GET_OWNER(pb)	((pb)->pb_last_holder)
#else
# define PB_SET_OWNER(pb)	do { } while (0)
# define PB_CLEAR_OWNER(pb)	do { } while (0)
# define PB_GET_OWNER(pb)	do { } while (0)
#endif

/*
 * Pagebuf allocation / freeing.
 */
#define pb_to_gfp(flags) \
	(((flags) & PBF_READ_AHEAD) ? GFP_READAHEAD : \
	 ((flags) & PBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL)

#define pb_to_km(flags) \
	 (((flags) & PBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP)

#define pagebuf_allocate(flags) \
	kmem_zone_alloc(pagebuf_cache, pb_to_km(flags))
#define pagebuf_deallocate(pb) \
	kmem_zone_free(pagebuf_cache, (pb));

/*
 * Pagebuf hashing
 */
#define NBITS	8
#define NHASH	(1<<NBITS)

typedef struct {
	struct list_head	pb_hash;
	spinlock_t		pb_hash_lock;
} pb_hash_t;

STATIC pb_hash_t	pbhash[NHASH];
#define pb_hash(pb)	&pbhash[pb->pb_hash_index]

STATIC int
_bhash(
	struct block_device *bdev,
	loff_t		base)
{
	int		bit, hval;

	base >>= 9;
	base ^= (unsigned long)bdev / L1_CACHE_BYTES;
	for (bit = hval = 0;
	     base && bit < sizeof(base) * 8;
	     bit += NBITS) {
		hval ^= (int)base & (NHASH-1);
		base >>= NBITS;
	}
	return hval;
}
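/*
 * Added illustrative sketch, not part of the original source: how a
 * lookup routine would pick and lock the hash bucket for a given block
 * device and byte offset.  The surrounding function is hypothetical;
 * only _bhash(), pbhash[] and pb_hash_lock come from this file.
 */
#if 0	/* example only, not compiled */
STATIC void
example_walk_bucket(
	struct block_device	*bdev,
	loff_t			range_base)
{
	pb_hash_t	*h = &pbhash[_bhash(bdev, range_base)];

	spin_lock(&h->pb_hash_lock);
	/* ... walk h->pb_hash looking for an xfs_buf_t that matches ... */
	spin_unlock(&h->pb_hash_lock);
}
#endif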
/*
 * Mapping of multi-page buffers into contiguous virtual space
 */
typedef struct a_list {
	void		*vm_addr;
	struct a_list	*next;
} a_list_t;

STATIC a_list_t		*as_free_head;
STATIC int		as_list_len;
STATIC spinlock_t	as_lock = SPIN_LOCK_UNLOCKED;

/*
 * Try to batch vunmaps because they are costly.
 */
STATIC void
free_address(
	void		*addr)
{
	a_list_t	*aentry;

	aentry = kmalloc(sizeof(a_list_t), GFP_ATOMIC);
	if (aentry) {
		spin_lock(&as_lock);
		aentry->next = as_free_head;
		aentry->vm_addr = addr;
		as_free_head = aentry;
		as_list_len++;
		spin_unlock(&as_lock);
	} else {
		vunmap(addr);
	}
}

STATIC void
purge_addresses(void)
{
	a_list_t	*aentry, *old;

	if (as_free_head == NULL)
		return;

	spin_lock(&as_lock);
	aentry = as_free_head;
	as_free_head = NULL;
	as_list_len = 0;
	spin_unlock(&as_lock);

	while ((old = aentry) != NULL) {
		vunmap(aentry->vm_addr);
		aentry = aentry->next;
		kfree(old);
	}
}

/*
 * Internal pagebuf object manipulation
 */
STATIC void
_pagebuf_initialize(
	xfs_buf_t		*pb,
	xfs_buftarg_t		*target,
	loff_t			range_base,
	size_t			range_length,
	page_buf_flags_t	flags)
{
	/*
	 * We don't want certain flags to appear in pb->pb_flags.
	 */
	flags &= ~(PBF_LOCK|PBF_MAPPED|PBF_DONT_BLOCK|PBF_READ_AHEAD);

	memset(pb, 0, sizeof(xfs_buf_t));
	atomic_set(&pb->pb_hold, 1);
	init_MUTEX_LOCKED(&pb->pb_iodonesema);
	INIT_LIST_HEAD(&pb->pb_list);
	INIT_LIST_HEAD(&pb->pb_hash_list);
	init_MUTEX_LOCKED(&pb->pb_sema);	/* held, no waiters */
	PB_SET_OWNER(pb);
	pb->pb_target = target;
	pb->pb_file_offset = range_base;
	/*
	 * Set buffer_length and count_desired to the same value initially.
	 * I/O routines should use count_desired, which will be the same in
	 * most cases but may be reset (e.g. XFS recovery).
	 */
	pb->pb_buffer_length = pb->pb_count_desired = range_length;
	pb->pb_flags = flags | PBF_NONE;
	pb->pb_bn = XFS_BUF_DADDR_NULL;
	atomic_set(&pb->pb_pin_count, 0);
	init_waitqueue_head(&pb->pb_waiters);

	XFS_STATS_INC(pb_create);
	PB_TRACE(pb, "initialize", target);
}

/*
 * Allocate a page array capable of holding a specified number
 * of pages, and point the page buf at it.
 */
STATIC int
_pagebuf_get_pages(
	xfs_buf_t		*pb,
	int			page_count,
	page_buf_flags_t	flags)
{
	/* Make sure that we have a page list */
	if (pb->pb_pages == NULL) {
		pb->pb_offset = page_buf_poff(pb->pb_file_offset);
		pb->pb_page_count = page_count;
		if (page_count <= PB_PAGES) {
			pb->pb_pages = pb->pb_page_array;
		} else {
			pb->pb_pages = kmem_alloc(sizeof(struct page *) *
					page_count, pb_to_km(flags));
			if (pb->pb_pages == NULL)
				return -ENOMEM;
		}
		memset(pb->pb_pages, 0, sizeof(struct page *) * page_count);
	}
	return 0;
}

/*
 * Frees pb_pages if it was malloced.
 */
STATIC void
_pagebuf_free_pages(
	xfs_buf_t	*bp)
{
	if (bp->pb_pages != bp->pb_page_array) {
		kmem_free(bp->pb_pages,
			  bp->pb_page_count * sizeof(struct page *));
	}
}

/*
 * Releases the specified buffer.
 *
 * The modification state of any associated pages is left unchanged.
 * The buffer must not be on any hash - use pagebuf_rele instead for
 * hashed and refcounted buffers.
 */
void
pagebuf_free(
	xfs_buf_t		*bp)
{
	PB_TRACE(bp, "free", 0);

	ASSERT(list_empty(&bp->pb_hash_list));

	if (bp->pb_flags & _PBF_PAGE_CACHE) {
		uint		i;

		if ((bp->pb_flags & PBF_MAPPED) && (bp->pb_page_count > 1))
			free_address(bp->pb_addr - bp->pb_offset);

		for (i = 0; i < bp->pb_page_count; i++)
			page_cache_release(bp->pb_pages[i]);
		_pagebuf_free_pages(bp);
	} else if (bp->pb_flags & _PBF_KMEM_ALLOC) {
		/*
		 * XXX(hch): bp->pb_count_desired might be incorrect (see
		 * pagebuf_associate_memory for details), but fortunately
		 * the Linux version of kmem_free ignores the len argument..
		 */
		kmem_free(bp->pb_addr, bp->pb_count_desired);
		_pagebuf_free_pages(bp);
	}

	pagebuf_deallocate(bp);
}
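/*
 * Added illustrative sketch, not part of the original source: the
 * lifecycle pagebuf_free() expects for an unhashed buffer -- allocate
 * the descriptor, run it through _pagebuf_initialize(), and hand it
 * back through pagebuf_free() rather than pagebuf_rele().  The wrapper
 * function itself is hypothetical.
 */
#if 0	/* example only, not compiled */
STATIC xfs_buf_t *
example_get_unhashed_buf(
	xfs_buftarg_t		*target,
	size_t			len,
	page_buf_flags_t	flags)
{
	xfs_buf_t	*pb;

	pb = pagebuf_allocate(flags);
	if (pb)
		_pagebuf_initialize(pb, target, 0, len, flags);
	/* ... caller uses the buffer, then releases it via pagebuf_free(pb) ... */
	return pb;
}
#endif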
/*
 * Finds all pages for the buffer in question and builds its page list.
 */
STATIC int
_pagebuf_lookup_pages(
	xfs_buf_t		*bp,
	uint			flags)
{
	struct address_space	*mapping = bp->pb_target->pbr_mapping;
	size_t			blocksize = bp->pb_target->pbr_bsize;
	int			gfp_mask = pb_to_gfp(flags);
	unsigned short		page_count, i;
	pgoff_t			first;
	loff_t			end;
	int			error;

	end = bp->pb_file_offset + bp->pb_buffer_length;
	page_count = page_buf_btoc(end) - page_buf_btoct(bp->pb_file_offset);

	error = _pagebuf_get_pages(bp, page_count, flags);
	if (unlikely(error))
		return error;
	bp->pb_flags |= _PBF_PAGE_CACHE;

	first = bp->pb_file_offset >> PAGE_CACHE_SHIFT;

	for (i = 0; i < bp->pb_page_count; i++) {
		struct page	*page;
		uint		retries = 0;

	      retry:
		page = find_or_create_page(mapping, first + i, gfp_mask);
		if (unlikely(page == NULL)) {
			if (flags & PBF_READ_AHEAD) {
				bp->pb_page_count = i;
				for (i = 0; i < bp->pb_page_count; i++)
					unlock_page(bp->pb_pages[i]);
				return -ENOMEM;
			}

			/*
			 * This could deadlock.
			 *
			 * But until all the XFS lowlevel code is revamped to
			 * handle buffer allocation failures we can't do much.
			 */
			if (!(++retries % 100))
				printk(KERN_ERR
					"possible deadlock in %s (mode:0x%x)\n",
					__FUNCTION__, gfp_mask);

			XFS_STATS_INC(pb_page_retries);
			pagebuf_daemon_wakeup(0, gfp_mask);
			set_current_state(TASK_UNINTERRUPTIBLE);
			schedule_timeout(10);
			goto retry;
		}

		XFS_STATS_INC(pb_page_found);

		/* if we need to do I/O on a page record the fact */
		if (!Page_Uptodate(page)) {
			page_count--;
			if (blocksize == PAGE_CACHE_SIZE && (flags & PBF_READ))
				bp->pb_locked = 1;
		}

		bp->pb_pages[i] = page;
	}

	if (!bp->pb_locked) {
		for (i = 0; i < bp->pb_page_count; i++)
			unlock_page(bp->pb_pages[i]);
	}

	if (page_count) {
		/* if we have any uptodate pages, mark that in the buffer */
		bp->pb_flags &= ~PBF_NONE;

		/* if some pages aren't uptodate, mark that in the buffer */
		if (page_count != bp->pb_page_count)
			bp->pb_flags |= PBF_PARTIAL;
	}

	PB_TRACE(bp, "lookup_pages", (long)page_count);
	return error;
}

/*
 * Map buffer into kernel address-space if necessary.
 */
STATIC int
_pagebuf_map_pages(
	xfs_buf_t		*bp,
	uint			flags)
{
	/* A single page buffer is always mappable */
	if (bp->pb_page_count == 1) {
		bp->pb_addr = page_address(bp->pb_pages[0]) + bp->pb_offset;
		bp->pb_flags |= PBF_MAPPED;
	} else if (flags & PBF_MAPPED) {
		if (as_list_len > 64)
			purge_addresses();
		bp->pb_addr = vmap(bp->pb_pages, bp->pb_page_count,
				VM_MAP, PAGE_KERNEL);
		if (unlikely(bp->pb_addr == NULL))
			return -ENOMEM;
		bp->pb_addr += bp->pb_offset;
		bp->pb_flags |= PBF_MAPPED;
	}

	return 0;
}

/*
 * Pre-allocation of a pool of buffer heads for use in
 * low-memory situations.
 */

/*
 * _pagebuf_prealloc_bh
 *
 * Pre-allocate a pool of "count" buffer heads at startup.
 * Puts them on a list at "pb_resv_bh".
 * Returns the number of bh actually allocated to the pool.
 */
STATIC int
_pagebuf_prealloc_bh(
	int		count)
{
	struct buffer_head	*bh;
	int			i;

	for (i = 0; i < count; i++) {
		bh = kmem_cache_alloc(bh_cachep, SLAB_KERNEL);
		if (!bh)
			break;
		bh->b_pprev = &pb_resv_bh;
		bh->b_next = pb_resv_bh;
		pb_resv_bh = bh;
		pb_resv_bh_cnt++;
	}
	return i;
}
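/*
 * Added illustrative sketch, not part of the original source: intended
 * use of the reserved buffer-head pool.  The pool is filled once at
 * initialization time; under memory pressure a bh is taken with
 * _pagebuf_get_prealloc_bh() (defined below) and must be returned via
 * _pagebuf_free_bh() instead of kmem_cache_free().  The init function
 * shown here is hypothetical.
 */
#if 0	/* example only, not compiled */
STATIC void
example_bh_pool_init(void)
{
	init_waitqueue_head(&pb_resv_bh_wait);
	_pagebuf_prealloc_bh(NR_RESERVED_BH);	/* fill the reserve pool */
}
#endif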
/*
 * _pagebuf_get_prealloc_bh
 *
 * Get one buffer head from our pre-allocated pool.
 * If pool is empty, sleep 'til one comes back in.
 * Returns aforementioned buffer head.
 */
STATIC struct buffer_head *
_pagebuf_get_prealloc_bh(void)
{
	unsigned long		flags;
	struct buffer_head	*bh;
	DECLARE_WAITQUEUE	(wait, current);

	spin_lock_irqsave(&pb_resv_bh_lock, flags);

	if (pb_resv_bh_cnt < 1) {
		add_wait_queue(&pb_resv_bh_wait, &wait);
		do {
			set_current_state(TASK_UNINTERRUPTIBLE);
			spin_unlock_irqrestore(&pb_resv_bh_lock, flags);
			run_task_queue(&tq_disk);
			schedule();
			spin_lock_irqsave(&pb_resv_bh_lock, flags);
		} while (pb_resv_bh_cnt < 1);
		__set_current_state(TASK_RUNNING);
		remove_wait_queue(&pb_resv_bh_wait, &wait);
	}

	BUG_ON(pb_resv_bh_cnt < 1);
	BUG_ON(!pb_resv_bh);

	bh = pb_resv_bh;
	pb_resv_bh = bh->b_next;
	pb_resv_bh_cnt--;

	spin_unlock_irqrestore(&pb_resv_bh_lock, flags);
	return bh;
}

/*
 * _pagebuf_free_bh
 *
 * Take care of buffer heads that we're finished with.
 * Call this instead of just kmem_cache_free(bh_cachep, bh)
 * when you're done with a bh.
 *
 * If our pre-allocated pool is full, just free the buffer head.
 * Otherwise, put it back in the pool, and wake up anybody
 * waiting for one.
 */
STATIC inline void
_pagebuf_free_bh(
	struct buffer_head	*bh)
{
	unsigned long		flags;
	int			free;