xfs_buf.c

来自「优龙2410linux2.6.8内核源代码」· C语言 代码 · 共 1,839 行 · 第 1/3 页

C
1,839
字号
	/* NOTE(review): the opening of this bio completion handler's
	 * signature (function name and earlier parameters) lies in a
	 * previous chunk of this file; only the final parameter and the
	 * body are visible here. */
	int			error)
{
	xfs_buf_t		*pb = (xfs_buf_t *)bio->bi_private;
	unsigned int		i, blocksize = pb->pb_target->pbr_bsize;
	unsigned int		sectorshift = pb->pb_target->pbr_sshift;
	struct bio_vec		*bvec = bio->bi_io_vec;

	/* Non-zero bi_size means the block layer will call us again for
	 * the remainder of this bio; returning 1 asks for that. */
	if (bio->bi_size)
		return 1;

	/* Record the first I/O failure on the pagebuf itself. */
	if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
		pb->pb_error = EIO;

	/* Propagate completion state to each page covered by the bio. */
	for (i = 0; i < bio->bi_vcnt; i++, bvec++) {
		struct page	*page = bvec->bv_page;

		if (pb->pb_error) {
			SetPageError(page);
		} else if (blocksize == PAGE_CACHE_SIZE) {
			/* One filesystem block per page: the whole page is
			 * now valid. */
			SetPageUptodate(page);
		} else if (!PagePrivate(page) &&
				(pb->pb_flags & _PBF_PAGE_CACHE)) {
			unsigned long	j, range;

			/* Sub-page blocksize: track per-sector validity in
			 * the page->private bitmask, one bit per sector. */
			ASSERT(blocksize < PAGE_CACHE_SIZE);
			range = (bvec->bv_offset + bvec->bv_len) >> sectorshift;
			for (j = bvec->bv_offset >> sectorshift; j < range; j++)
				set_bit(j, &page->private);
			/* NOTE(review): this compares the per-sector bitmask
			 * against PAGE_CACHE_SIZE-1, but with sectorshift >= 9
			 * only PAGE_CACHE_SIZE >> sectorshift bits can ever be
			 * set, so the "all sectors valid" test looks
			 * unsatisfiable — verify against later upstream
			 * xfs_buf.c fixes before relying on it. */
			if (page->private == (unsigned long)(PAGE_CACHE_SIZE-1))
				SetPageUptodate(page);
		}

		/* Pages were locked for the duration of I/O; release them. */
		if (_pagebuf_iolocked(pb)) {
			unlock_page(page);
		}
	}

	/* Drop this bio's contribution to pb_io_remaining (schedule==1:
	 * completion work may be deferred to a workqueue). */
	_pagebuf_iodone(pb, 1);
	bio_put(bio);
	return 0;
}

/*
 * Map the pagebuf's pages into one or more bios and submit them.
 * Each bio submitted bumps pb_io_remaining; the completion handler
 * above drops it again.
 */
void
_pagebuf_ioapply(
	xfs_buf_t		*pb)
{
	int			i, map_i, total_nr_pages, nr_pages;
	struct bio		*bio;
	int			offset = pb->pb_offset;
	int			size = pb->pb_count_desired;
	sector_t		sector = pb->pb_bn;
	unsigned int		blocksize = pb->pb_target->pbr_bsize;
	int			locking = _pagebuf_iolocked(pb);

	total_nr_pages = pb->pb_page_count;
	map_i = 0;

	/* Special code path for reading a sub page size pagebuf in --
	 * we populate up the whole page, and hence the other metadata
	 * in the same page.  This optimization is only valid when the
	 * filesystem block size and the page size are equal.
	 */
	if ((pb->pb_buffer_length < PAGE_CACHE_SIZE) &&
	    (pb->pb_flags & PBF_READ) && locking &&
	    (blocksize == PAGE_CACHE_SIZE)) {
		bio = bio_alloc(GFP_NOIO, 1);

		bio->bi_bdev = pb->pb_target->pbr_bdev;
		/* Rewind the start sector so the read covers the whole
		 * page, not just the requested sub-range. */
		bio->bi_sector = sector - (offset >> BBSHIFT);
		bio->bi_end_io = bio_end_io_pagebuf;
		bio->bi_private = pb;

		bio_add_page(bio, pb->pb_pages[0], PAGE_CACHE_SIZE, 0);
		size = 0;

		atomic_inc(&pb->pb_io_remaining);

		goto submit_io;
	}

	/* Lock down the pages which we need to for the request */
	if (locking && (pb->pb_flags & PBF_WRITE) && (pb->pb_locked == 0)) {
		for (i = 0; size; i++) {
			int		nbytes = PAGE_CACHE_SIZE - offset;
			struct page	*page = pb->pb_pages[i];

			if (nbytes > size)
				nbytes = size;

			lock_page(page);

			size -= nbytes;
			offset = 0;
		}
		/* Reset for the mapping loop below. */
		offset = pb->pb_offset;
		size = pb->pb_count_desired;
	}

next_chunk:
	atomic_inc(&pb->pb_io_remaining);
	/* Size each bio by how many pages fit in BIO_MAX_SECTORS. */
	nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT);
	if (nr_pages > total_nr_pages)
		nr_pages = total_nr_pages;

	bio = bio_alloc(GFP_NOIO, nr_pages);
	bio->bi_bdev = pb->pb_target->pbr_bdev;
	bio->bi_sector = sector;
	bio->bi_end_io = bio_end_io_pagebuf;
	bio->bi_private = pb;

	for (; size && nr_pages; nr_pages--, map_i++) {
		int	nbytes = PAGE_CACHE_SIZE - offset;

		if (nbytes > size)
			nbytes = size;

		/* bio full (device limits) -- submit it and start another. */
		if (bio_add_page(bio, pb->pb_pages[map_i],
					nbytes, offset) < nbytes)
			break;

		offset = 0;
		sector += nbytes >> BBSHIFT;
		size -= nbytes;
		total_nr_pages--;
	}

submit_io:
	if (likely(bio->bi_size)) {
		submit_bio((pb->pb_flags & PBF_READ) ? READ : WRITE, bio);
		if (size)
			goto next_chunk;
	} else {
		/* Could not add even one page to a fresh bio. */
		bio_put(bio);
		pagebuf_ioerror(pb, EIO);
	}

	if (pb->pb_flags & _PBF_RUN_QUEUES) {
		pb->pb_flags &= ~_PBF_RUN_QUEUES;
		/* Only kick the queue if I/O is actually outstanding. */
		if (atomic_read(&pb->pb_io_remaining) > 1)
			blk_run_address_space(pb->pb_target->pbr_mapping);
	}
}

/*
 *	pagebuf_iorequest -- the core I/O request routine.
 */
int
pagebuf_iorequest(			/* start real I/O		*/
	xfs_buf_t		*pb)	/* buffer to convey to device	*/
{
	PB_TRACE(pb, "iorequest", 0);

	/* Delayed-write buffers are queued, not submitted directly. */
	if (pb->pb_flags & PBF_DELWRI) {
		pagebuf_delwri_queue(pb, 1);
		return 0;
	}

	/* Writes must wait for the buffer to be unpinned first. */
	if (pb->pb_flags & PBF_WRITE) {
		_pagebuf_wait_unpin(pb);
	}

	/* Hold a reference across the I/O so completion can't free us. */
	pagebuf_hold(pb);

	/* Set the count to 1 initially, this will stop an I/O
	 * completion callout which happens before we have started
	 * all the I/O from calling pagebuf_iodone too early.
	 */
	atomic_set(&pb->pb_io_remaining, 1);
	_pagebuf_ioapply(pb);
	/* Drop our initial count; if all bios already completed this
	 * triggers the final iodone processing. */
	_pagebuf_iodone(pb, 0);

	pagebuf_rele(pb);
	return 0;
}

/*
 *	pagebuf_iowait
 *
 *	pagebuf_iowait waits for I/O to complete on the buffer supplied.
 *	It returns immediately if no I/O is pending.  In any case, it returns
 *	the error code, if any, or 0 if there is no error.
 */
int
pagebuf_iowait(
	xfs_buf_t		*pb)
{
	PB_TRACE(pb, "iowait", 0);
	/* Push any queued requests so our I/O actually makes progress. */
	if (atomic_read(&pb->pb_io_remaining))
		blk_run_address_space(pb->pb_target->pbr_mapping);
	/* pb_iodonesema is upped by the completion path. */
	down(&pb->pb_iodonesema);
	PB_TRACE(pb, "iowaited", (long)pb->pb_error);
	return pb->pb_error;
}

/*
 * Return the kernel virtual address of byte 'offset' within the
 * buffer's data, resolving through the backing page array.
 */
caddr_t
pagebuf_offset(
	xfs_buf_t		*pb,
	size_t			offset)
{
	struct page		*page;

	offset += pb->pb_offset;

	page = pb->pb_pages[offset >> PAGE_CACHE_SHIFT];
	return (caddr_t) page_address(page) + (offset & (PAGE_CACHE_SIZE - 1));
}

/*
 *	pagebuf_iomove
 *
 *	Move data into or out of a buffer.
*/voidpagebuf_iomove(	xfs_buf_t		*pb,	/* buffer to process		*/	size_t			boff,	/* starting buffer offset	*/	size_t			bsize,	/* length to copy		*/	caddr_t			data,	/* data address			*/	page_buf_rw_t		mode)	/* read/write flag		*/{	size_t			bend, cpoff, csize;	struct page		*page;	bend = boff + bsize;	while (boff < bend) {		page = pb->pb_pages[page_buf_btoct(boff + pb->pb_offset)];		cpoff = page_buf_poff(boff + pb->pb_offset);		csize = min_t(size_t,			      PAGE_CACHE_SIZE-cpoff, pb->pb_count_desired-boff);		ASSERT(((csize + cpoff) <= PAGE_CACHE_SIZE));		switch (mode) {		case PBRW_ZERO:			memset(page_address(page) + cpoff, 0, csize);			break;		case PBRW_READ:			memcpy(data, page_address(page) + cpoff, csize);			break;		case PBRW_WRITE:			memcpy(page_address(page) + cpoff, data, csize);		}		boff += csize;		data += csize;	}}/* *	Handling of buftargs. */voidxfs_free_buftarg(	xfs_buftarg_t		*btp,	int			external){	xfs_flush_buftarg(btp, 1);	if (external)		xfs_blkdev_put(btp->pbr_bdev);	kmem_free(btp, sizeof(*btp));}voidxfs_incore_relse(	xfs_buftarg_t		*btp,	int			delwri_only,	int			wait){	invalidate_bdev(btp->pbr_bdev, 1);	truncate_inode_pages(btp->pbr_mapping, 0LL);}voidxfs_setsize_buftarg(	xfs_buftarg_t		*btp,	unsigned int		blocksize,	unsigned int		sectorsize){	btp->pbr_bsize = blocksize;	btp->pbr_sshift = ffs(sectorsize) - 1;	btp->pbr_smask = sectorsize - 1;	if (set_blocksize(btp->pbr_bdev, sectorsize)) {		printk(KERN_WARNING			"XFS: Cannot set_blocksize to %u on device %s\n",			sectorsize, XFS_BUFTARG_NAME(btp));	}}xfs_buftarg_t *xfs_alloc_buftarg(	struct block_device	*bdev){	xfs_buftarg_t		*btp;	btp = kmem_zalloc(sizeof(*btp), KM_SLEEP);	btp->pbr_dev =  bdev->bd_dev;	btp->pbr_bdev = bdev;	btp->pbr_mapping = bdev->bd_inode->i_mapping;	xfs_setsize_buftarg(btp, PAGE_CACHE_SIZE, bdev_hardsect_size(bdev));	return btp;}/* * Pagebuf delayed write buffer handling */STATIC LIST_HEAD(pbd_delwrite_queue);STATIC spinlock_t pbd_delwrite_lock = SPIN_LOCK_UNLOCKED;STATIC 
void
pagebuf_delwri_queue(
	xfs_buf_t		*pb,
	int			unlock)
{
	PB_TRACE(pb, "delwri_q", (long)unlock);
	ASSERT(pb->pb_flags & PBF_DELWRI);

	spin_lock(&pbd_delwrite_lock);
	/* If already in the queue, dequeue and place at tail */
	if (!list_empty(&pb->pb_list)) {
		/* Presumably the queue already owns a reference, so drop
		 * the extra hold we would otherwise accumulate -- TODO
		 * confirm against pagebuf_rele/pagebuf_hold callers. */
		if (unlock) {
			atomic_dec(&pb->pb_hold);
		}
		list_del(&pb->pb_list);
	}

	/* Tail insertion + timestamp keeps the queue ordered by age,
	 * which the daemon's early-break scan below relies on. */
	list_add_tail(&pb->pb_list, &pbd_delwrite_queue);
	pb->pb_queuetime = jiffies;
	spin_unlock(&pbd_delwrite_lock);

	if (unlock)
		pagebuf_unlock(pb);
}

/*
 * Remove a buffer from the delayed-write queue (if present) and clear
 * its PBF_DELWRI state, dropping the queue's reference.
 */
void
pagebuf_delwri_dequeue(
	xfs_buf_t		*pb)
{
	int			dequeued = 0;

	spin_lock(&pbd_delwrite_lock);
	if ((pb->pb_flags & PBF_DELWRI) && !list_empty(&pb->pb_list)) {
		list_del_init(&pb->pb_list);
		dequeued = 1;
	}
	pb->pb_flags &= ~PBF_DELWRI;
	spin_unlock(&pbd_delwrite_lock);

	/* Release the reference the queue held, outside the lock. */
	if (dequeued)
		pagebuf_rele(pb);

	PB_TRACE(pb, "delwri_dq", (long)dequeued);
}

/* Wait for all work currently queued on the given workqueue. */
STATIC void
pagebuf_runall_queues(
	struct workqueue_struct	*queue)
{
	flush_workqueue(queue);
}

/* Defines for pagebuf daemon */
STATIC DECLARE_COMPLETION(pagebuf_daemon_done);
STATIC struct task_struct *pagebuf_daemon_task;
STATIC int pagebuf_daemon_active;
STATIC int force_flush;

/*
 * Memory-shake callback: ask the delwri daemon to flush everything
 * regardless of age.  Safe to call from shrinker context; just sets a
 * flag and wakes the daemon.
 */
STATIC int
pagebuf_daemon_wakeup(
	int			priority,
	unsigned int		mask)
{
	force_flush = 1;
	barrier();
	wake_up_process(pagebuf_daemon_task);
	return 0;
}

/*
 * The delayed-write flush daemon ("xfsbufd"): periodically moves aged
 * delwri buffers off the global queue and writes them out.
 */
STATIC int
pagebuf_daemon(
	void			*data)
{
	struct list_head	tmp;
	unsigned long		age;
	xfs_buftarg_t		*target;
	xfs_buf_t		*pb, *n;

	/*  Set up the thread  */
	daemonize("xfsbufd");
	current->flags |= PF_MEMALLOC;

	pagebuf_daemon_task = current;
	pagebuf_daemon_active = 1;
	barrier();

	INIT_LIST_HEAD(&tmp);
	do {
		/* swsusp */
		if (current->flags & PF_FREEZE)
			refrigerator(PF_FREEZE);

		set_current_state(TASK_INTERRUPTIBLE);
		schedule_timeout((xfs_buf_timer_centisecs * HZ) / 100);

		age = (xfs_buf_age_centisecs * HZ) / 100;
		spin_lock(&pbd_delwrite_lock);
		list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) {
			PB_TRACE(pb, "walkq1", (long)pagebuf_ispin(pb));
			ASSERT(pb->pb_flags & PBF_DELWRI);

			/* Skip pinned buffers and ones we can't trylock. */
			if (!pagebuf_ispin(pb) && !pagebuf_cond_lock(pb)) {
				/* Queue is age-ordered (tail insertion), so
				 * the first too-young buffer ends the scan
				 * unless a flush was forced. */
				if (!force_flush &&
				    time_before(jiffies,
						pb->pb_queuetime + age)) {
					pagebuf_unlock(pb);
					break;
				}

				pb->pb_flags &= ~PBF_DELWRI;
				pb->pb_flags |= PBF_WRITE;
				list_move(&pb->pb_list, &tmp);
			}
		}
		spin_unlock(&pbd_delwrite_lock);

		/* Write out everything we claimed, without the lock held. */
		while (!list_empty(&tmp)) {
			pb = list_entry(tmp.next, xfs_buf_t, pb_list);
			target = pb->pb_target;

			list_del_init(&pb->pb_list);
			pagebuf_iostrategy(pb);

			blk_run_address_space(target->pbr_mapping);
		}

		if (as_list_len > 0)
			purge_addresses();

		force_flush = 0;
	} while (pagebuf_daemon_active);

	complete_and_exit(&pagebuf_daemon_done, 0);
}

/*
 * Go through all incore buffers, and release buffers if they belong to
 * the given device. This is used in filesystem error handling to
 * preserve the consistency of its metadata.
 */
int
xfs_flush_buftarg(
	xfs_buftarg_t		*target,
	int			wait)
{
	struct list_head	tmp;
	xfs_buf_t		*pb, *n;
	int			pincount = 0;

	/* Drain any completion work still in flight first. */
	pagebuf_runall_queues(pagebuf_dataio_workqueue);
	pagebuf_runall_queues(pagebuf_logio_workqueue);

	INIT_LIST_HEAD(&tmp);
	spin_lock(&pbd_delwrite_lock);
	list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) {

		if (pb->pb_target != target)
			continue;

		ASSERT(pb->pb_flags & PBF_DELWRI);
		PB_TRACE(pb, "walkq2", (long)pagebuf_ispin(pb));
		/* Pinned buffers can't be written yet; report them. */
		if (pagebuf_ispin(pb)) {
			pincount++;
			continue;
		}

		pb->pb_flags &= ~PBF_DELWRI;
		pb->pb_flags |= PBF_WRITE;
		list_move(&pb->pb_list, &tmp);
	}
	spin_unlock(&pbd_delwrite_lock);

	/*
	 * Dropped the delayed write list lock, now walk the temporary list
	 */
	list_for_each_entry_safe(pb, n, &tmp, pb_list) {
		/* If waiting, keep buffers on tmp (synchronous write) so we
		 * can iowait on them below; otherwise fire and forget. */
		if (wait)
			pb->pb_flags &= ~PBF_ASYNC;
		else
			list_del_init(&pb->pb_list);

		pagebuf_lock(pb);
		pagebuf_iostrategy(pb);
	}

	/*
	 * Remaining list items must be flushed before returning
	 */
	while (!list_empty(&tmp)) {
		pb = list_entry(tmp.next, xfs_buf_t, pb_list);

		list_del_init(&pb->pb_list);
		xfs_iowait(pb);
		xfs_buf_relse(pb);
	}

	if (wait)
		blk_run_address_space(target->pbr_mapping);

	/* Number of pinned buffers we could not flush. */
	return pincount;
}

/*
 * Create the pagebuf workqueues and start the delwri flush daemon.
 * Returns the daemon's pid (>0) on success, negative errno on failure.
 */
STATIC int
pagebuf_daemon_start(void)
{
int		rval;	pagebuf_logio_workqueue = create_workqueue("xfslogd");	if (!pagebuf_logio_workqueue)		return -ENOMEM;	pagebuf_dataio_workqueue = create_workqueue("xfsdatad");	if (!pagebuf_dataio_workqueue) {		destroy_workqueue(pagebuf_logio_workqueue);		return -ENOMEM;	}	rval = kernel_thread(pagebuf_daemon, NULL, CLONE_FS|CLONE_FILES);	if (rval < 0) {		destroy_workqueue(pagebuf_logio_workqueue);		destroy_workqueue(pagebuf_dataio_workqueue);	}	return rval;}/* * pagebuf_daemon_stop * * Note: do not mark as __exit, it is called from pagebuf_terminate. */STATIC voidpagebuf_daemon_stop(void){	pagebuf_daemon_active = 0;	barrier();	wait_for_completion(&pagebuf_daemon_done);	destroy_workqueue(pagebuf_logio_workqueue);	destroy_workqueue(pagebuf_dataio_workqueue);}/* *	Initialization and Termination */int __initpagebuf_init(void){	int			i;	pagebuf_cache = kmem_cache_create("xfs_buf_t", sizeof(xfs_buf_t), 0,			SLAB_HWCACHE_ALIGN, NULL, NULL);	if (pagebuf_cache == NULL) {		printk("XFS: couldn't init xfs_buf_t cache\n");		pagebuf_terminate();		return -ENOMEM;	}#ifdef PAGEBUF_TRACE	pagebuf_trace_buf = ktrace_alloc(PAGEBUF_TRACE_SIZE, KM_SLEEP);#endif	pagebuf_daemon_start();	pagebuf_shake = kmem_shake_register(pagebuf_daemon_wakeup);	if (pagebuf_shake == NULL) {		pagebuf_terminate();		return -ENOMEM;	}	for (i = 0; i < NHASH; i++) {		spin_lock_init(&pbhash[i].pb_hash_lock);		INIT_LIST_HEAD(&pbhash[i].pb_hash);	}	return 0;}/* *	pagebuf_terminate. * *	Note: do not mark as __exit, this is also called from the __init code. */voidpagebuf_terminate(void){	pagebuf_daemon_stop();#ifdef PAGEBUF_TRACE	ktrace_free(pagebuf_trace_buf);#endif	kmem_zone_destroy(pagebuf_cache);	kmem_shake_deregister(pagebuf_shake);}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?