/*
 * raid5.c
 *
 * [Extraction note] From "Linux Kernel 2.6.9 for OMAP1710" — C source,
 * 1,920 lines total; this is page 1 of 4 of the original file.
 */
/* * raid5.c : Multiple Devices driver for Linux *	   Copyright (C) 1996, 1997 Ingo Molnar, Miguel de Icaza, Gadi Oxman *	   Copyright (C) 1999, 2000 Ingo Molnar * * RAID-5 management functions. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2, or (at your option) * any later version. * * You should have received a copy of the GNU General Public License * (for example /usr/src/linux/COPYING); if not, write to the Free * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */#include <linux/config.h>#include <linux/module.h>#include <linux/slab.h>#include <linux/raid/raid5.h>#include <linux/highmem.h>#include <asm/bitops.h>#include <asm/atomic.h>/* * Stripe cache */#define NR_STRIPES		256#define STRIPE_SIZE		PAGE_SIZE#define STRIPE_SHIFT		(PAGE_SHIFT - 9)#define STRIPE_SECTORS		(STRIPE_SIZE>>9)#define	IO_THRESHOLD		1#define HASH_PAGES		1#define HASH_PAGES_ORDER	0#define NR_HASH			(HASH_PAGES * PAGE_SIZE / sizeof(struct stripe_head *))#define HASH_MASK		(NR_HASH - 1)#define stripe_hash(conf, sect)	((conf)->stripe_hashtbl[((sect) >> STRIPE_SHIFT) & HASH_MASK])/* bio's attached to a stripe+device for I/O are linked together in bi_sector * order without overlap.  There may be several bio's per stripe+device, and * a bio could span several devices. * When walking this list for a particular stripe+device, we must never proceed * beyond a bio that extends past this device, as the next bio might no longer * be valid. * This macro is used to determine the 'next' bio in the list, given the sector * of the current stripe+device */#define r5_next_bio(bio, sect) ( ( bio->bi_sector + (bio->bi_size>>9) < sect + STRIPE_SECTORS) ? 
bio->bi_next : NULL)/* * The following can be used to debug the driver */#define RAID5_DEBUG	0#define RAID5_PARANOIA	1#if RAID5_PARANOIA && defined(CONFIG_SMP)# define CHECK_DEVLOCK() if (!spin_is_locked(&conf->device_lock)) BUG()#else# define CHECK_DEVLOCK()#endif#define PRINTK(x...) ((void)(RAID5_DEBUG && printk(x)))#if RAID5_DEBUG#define inline#define __inline__#endifstatic void print_raid5_conf (raid5_conf_t *conf);static inline void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh){	if (atomic_dec_and_test(&sh->count)) {		if (!list_empty(&sh->lru))			BUG();		if (atomic_read(&conf->active_stripes)==0)			BUG();		if (test_bit(STRIPE_HANDLE, &sh->state)) {			if (test_bit(STRIPE_DELAYED, &sh->state))				list_add_tail(&sh->lru, &conf->delayed_list);			else				list_add_tail(&sh->lru, &conf->handle_list);			md_wakeup_thread(conf->mddev->thread);		} else {			if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {				atomic_dec(&conf->preread_active_stripes);				if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD)					md_wakeup_thread(conf->mddev->thread);			}			list_add_tail(&sh->lru, &conf->inactive_list);			atomic_dec(&conf->active_stripes);			if (!conf->inactive_blocked ||			    atomic_read(&conf->active_stripes) < (NR_STRIPES*3/4))				wake_up(&conf->wait_for_stripe);		}	}}static void release_stripe(struct stripe_head *sh){	raid5_conf_t *conf = sh->raid_conf;	unsigned long flags;		spin_lock_irqsave(&conf->device_lock, flags);	__release_stripe(conf, sh);	spin_unlock_irqrestore(&conf->device_lock, flags);}static void remove_hash(struct stripe_head *sh){	PRINTK("remove_hash(), stripe %llu\n", (unsigned long long)sh->sector);	if (sh->hash_pprev) {		if (sh->hash_next)			sh->hash_next->hash_pprev = sh->hash_pprev;		*sh->hash_pprev = sh->hash_next;		sh->hash_pprev = NULL;	}}static __inline__ void insert_hash(raid5_conf_t *conf, struct stripe_head *sh){	struct stripe_head **shp = &stripe_hash(conf, sh->sector);	PRINTK("insert_hash(), stripe 
%llu\n", (unsigned long long)sh->sector);	CHECK_DEVLOCK();	if ((sh->hash_next = *shp) != NULL)		(*shp)->hash_pprev = &sh->hash_next;	*shp = sh;	sh->hash_pprev = shp;}/* find an idle stripe, make sure it is unhashed, and return it. */static struct stripe_head *get_free_stripe(raid5_conf_t *conf){	struct stripe_head *sh = NULL;	struct list_head *first;	CHECK_DEVLOCK();	if (list_empty(&conf->inactive_list))		goto out;	first = conf->inactive_list.next;	sh = list_entry(first, struct stripe_head, lru);	list_del_init(first);	remove_hash(sh);	atomic_inc(&conf->active_stripes);out:	return sh;}static void shrink_buffers(struct stripe_head *sh, int num){	struct page *p;	int i;	for (i=0; i<num ; i++) {		p = sh->dev[i].page;		if (!p)			continue;		sh->dev[i].page = NULL;		page_cache_release(p);	}}static int grow_buffers(struct stripe_head *sh, int num){	int i;	for (i=0; i<num; i++) {		struct page *page;		if (!(page = alloc_page(GFP_KERNEL))) {			return 1;		}		sh->dev[i].page = page;	}	return 0;}static void raid5_build_block (struct stripe_head *sh, int i);static inline void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx){	raid5_conf_t *conf = sh->raid_conf;	int disks = conf->raid_disks, i;	if (atomic_read(&sh->count) != 0)		BUG();	if (test_bit(STRIPE_HANDLE, &sh->state))		BUG();		CHECK_DEVLOCK();	PRINTK("init_stripe called, stripe %llu\n", 		(unsigned long long)sh->sector);	remove_hash(sh);		sh->sector = sector;	sh->pd_idx = pd_idx;	sh->state = 0;	for (i=disks; i--; ) {		struct r5dev *dev = &sh->dev[i];		if (dev->toread || dev->towrite || dev->written ||		    test_bit(R5_LOCKED, &dev->flags)) {			printk("sector=%llx i=%d %p %p %p %d\n",			       (unsigned long long)sh->sector, i, dev->toread,			       dev->towrite, dev->written,			       test_bit(R5_LOCKED, &dev->flags));			BUG();		}		dev->flags = 0;		raid5_build_block(sh, i);	}	insert_hash(conf, sh);}static struct stripe_head *__find_stripe(raid5_conf_t *conf, sector_t sector){	struct stripe_head *sh;	
CHECK_DEVLOCK();	PRINTK("__find_stripe, sector %llu\n", (unsigned long long)sector);	for (sh = stripe_hash(conf, sector); sh; sh = sh->hash_next)		if (sh->sector == sector)			return sh;	PRINTK("__stripe %llu not in cache\n", (unsigned long long)sector);	return NULL;}static void unplug_slaves(mddev_t *mddev);static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector,					     int pd_idx, int noblock) {	struct stripe_head *sh;	PRINTK("get_stripe, sector %llu\n", (unsigned long long)sector);	spin_lock_irq(&conf->device_lock);	do {		sh = __find_stripe(conf, sector);		if (!sh) {			if (!conf->inactive_blocked)				sh = get_free_stripe(conf);			if (noblock && sh == NULL)				break;			if (!sh) {				conf->inactive_blocked = 1;				wait_event_lock_irq(conf->wait_for_stripe,						    !list_empty(&conf->inactive_list) &&						    (atomic_read(&conf->active_stripes) < (NR_STRIPES *3/4)						     || !conf->inactive_blocked),						    conf->device_lock,						    unplug_slaves(conf->mddev);					);				conf->inactive_blocked = 0;			} else				init_stripe(sh, sector, pd_idx);		} else {			if (atomic_read(&sh->count)) {				if (!list_empty(&sh->lru))					BUG();			} else {				if (!test_bit(STRIPE_HANDLE, &sh->state))					atomic_inc(&conf->active_stripes);				if (list_empty(&sh->lru))					BUG();				list_del_init(&sh->lru);			}		}	} while (sh == NULL);	if (sh)		atomic_inc(&sh->count);	spin_unlock_irq(&conf->device_lock);	return sh;}static int grow_stripes(raid5_conf_t *conf, int num){	struct stripe_head *sh;	kmem_cache_t *sc;	int devs = conf->raid_disks;	sprintf(conf->cache_name, "raid5/%s", mdname(conf->mddev));	sc = kmem_cache_create(conf->cache_name, 			       sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev),			       0, 0, NULL, NULL);	if (!sc)		return 1;	conf->slab_cache = sc;	while (num--) {		sh = kmem_cache_alloc(sc, GFP_KERNEL);		if (!sh)			return 1;		memset(sh, 0, sizeof(*sh) + (devs-1)*sizeof(struct r5dev));		sh->raid_conf = conf;		sh->lock = 
SPIN_LOCK_UNLOCKED;		if (grow_buffers(sh, conf->raid_disks)) {			shrink_buffers(sh, conf->raid_disks);			kmem_cache_free(sc, sh);			return 1;		}		/* we just created an active stripe so... */		atomic_set(&sh->count, 1);		atomic_inc(&conf->active_stripes);		INIT_LIST_HEAD(&sh->lru);		release_stripe(sh);	}	return 0;}static void shrink_stripes(raid5_conf_t *conf){	struct stripe_head *sh;	while (1) {		spin_lock_irq(&conf->device_lock);		sh = get_free_stripe(conf);		spin_unlock_irq(&conf->device_lock);		if (!sh)			break;		if (atomic_read(&sh->count))			BUG();		shrink_buffers(sh, conf->raid_disks);		kmem_cache_free(conf->slab_cache, sh);		atomic_dec(&conf->active_stripes);	}	kmem_cache_destroy(conf->slab_cache);	conf->slab_cache = NULL;}static int raid5_end_read_request (struct bio * bi, unsigned int bytes_done,				   int error){ 	struct stripe_head *sh = bi->bi_private;	raid5_conf_t *conf = sh->raid_conf;	int disks = conf->raid_disks, i;	int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);	if (bi->bi_size)		return 1;	for (i=0 ; i<disks; i++)		if (bi == &sh->dev[i].req)			break;	PRINTK("end_read_request %llu/%d, count: %d, uptodate %d.\n", 		(unsigned long long)sh->sector, i, atomic_read(&sh->count), 		uptodate);	if (i == disks) {		BUG();		return 0;	}	if (uptodate) {#if 0		struct bio *bio;		unsigned long flags;		spin_lock_irqsave(&conf->device_lock, flags);		/* we can return a buffer if we bypassed the cache or		 * if the top buffer is not in highmem.  
If there are		 * multiple buffers, leave the extra work to		 * handle_stripe		 */		buffer = sh->bh_read[i];		if (buffer &&		    (!PageHighMem(buffer->b_page)		     || buffer->b_page == bh->b_page )			) {			sh->bh_read[i] = buffer->b_reqnext;			buffer->b_reqnext = NULL;		} else			buffer = NULL;		spin_unlock_irqrestore(&conf->device_lock, flags);		if (sh->bh_page[i]==bh->b_page)			set_buffer_uptodate(bh);		if (buffer) {			if (buffer->b_page != bh->b_page)				memcpy(buffer->b_data, bh->b_data, bh->b_size);			buffer->b_end_io(buffer, 1);		}#else		set_bit(R5_UPTODATE, &sh->dev[i].flags);#endif			} else {		md_error(conf->mddev, conf->disks[i].rdev);		clear_bit(R5_UPTODATE, &sh->dev[i].flags);	}	rdev_dec_pending(conf->disks[i].rdev, conf->mddev);#if 0	/* must restore b_page before unlocking buffer... */	if (sh->bh_page[i] != bh->b_page) {		bh->b_page = sh->bh_page[i];		bh->b_data = page_address(bh->b_page);		clear_buffer_uptodate(bh);	}#endif	clear_bit(R5_LOCKED, &sh->dev[i].flags);	set_bit(STRIPE_HANDLE, &sh->state);	release_stripe(sh);	return 0;}static int raid5_end_write_request (struct bio *bi, unsigned int bytes_done,				    int error){ 	struct stripe_head *sh = bi->bi_private;	raid5_conf_t *conf = sh->raid_conf;	int disks = conf->raid_disks, i;	unsigned long flags;	int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);	if (bi->bi_size)		return 1;	for (i=0 ; i<disks; i++)		if (bi == &sh->dev[i].req)			break;	PRINTK("end_write_request %llu/%d, count %d, uptodate: %d.\n", 		(unsigned long long)sh->sector, i, atomic_read(&sh->count),		uptodate);	if (i == disks) {		BUG();		return 0;	}	spin_lock_irqsave(&conf->device_lock, flags);	if (!uptodate)		md_error(conf->mddev, conf->disks[i].rdev);	rdev_dec_pending(conf->disks[i].rdev, conf->mddev);		clear_bit(R5_LOCKED, &sh->dev[i].flags);	set_bit(STRIPE_HANDLE, &sh->state);	__release_stripe(conf, sh);	spin_unlock_irqrestore(&conf->device_lock, flags);	return 0;}static sector_t compute_blocknr(struct stripe_head *sh, int i);	
static void raid5_build_block (struct stripe_head *sh, int i){	struct r5dev *dev = &sh->dev[i];	bio_init(&dev->req);	dev->req.bi_io_vec = &dev->vec;	dev->req.bi_vcnt++;	dev->vec.bv_page = dev->page;	dev->vec.bv_len = STRIPE_SIZE;	dev->vec.bv_offset = 0;	dev->req.bi_sector = sh->sector;	dev->req.bi_private = sh;	dev->flags = 0;	if (i != sh->pd_idx)		dev->sector = compute_blocknr(sh, i);}static void error(mddev_t *mddev, mdk_rdev_t *rdev){	char b[BDEVNAME_SIZE];	raid5_conf_t *conf = (raid5_conf_t *) mddev->private;	PRINTK("raid5: error called\n");	if (!rdev->faulty) {		mddev->sb_dirty = 1;		if (rdev->in_sync) {

/*
 * [Extraction note] Trailing code-viewer UI text (keyboard shortcut help):
 * Copy code: Ctrl + C · Search code: Ctrl + F · Fullscreen: F11 ·
 * Larger font: Ctrl + = · Smaller font: Ctrl + - · Show shortcuts: ?
 */