raid10.c

From "Linux Kernel 2.6.9 for OMAP1710" · C source code · 1,781 lines total · page 1/4

C
1,781
Font size
		return sectors_skipped;	}	if (chunks_skipped >= conf->raid_disks) {		/* if there has been nothing to do on any drive,		 * then there is nothing to do at all..		 */		sector_t sec = max_sector - sector_nr;		md_done_sync(mddev, sec, 1);		return sec + sectors_skipped;	}	/* make sure whole request will fit in a chunk - if chunks	 * are meaningful	 */	if (conf->near_copies < conf->raid_disks &&	    max_sector > (sector_nr | conf->chunk_mask))		max_sector = (sector_nr | conf->chunk_mask) + 1;	/*	 * If there is non-resync activity waiting for us then	 * put in a delay to throttle resync.	 */	if (!go_faster && waitqueue_active(&conf->wait_resume))		schedule_timeout(HZ);	device_barrier(conf, sector_nr + RESYNC_SECTORS);	/* Again, very different code for resync and recovery.	 * Both must result in an r10bio with a list of bios that	 * have bi_end_io, bi_sector, bi_bdev set,	 * and bi_private set to the r10bio.	 * For recovery, we may actually create several r10bios	 * with 2 bios in each, that correspond to the bios in the main one.	 * In this case, the subordinate r10bios link back through a	 * borrowed master_bio pointer, and the counter in the master	 * includes a ref from each subordinate.	 */	/* First, we decide what to do and set ->bi_end_io	 * To end_sync_read if we want to read, and	 * end_sync_write if we will want to write.	 */	if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {		/* recovery... 
the complicated one */		int i, j, k;		r10_bio = NULL;		for (i=0 ; i<conf->raid_disks; i++)			if (conf->mirrors[i].rdev &&			    !conf->mirrors[i].rdev->in_sync) {				/* want to reconstruct this device */				r10bio_t *rb2 = r10_bio;				r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO);				spin_lock_irq(&conf->resync_lock);				conf->nr_pending++;				if (rb2) conf->barrier++;				spin_unlock_irq(&conf->resync_lock);				atomic_set(&r10_bio->remaining, 0);				r10_bio->master_bio = (struct bio*)rb2;				if (rb2)					atomic_inc(&rb2->remaining);				r10_bio->mddev = mddev;				set_bit(R10BIO_IsRecover, &r10_bio->state);				r10_bio->sector = raid10_find_virt(conf, sector_nr, i);				raid10_find_phys(conf, r10_bio);				for (j=0; j<conf->copies;j++) {					int d = r10_bio->devs[j].devnum;					if (conf->mirrors[d].rdev &&					    conf->mirrors[d].rdev->in_sync) {						/* This is where we read from */						bio = r10_bio->devs[0].bio;						bio->bi_next = biolist;						biolist = bio;						bio->bi_private = r10_bio;						bio->bi_end_io = end_sync_read;						bio->bi_rw = 0;						bio->bi_sector = r10_bio->devs[j].addr +							conf->mirrors[d].rdev->data_offset;						bio->bi_bdev = conf->mirrors[d].rdev->bdev;						atomic_inc(&conf->mirrors[d].rdev->nr_pending);						atomic_inc(&r10_bio->remaining);						/* and we write to 'i' */						for (k=0; k<conf->copies; k++)							if (r10_bio->devs[k].devnum == i)								break;						bio = r10_bio->devs[1].bio;						bio->bi_next = biolist;						biolist = bio;						bio->bi_private = r10_bio;						bio->bi_end_io = end_sync_write;						bio->bi_rw = 1;						bio->bi_sector = r10_bio->devs[k].addr +							conf->mirrors[i].rdev->data_offset;						bio->bi_bdev = conf->mirrors[i].rdev->bdev;						r10_bio->devs[0].devnum = d;						r10_bio->devs[1].devnum = i;						break;					}				}				if (j == conf->copies) {					BUG();				}			}		if (biolist == NULL) {			while (r10_bio) {				r10bio_t *rb2 = r10_bio;				r10_bio = (r10bio_t*) rb2->master_bio;				
rb2->master_bio = NULL;				put_buf(rb2);			}			goto giveup;		}	} else {		/* resync. Schedule a read for every block at this virt offset */		int count = 0;		r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO);		spin_lock_irq(&conf->resync_lock);		conf->nr_pending++;		spin_unlock_irq(&conf->resync_lock);		r10_bio->mddev = mddev;		atomic_set(&r10_bio->remaining, 0);		r10_bio->master_bio = NULL;		r10_bio->sector = sector_nr;		set_bit(R10BIO_IsSync, &r10_bio->state);		raid10_find_phys(conf, r10_bio);		r10_bio->sectors = (sector_nr | conf->chunk_mask) - sector_nr +1;		spin_lock_irq(&conf->device_lock);		for (i=0; i<conf->copies; i++) {			int d = r10_bio->devs[i].devnum;			bio = r10_bio->devs[i].bio;			bio->bi_end_io = NULL;			if (conf->mirrors[d].rdev == NULL ||			    conf->mirrors[d].rdev->faulty)				continue;			atomic_inc(&conf->mirrors[d].rdev->nr_pending);			atomic_inc(&r10_bio->remaining);			bio->bi_next = biolist;			biolist = bio;			bio->bi_private = r10_bio;			bio->bi_end_io = end_sync_read;			bio->bi_rw = 0;			bio->bi_sector = r10_bio->devs[i].addr +				conf->mirrors[d].rdev->data_offset;			bio->bi_bdev = conf->mirrors[d].rdev->bdev;			count++;		}		spin_unlock_irq(&conf->device_lock);		if (count < 2) {			for (i=0; i<conf->copies; i++) {				int d = r10_bio->devs[i].devnum;				if (r10_bio->devs[i].bio->bi_end_io)					atomic_dec(&conf->mirrors[d].rdev->nr_pending);			}			put_buf(r10_bio);			goto giveup;		}	}	for (bio = biolist; bio ; bio=bio->bi_next) {		bio->bi_flags &= ~(BIO_POOL_MASK - 1);		if (bio->bi_end_io)			bio->bi_flags |= 1 << BIO_UPTODATE;		bio->bi_vcnt = 0;		bio->bi_idx = 0;		bio->bi_phys_segments = 0;		bio->bi_hw_segments = 0;		bio->bi_size = 0;	}	nr_sectors = 0;	do {		struct page *page;		int len = PAGE_SIZE;		disk = 0;		if (sector_nr + (len>>9) > max_sector)			len = (max_sector - sector_nr) << 9;		if (len == 0)			break;		for (bio= biolist ; bio ; bio=bio->bi_next) {			page = bio->bi_io_vec[bio->bi_vcnt].bv_page;			if (bio_add_page(bio, page, len, 0) 
== 0) {				/* stop here */				struct bio *bio2;				bio->bi_io_vec[bio->bi_vcnt].bv_page = page;				for (bio2 = biolist; bio2 && bio2 != bio; bio2 = bio2->bi_next) {					/* remove last page from this bio */					bio2->bi_vcnt--;					bio2->bi_size -= len;					bio2->bi_flags &= ~(1<< BIO_SEG_VALID);				}				goto bio_full;			}			disk = i;		}		nr_sectors += len>>9;		sector_nr += len>>9;	} while (biolist->bi_vcnt < RESYNC_PAGES); bio_full:	r10_bio->sectors = nr_sectors;	while (biolist) {		bio = biolist;		biolist = biolist->bi_next;		bio->bi_next = NULL;		r10_bio = bio->bi_private;		r10_bio->sectors = nr_sectors;		if (bio->bi_end_io == end_sync_read) {			md_sync_acct(bio->bi_bdev, nr_sectors);			generic_make_request(bio);		}	}	return nr_sectors; giveup:	/* There is nowhere to write, so all non-sync	 * drives must be failed, so try the next chunk...	 */	{	int sec = max_sector - sector_nr;	sectors_skipped += sec;	chunks_skipped ++;	sector_nr = max_sector;	md_done_sync(mddev, sec, 1);	goto skipped;	}}static int run(mddev_t *mddev){	conf_t *conf;	int i, disk_idx;	mirror_info_t *disk;	mdk_rdev_t *rdev;	struct list_head *tmp;	int nc, fc;	sector_t stride, size;	if (mddev->level != 10) {		printk(KERN_ERR "raid10: %s: raid level not set correctly... (%d)\n",		       mdname(mddev), mddev->level);		goto out;	}	nc = mddev->layout & 255;	fc = (mddev->layout >> 8) & 255;	if ((nc*fc) <2 || (nc*fc) > mddev->raid_disks ||	    (mddev->layout >> 16)) {		printk(KERN_ERR "raid10: %s: unsupported raid10 layout: 0x%8x\n",		       mdname(mddev), mddev->layout);		goto out;	}	/*	 * copy the already verified devices into our private RAID10	 * bookkeeping area. 
[whatever we allocate in run(),	 * should be freed in stop()]	 */	conf = kmalloc(sizeof(conf_t), GFP_KERNEL);	mddev->private = conf;	if (!conf) {		printk(KERN_ERR "raid10: couldn't allocate memory for %s\n",			mdname(mddev));		goto out;	}	memset(conf, 0, sizeof(*conf));	conf->mirrors = kmalloc(sizeof(struct mirror_info)*mddev->raid_disks,				 GFP_KERNEL);	if (!conf->mirrors) {		printk(KERN_ERR "raid10: couldn't allocate memory for %s\n",		       mdname(mddev));		goto out_free_conf;	}	memset(conf->mirrors, 0, sizeof(struct mirror_info)*mddev->raid_disks);	conf->near_copies = nc;	conf->far_copies = fc;	conf->copies = nc*fc;	conf->chunk_mask = (sector_t)(mddev->chunk_size>>9)-1;	conf->chunk_shift = ffz(~mddev->chunk_size) - 9;	stride = mddev->size >> (conf->chunk_shift-1);	sector_div(stride, fc);	conf->stride = stride << conf->chunk_shift;	conf->r10bio_pool = mempool_create(NR_RAID10_BIOS, r10bio_pool_alloc,						r10bio_pool_free, conf);	if (!conf->r10bio_pool) {		printk(KERN_ERR "raid10: couldn't allocate memory for %s\n",			mdname(mddev));		goto out_free_conf;	}	mddev->queue->unplug_fn = raid10_unplug;	mddev->queue->issue_flush_fn = raid10_issue_flush;	ITERATE_RDEV(mddev, rdev, tmp) {		disk_idx = rdev->raid_disk;		if (disk_idx >= mddev->raid_disks		    || disk_idx < 0)			continue;		disk = conf->mirrors + disk_idx;		disk->rdev = rdev;		blk_queue_stack_limits(mddev->queue,				       rdev->bdev->bd_disk->queue);		/* as we don't honour merge_bvec_fn, we must never risk		 * violating it, so limit ->max_sector to one PAGE, as		 * a one page request is never in violation.		 
*/		if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&		    mddev->queue->max_sectors > (PAGE_SIZE>>9))			mddev->queue->max_sectors = (PAGE_SIZE>>9);		disk->head_position = 0;		if (!rdev->faulty && rdev->in_sync)			conf->working_disks++;	}	conf->raid_disks = mddev->raid_disks;	conf->mddev = mddev;	conf->device_lock = SPIN_LOCK_UNLOCKED;	INIT_LIST_HEAD(&conf->retry_list);	conf->resync_lock = SPIN_LOCK_UNLOCKED;	init_waitqueue_head(&conf->wait_idle);	init_waitqueue_head(&conf->wait_resume);	if (!conf->working_disks) {		printk(KERN_ERR "raid10: no operational mirrors for %s\n",			mdname(mddev));		goto out_free_conf;	}	mddev->degraded = 0;	for (i = 0; i < conf->raid_disks; i++) {		disk = conf->mirrors + i;		if (!disk->rdev) {			disk->head_position = 0;			mddev->degraded++;		}	}	mddev->thread = md_register_thread(raid10d, mddev, "%s_raid10");	if (!mddev->thread) {		printk(KERN_ERR		       "raid10: couldn't allocate thread for %s\n",		       mdname(mddev));		goto out_free_conf;	}	printk(KERN_INFO		"raid10: raid set %s active with %d out of %d devices\n",		mdname(mddev), mddev->raid_disks - mddev->degraded,		mddev->raid_disks);	/*	 * Ok, everything is just fine now	 */	size = conf->stride * conf->raid_disks;	sector_div(size, conf->near_copies);	mddev->array_size = size/2;	mddev->resync_max_sectors = size;	/* Calculate max read-ahead size.	 * We need to readahead at least twice a whole stripe....	 * maybe...	 
*/	{		int stripe = conf->raid_disks * mddev->chunk_size / PAGE_CACHE_SIZE;		stripe /= conf->near_copies;		if (mddev->queue->backing_dev_info.ra_pages < 2* stripe)			mddev->queue->backing_dev_info.ra_pages = 2* stripe;	}	if (conf->near_copies < mddev->raid_disks)		blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec);	return 0;out_free_conf:	if (conf->r10bio_pool)		mempool_destroy(conf->r10bio_pool);	if (conf->mirrors)		kfree(conf->mirrors);	kfree(conf);	mddev->private = NULL;out:	return -EIO;}static int stop(mddev_t *mddev){	conf_t *conf = mddev_to_conf(mddev);	md_unregister_thread(mddev->thread);	mddev->thread = NULL;	if (conf->r10bio_pool)		mempool_destroy(conf->r10bio_pool);	if (conf->mirrors)		kfree(conf->mirrors);	kfree(conf);	mddev->private = NULL;	return 0;}static mdk_personality_t raid10_personality ={	.name		= "raid10",	.owner		= THIS_MODULE,	.make_request	= make_request,	.run		= run,	.stop		= stop,	.status		= status,	.error_handler	= error,	.hot_add_disk	= raid10_add_disk,	.hot_remove_disk= raid10_remove_disk,	.spare_active	= raid10_spare_active,	.sync_request	= sync_request,};static int __init raid_init(void){	return register_md_personality(RAID10, &raid10_personality);}static void raid_exit(void){	unregister_md_personality(RAID10);}module_init(raid_init);module_exit(raid_exit);MODULE_LICENSE("GPL");MODULE_ALIAS("md-personality-9"); /* RAID10 */

⌨️ Keyboard shortcuts

Copy code: Ctrl + C
Search code: Ctrl + F
Full-screen mode: F11
Increase font size: Ctrl + =
Decrease font size: Ctrl + -
Show shortcuts: ?