raid1.c

From the Linux kernel source tree · C · 2,200 lines · part 1 of 4

	 */
		if (mddev->curr_resync < max_sector) /* aborted */
			bitmap_end_sync(mddev->bitmap, mddev->curr_resync,
					&sync_blocks, 1);
		else /* completed sync */
			conf->fullsync = 0;

		bitmap_close_sync(mddev->bitmap);
		close_sync(conf);
		return 0;
	}

	if (mddev->bitmap == NULL &&
	    mddev->recovery_cp == MaxSector &&
	    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
	    conf->fullsync == 0) {
		*skipped = 1;
		return max_sector - sector_nr;
	}
	/* before building a request, check if we can skip these blocks.
	 * This call to bitmap_start_sync doesn't actually record anything
	 */
	if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) &&
	    !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
		/* We can skip this block, and probably several more */
		*skipped = 1;
		return sync_blocks;
	}
	/*
	 * If there is non-resync activity waiting for a turn,
	 * and resync is going fast enough,
	 * then let it through before starting on this new sync request.
	 */
	if (!go_faster && conf->nr_waiting)
		msleep_interruptible(1000);

	raise_barrier(conf);

	conf->next_resync = sector_nr;

	r1_bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO);
	rcu_read_lock();
	/*
	 * If we get a correctable read error during resync or recovery,
	 * we might want to read from a different device.  So we
	 * flag all drives that could conceivably be read from for READ,
	 * and any others (which will be non-In_sync devices) for WRITE.
	 * If a read fails, we try reading from something else for which READ
	 * is OK.
	 */

	r1_bio->mddev = mddev;
	r1_bio->sector = sector_nr;
	r1_bio->state = 0;
	set_bit(R1BIO_IsSync, &r1_bio->state);

	for (i = 0; i < conf->raid_disks; i++) {
		mdk_rdev_t *rdev;
		bio = r1_bio->bios[i];

		/* take from bio_init */
		bio->bi_next = NULL;
		bio->bi_flags |= 1 << BIO_UPTODATE;
		bio->bi_rw = READ;
		bio->bi_vcnt = 0;
		bio->bi_idx = 0;
		bio->bi_phys_segments = 0;
		bio->bi_hw_segments = 0;
		bio->bi_size = 0;
		bio->bi_end_io = NULL;
		bio->bi_private = NULL;

		rdev = rcu_dereference(conf->mirrors[i].rdev);
		if (rdev == NULL ||
		    test_bit(Faulty, &rdev->flags)) {
			still_degraded = 1;
			continue;
		} else if (!test_bit(In_sync, &rdev->flags)) {
			bio->bi_rw = WRITE;
			bio->bi_end_io = end_sync_write;
			write_targets++;
		} else {
			/* may need to read from here */
			bio->bi_rw = READ;
			bio->bi_end_io = end_sync_read;
			if (test_bit(WriteMostly, &rdev->flags)) {
				if (wonly < 0)
					wonly = i;
			} else {
				if (disk < 0)
					disk = i;
			}
			read_targets++;
		}
		atomic_inc(&rdev->nr_pending);
		bio->bi_sector = sector_nr + rdev->data_offset;
		bio->bi_bdev = rdev->bdev;
		bio->bi_private = r1_bio;
	}
	rcu_read_unlock();
	if (disk < 0)
		disk = wonly;
	r1_bio->read_disk = disk;

	if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && read_targets > 0)
		/* extra read targets are also write targets */
		write_targets += read_targets - 1;

	if (write_targets == 0 || read_targets == 0) {
		/* There is nowhere to write, so all non-sync
		 * drives must be failed - so we are finished
		 */
		sector_t rv = max_sector - sector_nr;
		*skipped = 1;
		put_buf(r1_bio);
		return rv;
	}

	/* fill each target bio one PAGE at a time, until the resync window
	 * is full or a bio cannot accept another page */
	nr_sectors = 0;
	sync_blocks = 0;
	do {
		struct page *page;
		int len = PAGE_SIZE;
		if (sector_nr + (len>>9) > max_sector)
			len = (max_sector - sector_nr) << 9;
		if (len == 0)
			break;
		if (sync_blocks == 0) {
			if (!bitmap_start_sync(mddev->bitmap, sector_nr,
					       &sync_blocks, still_degraded) &&
			    !conf->fullsync &&
			    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
				break;
			BUG_ON(sync_blocks < (PAGE_SIZE>>9));
			if (len > (sync_blocks<<9))
				len = sync_blocks<<9;
		}

		for (i = 0; i < conf->raid_disks; i++) {
			bio = r1_bio->bios[i];
			if (bio->bi_end_io) {
				page = bio->bi_io_vec[bio->bi_vcnt].bv_page;
				if (bio_add_page(bio, page, len, 0) == 0) {
					/* stop here */
					bio->bi_io_vec[bio->bi_vcnt].bv_page = page;
					while (i > 0) {
						i--;
						bio = r1_bio->bios[i];
						if (bio->bi_end_io == NULL)
							continue;
						/* remove last page from this bio */
						bio->bi_vcnt--;
						bio->bi_size -= len;
						bio->bi_flags &= ~(1 << BIO_SEG_VALID);
					}
					goto bio_full;
				}
			}
		}
		nr_sectors += len>>9;
		sector_nr += len>>9;
		sync_blocks -= (len>>9);
	} while (r1_bio->bios[disk]->bi_vcnt < RESYNC_PAGES);
 bio_full:
	r1_bio->sectors = nr_sectors;

	/* For a user-requested sync, we read all readable devices and do a
	 * compare
	 */
	if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
		atomic_set(&r1_bio->remaining, read_targets);
		for (i = 0; i < conf->raid_disks; i++) {
			bio = r1_bio->bios[i];
			if (bio->bi_end_io == end_sync_read) {
				md_sync_acct(bio->bi_bdev, nr_sectors);
				generic_make_request(bio);
			}
		}
	} else {
		atomic_set(&r1_bio->remaining, 1);
		bio = r1_bio->bios[r1_bio->read_disk];
		md_sync_acct(bio->bi_bdev, nr_sectors);
		generic_make_request(bio);
	}

	return nr_sectors;
}

static int run(mddev_t *mddev)
{
	conf_t *conf;
	int i, j, disk_idx;
	mirror_info_t *disk;
	mdk_rdev_t *rdev;
	struct list_head *tmp;

	if (mddev->level != 1) {
		printk("raid1: %s: raid level not set to mirroring (%d)\n",
		       mdname(mddev), mddev->level);
		goto out;
	}
	if (mddev->reshape_position != MaxSector) {
		printk("raid1: %s: reshape_position set but not supported\n",
		       mdname(mddev));
		goto out;
	}
	/*
	 * copy the already verified devices into our private RAID1
	 * bookkeeping area. [whatever we allocate in run(),
	 * should be freed in stop()]
	 */
	conf = kzalloc(sizeof(conf_t), GFP_KERNEL);
	mddev->private = conf;
	if (!conf)
		goto out_no_mem;

	conf->mirrors = kzalloc(sizeof(struct mirror_info)*mddev->raid_disks,
				GFP_KERNEL);
	if (!conf->mirrors)
		goto out_no_mem;

	conf->tmppage = alloc_page(GFP_KERNEL);
	if (!conf->tmppage)
		goto out_no_mem;

	conf->poolinfo = kmalloc(sizeof(*conf->poolinfo), GFP_KERNEL);
	if (!conf->poolinfo)
		goto out_no_mem;
	conf->poolinfo->mddev = mddev;
	conf->poolinfo->raid_disks = mddev->raid_disks;
	conf->r1bio_pool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc,
					  r1bio_pool_free,
					  conf->poolinfo);
	if (!conf->r1bio_pool)
		goto out_no_mem;

	ITERATE_RDEV(mddev, rdev, tmp) {
		disk_idx = rdev->raid_disk;
		if (disk_idx >= mddev->raid_disks
		    || disk_idx < 0)
			continue;
		disk = conf->mirrors + disk_idx;

		disk->rdev = rdev;

		blk_queue_stack_limits(mddev->queue,
				       rdev->bdev->bd_disk->queue);
		/* as we don't honour merge_bvec_fn, we must never risk
		 * violating it, so limit ->max_sector to one PAGE, as
		 * a one page request is never in violation.
		 */
		if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
		    mddev->queue->max_sectors > (PAGE_SIZE>>9))
			blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);

		disk->head_position = 0;
	}
	conf->raid_disks = mddev->raid_disks;
	conf->mddev = mddev;
	spin_lock_init(&conf->device_lock);
	INIT_LIST_HEAD(&conf->retry_list);

	spin_lock_init(&conf->resync_lock);
	init_waitqueue_head(&conf->wait_barrier);

	bio_list_init(&conf->pending_bio_list);
	bio_list_init(&conf->flushing_bio_list);

	mddev->degraded = 0;
	for (i = 0; i < conf->raid_disks; i++) {

		disk = conf->mirrors + i;

		if (!disk->rdev ||
		    !test_bit(In_sync, &disk->rdev->flags)) {
			disk->head_position = 0;
			mddev->degraded++;
			if (disk->rdev)
				conf->fullsync = 1;
		}
	}
	if (mddev->degraded == conf->raid_disks) {
		printk(KERN_ERR "raid1: no operational mirrors for %s\n",
		       mdname(mddev));
		goto out_free_conf;
	}
	if (conf->raid_disks - mddev->degraded == 1)
		mddev->recovery_cp = MaxSector;

	/*
	 * find the first working one and use it as a starting point
	 * for read balancing.
	 */
	for (j = 0; j < conf->raid_disks &&
		     (!conf->mirrors[j].rdev ||
		      !test_bit(In_sync, &conf->mirrors[j].rdev->flags)); j++)
		/* nothing */;
	conf->last_used = j;

	mddev->thread = md_register_thread(raid1d, mddev, "%s_raid1");
	if (!mddev->thread) {
		printk(KERN_ERR
		       "raid1: couldn't allocate thread for %s\n",
		       mdname(mddev));
		goto out_free_conf;
	}

	printk(KERN_INFO
	       "raid1: raid set %s active with %d out of %d mirrors\n",
	       mdname(mddev), mddev->raid_disks - mddev->degraded,
	       mddev->raid_disks);
	/*
	 * Ok, everything is just fine now
	 */
	mddev->array_size = mddev->size;

	mddev->queue->unplug_fn = raid1_unplug;
	mddev->queue->backing_dev_info.congested_fn = raid1_congested;
	mddev->queue->backing_dev_info.congested_data = mddev;

	return 0;

out_no_mem:
	printk(KERN_ERR "raid1: couldn't allocate memory for %s\n",
	       mdname(mddev));

out_free_conf:
	if (conf) {
		if (conf->r1bio_pool)
			mempool_destroy(conf->r1bio_pool);
		kfree(conf->mirrors);
		safe_put_page(conf->tmppage);
		kfree(conf->poolinfo);
		kfree(conf);
		mddev->private = NULL;
	}
out:
	return -EIO;
}

static int stop(mddev_t *mddev)
{
	conf_t *conf = mddev_to_conf(mddev);
	struct bitmap *bitmap = mddev->bitmap;
	int behind_wait = 0;

	/* wait for behind writes to complete */
	while (bitmap && atomic_read(&bitmap->behind_writes) > 0) {
		behind_wait++;
		printk(KERN_INFO "raid1: behind writes in progress on device %s, waiting to stop (%d)\n", mdname(mddev), behind_wait);
		set_current_state(TASK_UNINTERRUPTIBLE);
		schedule_timeout(HZ); /* wait a second */
		/* need to kick something here to make sure I/O goes? */
	}

	md_unregister_thread(mddev->thread);
	mddev->thread = NULL;
	blk_sync_queue(mddev->queue); /* the unplug fn references 'conf' */
	if (conf->r1bio_pool)
		mempool_destroy(conf->r1bio_pool);
	kfree(conf->mirrors);
	kfree(conf->poolinfo);
	kfree(conf);
	mddev->private = NULL;
	return 0;
}

static int raid1_resize(mddev_t *mddev, sector_t sectors)
{
	/* no resync is happening, and there is enough space
	 * on all devices, so we can resize.
	 * We need to make sure resync covers any new space.
	 * If the array is shrinking we should possibly wait until
	 * any io in the removed space completes, but it hardly seems
	 * worth it.
	 */
	mddev->array_size = sectors>>1;
	set_capacity(mddev->gendisk, mddev->array_size << 1);
	mddev->changed = 1;
	if (mddev->array_size > mddev->size && mddev->recovery_cp == MaxSector) {
		mddev->recovery_cp = mddev->size << 1;
		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
	}
	mddev->size = mddev->array_size;
	mddev->resync_max_sectors = sectors;
	return 0;
}

static int raid1_reshape(mddev_t *mddev)
{
	/* We need to:
	 * 1/ resize the r1bio_pool
	 * 2/ resize conf->mirrors
	 *
	 * We allocate a new r1bio_pool if we can.
	 * Then raise a device barrier and wait until all IO stops.
	 * Then resize conf->mirrors and swap in the new r1bio pool.
	 *
	 * At the same time, we "pack" the devices so that all the missing
	 * devices have the higher raid_disk numbers.
	 */
	mempool_t *newpool, *oldpool;
	struct pool_info *newpoolinfo;
	mirror_info_t *newmirrors;
	conf_t *conf = mddev_to_conf(mddev);
	int cnt, raid_disks;
	unsigned long flags;
	int d, d2;

	/* Cannot change chunk_size, layout, or level */
	if (mddev->chunk_size != mddev->new_chunk ||
	    mddev->layout != mddev->new_layout ||
	    mddev->level != mddev->new_level) {
		mddev->new_chunk = mddev->chunk_size;
		mddev->new_layout = mddev->layout;
		mddev->new_level = mddev->level;
		return -EINVAL;
	}

	md_allow_write(mddev);

	raid_disks = mddev->raid_disks + mddev->delta_disks;

	if (raid_disks < conf->raid_disks) {
		cnt = 0;
		for (d = 0; d < conf->raid_disks; d++)
			if (conf->mirrors[d].rdev)
				cnt++;
		if (cnt > raid_disks)
			return -EBUSY;
	}

	newpoolinfo = kmalloc(sizeof(*newpoolinfo), GFP_KERNEL);
	if (!newpoolinfo)
		return -ENOMEM;
	newpoolinfo->mddev = mddev;
	newpoolinfo->raid_disks = raid_disks;

	newpool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc,
				 r1bio_pool_free, newpoolinfo);
	if (!newpool) {
		kfree(newpoolinfo);
		return -ENOMEM;
	}
	newmirrors = kzalloc(sizeof(struct mirror_info) * raid_disks, GFP_KERNEL);
	if (!newmirrors) {
		kfree(newpoolinfo);
		mempool_destroy(newpool);
		return -ENOMEM;
	}

	raise_barrier(conf);

	/* ok, everything is stopped */
	oldpool = conf->r1bio_pool;
	conf->r1bio_pool = newpool;

	for (d = d2 = 0; d < conf->raid_disks; d++) {
		mdk_rdev_t *rdev = conf->mirrors[d].rdev;
		if (rdev && rdev->raid_disk != d2) {
			char nm[20];
			sprintf(nm, "rd%d", rdev->raid_disk);
			sysfs_remove_link(&mddev->kobj, nm);
			rdev->raid_disk = d2;
			sprintf(nm, "rd%d", rdev->raid_disk);
			sysfs_remove_link(&mddev->kobj, nm);
			if (sysfs_create_link(&mddev->kobj,
					      &rdev->kobj, nm))
				printk(KERN_WARNING
				       "md/raid1: cannot register "
				       "%s for %s\n",
				       nm, mdname(mddev));
		}
		if (rdev)
			newmirrors[d2++].rdev = rdev;
	}
	kfree(conf->mirrors);
	conf->mirrors = newmirrors;
	kfree(conf->poolinfo);
	conf->poolinfo = newpoolinfo;

	spin_lock_irqsave(&conf->device_lock, flags);
	mddev->degraded += (raid_disks - conf->raid_disks);
	spin_unlock_irqrestore(&conf->device_lock, flags);
	conf->raid_disks = mddev->raid_disks = raid_disks;
	mddev->delta_disks = 0;

	conf->last_used = 0; /* just make sure it is in-range */
	lower_barrier(conf);

	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
	md_wakeup_thread(mddev->thread);

	mempool_destroy(oldpool);
	return 0;
}

/* quiesce (state 1) or resume (state 0) array I/O via the resync barrier */
static void raid1_quiesce(mddev_t *mddev, int state)
{
	conf_t *conf = mddev_to_conf(mddev);

	switch (state) {
	case 1:
		raise_barrier(conf);
		break;
	case 0:
		lower_barrier(conf);
		break;
	}
}


static struct mdk_personality raid1_personality =
{
	.name		= "raid1",
	.level		= 1,
	.owner		= THIS_MODULE,
	.make_request	= make_request,
	.run		= run,
	.stop		= stop,
	.status		= status,
	.error_handler	= error,
	.hot_add_disk	= raid1_add_disk,
	.hot_remove_disk= raid1_remove_disk,
	.spare_active	= raid1_spare_active,
	.sync_request	= sync_request,
	.resize		= raid1_resize,
	.check_reshape	= raid1_reshape,
	.quiesce	= raid1_quiesce,
};

/* module entry/exit: register and unregister the RAID1 personality */
static int __init raid_init(void)
{
	return register_md_personality(&raid1_personality);
}

static void raid_exit(void)
{
	unregister_md_personality(&raid1_personality);
}

module_init(raid_init);
module_exit(raid_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS("md-personality-3"); /* RAID1 */
MODULE_ALIAS("md-raid1");
MODULE_ALIAS("md-level-1");
