raid1.c

来自「Linux Kernel 2.6.9 for OMAP1710」· C语言 代码 · 共 1,426 行 · 第 1/3 页

C
1,426
字号
	return ret;
}	/* end of a function whose beginning precedes this excerpt */

/*
 * Throttle resync depth, so that we can both get proper overlapping of
 * requests, but are still able to handle normal requests quickly.
 */
#define RESYNC_DEPTH 32

/*
 * device_barrier - raise the resync barrier and record the next resync sector.
 * @conf: raid1 configuration whose barrier / nr_pending counters are used
 * @sect: value stored into conf->next_resync
 *
 * Everything below runs under conf->resync_lock.  unplug_slaves(conf->mddev)
 * is handed to wait_event_lock_irq() as its command argument.
 * NOTE(review): presumably the macro runs that command before each sleep -
 * confirm against the definition of wait_event_lock_irq().
 */
static void device_barrier(conf_t *conf, sector_t sect)
{
	spin_lock_irq(&conf->resync_lock);
	/* wait on wait_idle until nobody is queued on wait_resume */
	wait_event_lock_irq(conf->wait_idle, !waitqueue_active(&conf->wait_resume),
			    conf->resync_lock, unplug_slaves(conf->mddev));

	/* only the caller that takes barrier from 0 to 1 waits for nr_pending to reach 0 */
	if (!conf->barrier++) {
		wait_event_lock_irq(conf->wait_idle, !conf->nr_pending,
				    conf->resync_lock, unplug_slaves(conf->mddev));
		if (conf->nr_pending)
			BUG();
	}
	/* do not proceed while barrier has reached RESYNC_DEPTH */
	wait_event_lock_irq(conf->wait_resume, conf->barrier < RESYNC_DEPTH,
			    conf->resync_lock, unplug_slaves(conf->mddev));
	conf->next_resync = sect;
	spin_unlock_irq(&conf->resync_lock);
}

/*
 * make_request - submit one incoming bio to the mirror set.
 * @q:   request queue; q->queuedata is used as the mddev
 * @bio: the incoming (master) bio
 *
 * A READ is cloned once and sent to the disk chosen by read_balance().
 * A WRITE is cloned once for every mirror that has an rdev which is not
 * marked faulty.
 *
 * Returns 0 on every path.
 */
static int make_request(request_queue_t *q, struct bio * bio)
{
	mddev_t *mddev = q->queuedata;
	conf_t *conf = mddev_to_conf(mddev);
	mirror_info_t *mirror;
	r1bio_t *r1_bio;
	struct bio *read_bio;
	int i, disks;

	/*
	 * Register the new request and wait if the reconstruction
	 * thread has put up a bar for new requests.
	 * Continue immediately if no resync is active currently.
	 */
	spin_lock_irq(&conf->resync_lock);
	wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock, );
	conf->nr_pending++;
	spin_unlock_irq(&conf->resync_lock);

	/* per-direction I/O accounting on the md gendisk */
	if (bio_data_dir(bio)==WRITE) {
		disk_stat_inc(mddev->gendisk, writes);
		disk_stat_add(mddev->gendisk, write_sectors, bio_sectors(bio));
	} else {
		disk_stat_inc(mddev->gendisk, reads);
		disk_stat_add(mddev->gendisk, read_sectors, bio_sectors(bio));
	}

	/*
	 * make_request() can abort the operation when READA is being
	 * used and no empty request is available.
	 *
	 */
	/*
	 * NOTE(review): the mempool_alloc() result is used without a NULL
	 * check; presumably it cannot fail with GFP_NOIO - confirm.
	 */
	r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);

	r1_bio->master_bio = bio;
	r1_bio->sectors = bio->bi_size >> 9;	/* bytes -> 512-byte sectors */

	r1_bio->mddev = mddev;
	r1_bio->sector = bio->bi_sector;

	r1_bio->state = 0;

	if (bio_data_dir(bio) == READ) {
		/*
		 * read balancing logic:
		 */
		int rdisk = read_balance(conf, r1_bio);

		if (rdisk < 0) {
			/* couldn't find anywhere to read from */
			raid_end_bio_io(r1_bio);
			return 0;
		}
		mirror = conf->mirrors + rdisk;

		r1_bio->read_disk = rdisk;

		read_bio = bio_clone(bio, GFP_NOIO);

		r1_bio->bios[rdisk] = read_bio;

		/* retarget the clone at the chosen member device */
		read_bio->bi_sector = r1_bio->sector + mirror->rdev->data_offset;
		read_bio->bi_bdev = mirror->rdev->bdev;
		read_bio->bi_end_io = raid1_end_read_request;
		read_bio->bi_rw = READ;
		read_bio->bi_private = r1_bio;

		generic_make_request(read_bio);
		return 0;
	}

	/*
	 * WRITE:
	 */
	/* first select target devices under spinlock and
	 * inc refcount on their rdev.  Record them by setting
	 * bios[x] to bio
	 */
	disks = conf->raid_disks;
	spin_lock_irq(&conf->device_lock);
	for (i = 0;  i < disks; i++) {
		if (conf->mirrors[i].rdev &&
		    !conf->mirrors[i].rdev->faulty) {
			atomic_inc(&conf->mirrors[i].rdev->nr_pending);
			r1_bio->bios[i] = bio;
		} else
			r1_bio->bios[i] = NULL;
	}
	spin_unlock_irq(&conf->device_lock);

	/*
	 * 'remaining' starts at 1 and is raised once per submitted clone;
	 * the initial count is dropped after the loop below.
	 */
	atomic_set(&r1_bio->remaining, 1);
	md_write_start(mddev);
	for (i = 0; i < disks; i++) {
		struct bio *mbio;
		if (!r1_bio->bios[i])
			continue;

		/* replace the placeholder recorded above with a real clone */
		mbio = bio_clone(bio, GFP_NOIO);
		r1_bio->bios[i] = mbio;

		mbio->bi_sector	= r1_bio->sector + conf->mirrors[i].rdev->data_offset;
		mbio->bi_bdev = conf->mirrors[i].rdev->bdev;
		mbio->bi_end_io	= raid1_end_write_request;
		mbio->bi_rw = WRITE;
		mbio->bi_private = r1_bio;

		atomic_inc(&r1_bio->remaining);
		generic_make_request(mbio);
	}

	/* drop the initial count; if it reaches zero the request is ended here */
	if (atomic_dec_and_test(&r1_bio->remaining)) {
		md_write_end(mddev);
		raid_end_bio_io(r1_bio);
	}

	return 0;
}

/*
 * status - print " [raid_disks/working_disks] [" followed by one character
 * per mirror slot ("U" if the slot has an rdev that is in_sync, "_"
 * otherwise) and a closing "]" to @seq.
 */
static void status(struct seq_file *seq, mddev_t *mddev)
{
	conf_t *conf = mddev_to_conf(mddev);
	int i;

	seq_printf(seq, " [%d/%d] [", conf->raid_disks,
						conf->working_disks);
	for (i = 0; i < conf->raid_disks; i++)
		seq_printf(seq, "%s",
			      conf->mirrors[i].rdev &&
			      conf->mirrors[i].rdev->in_sync ? "U" : "_");
	seq_printf(seq, "]");
}

/*
 * error - handle a failure reported for @rdev.
 *
 * Does nothing when @rdev is in_sync and is the only working disk.
 * Otherwise clears in_sync, sets faulty, marks the superblock dirty and
 * logs the failure; the degraded / working_disks counters are only
 * adjusted when the device was in_sync on entry.
 */
static void error(mddev_t *mddev, mdk_rdev_t *rdev)
{
	char b[BDEVNAME_SIZE];
	conf_t *conf = mddev_to_conf(mddev);

	/*
	 * If it is not operational, then we have already marked it as dead
	 * else if it is the last working disks, ignore the error, let the
	 * next level up know.
	 * else mark the drive as failed
	 */
	if (rdev->in_sync
	    && conf->working_disks == 1)
		/*
		 * Don't fail the drive, act as though we were just a
		 * normal single drive
		 */
		return;
	if (rdev->in_sync) {
		mddev->degraded++;
		conf->working_disks--;
		/*
		 * if recovery is running, make sure it aborts.
		 */
		set_bit(MD_RECOVERY_ERR, &mddev->recovery);
	}
	rdev->in_sync = 0;
	rdev->faulty = 1;
	mddev->sb_dirty = 1;
	printk(KERN_ALERT "raid1: Disk failure on %s, disabling device. \n"
		"	Operation continuing on %d devices\n",
		bdevname(rdev->bdev,b), conf->working_disks);
}

/*
 * print_conf - dump the mirror configuration with printk().
 *
 * Tolerates a NULL @conf.  Slots without an rdev are skipped.  For each
 * printed slot, "wo" is !in_sync and "o" is !faulty.
 */
static void print_conf(conf_t *conf)
{
	int i;
	mirror_info_t *tmp;

	printk("RAID1 conf printout:\n");
	if (!conf) {
		printk("(!conf)\n");
		return;
	}
	printk(" --- wd:%d rd:%d\n", conf->working_disks,
		conf->raid_disks);

	for (i = 0; i < conf->raid_disks; i++) {
		char b[BDEVNAME_SIZE];
		tmp = conf->mirrors + i;
		if (tmp->rdev)
			printk(" disk %d, wo:%d, o:%d, dev:%s\n",
				i, !tmp->rdev->in_sync, !tmp->rdev->faulty,
				bdevname(tmp->rdev->bdev,b));
	}
}

/*
 * close_sync - wait for the barrier count to drop to zero, then destroy
 * conf->r1buf_pool and clear the pointer.
 *
 * BUG()s if the barrier is non-zero after the wait or if anything is still
 * queued on wait_idle.
 */
static void close_sync(conf_t *conf)
{
	spin_lock_irq(&conf->resync_lock);
	wait_event_lock_irq(conf->wait_resume, !conf->barrier,
			    conf->resync_lock, 	unplug_slaves(conf->mddev));
	spin_unlock_irq(&conf->resync_lock);

	if (conf->barrier) BUG();
	if (waitqueue_active(&conf->wait_idle)) BUG();

	mempool_destroy(conf->r1buf_pool);
	conf->r1buf_pool = NULL;
}

/*
 * raid1_spare_active - mark every present, non-faulty, not-yet-in_sync
 * mirror as in_sync, adjusting working_disks and degraded accordingly.
 *
 * Runs under conf->device_lock.  Always returns 0.
 */
static int raid1_spare_active(mddev_t *mddev)
{
	int i;
	conf_t *conf = mddev->private;
	mirror_info_t *tmp;

	spin_lock_irq(&conf->device_lock);
	/*
	 * Find all failed disks within the RAID1 configuration
	 * and mark them readable
	 */
	for (i = 0; i < conf->raid_disks; i++) {
		tmp = conf->mirrors + i;
		if (tmp->rdev
		    && !tmp->rdev->faulty
		    && !tmp->rdev->in_sync) {
			conf->working_disks++;
			mddev->degraded--;
			tmp->rdev->in_sync = 1;
		}
	}
	spin_unlock_irq(&conf->device_lock);

	print_conf(conf);
	return 0;
}

/*
 * raid1_add_disk - place @rdev into the first mirror slot that has no rdev.
 *
 * Runs under conf->device_lock.  Returns 1 if a free slot was found and
 * filled, 0 otherwise.
 */
static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
{
	conf_t *conf = mddev->private;
	int found = 0;
	int mirror;
	mirror_info_t *p;

	spin_lock_irq(&conf->device_lock);
	for (mirror=0; mirror < mddev->raid_disks; mirror++)
		if ( !(p=conf->mirrors+mirror)->rdev) {
			p->rdev = rdev;

			blk_queue_stack_limits(mddev->queue,
					       rdev->bdev->bd_disk->queue);
			/* as we don't honour merge_bvec_fn, we must never risk
			 * violating it, so limit ->max_sector to one PAGE, as
			 * a one page request is never in violation.
			 */
			if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
			    mddev->queue->max_sectors > (PAGE_SIZE>>9))
				blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);

			p->head_position = 0;
			rdev->raid_disk = mirror;
			found = 1;
			break;
		}
	spin_unlock_irq(&conf->device_lock);

	print_conf(conf);
	return found;
}

/*
 * raid1_remove_disk - detach the rdev in mirror slot @number.
 *
 * Returns 0 on success, -EBUSY if the device is still in_sync or has
 * pending I/O (nr_pending != 0), and 1 (after MD_BUG()) if the slot held
 * no rdev at all.
 */
static int raid1_remove_disk(mddev_t *mddev, int number)
{
	conf_t *conf = mddev->private;
	int err = 1;
	mirror_info_t *p = conf->mirrors+ number;

	print_conf(conf);
	spin_lock_irq(&conf->device_lock);
	if (p->rdev) {
		if (p->rdev->in_sync ||
		    atomic_read(&p->rdev->nr_pending)) {
			err = -EBUSY;
			goto abort;
		}
		p->rdev = NULL;
		err = 0;
	}
	/* err is still 1 here only when the slot was empty */
	if (err)
		MD_BUG();
abort:
	spin_unlock_irq(&conf->device_lock);

	print_conf(conf);
	return err;
}

/*
 * end_sync_read - bi_end_io handler for the read half of a resync request.
 *
 * Returns 1 without doing anything while bio->bi_size is non-zero
 * (NOTE(review): presumably a partial completion - confirm), otherwise
 * hands the r1_bio to reschedule_retry() and returns 0.
 */
static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error)
{
	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private);
	conf_t *conf = mddev_to_conf(r1_bio->mddev);

	if (bio->bi_size)
		return 1;

	/* the completing bio must be the one recorded for read_disk */
	if (r1_bio->bios[r1_bio->read_disk] != bio)
		BUG();
	update_head_pos(r1_bio->read_disk, r1_bio);
	/*
	 * we have read a block, now it needs to be re-written,
	 * or re-read if the read failed.
	 * We don't do much here, just schedule handling by raid1d
	 */
	if (!uptodate)
		md_error(r1_bio->mddev,
			 conf->mirrors[r1_bio->read_disk].rdev);
	else
		set_bit(R1BIO_Uptodate, &r1_bio->state);
	rdev_dec_pending(conf->mirrors[r1_bio->read_disk].rdev, conf->mddev);
	reschedule_retry(r1_bio);
	return 0;
}

/*
 * end_sync_write - bi_end_io handler for one write of a resync request.
 *
 * Returns 1 without doing anything while bio->bi_size is non-zero,
 * otherwise 0.  When the last outstanding write completes ('remaining'
 * drops to zero) the sync is reported via md_done_sync() and the buffer
 * is released with put_buf().
 */
static int end_sync_write(struct bio *bio, unsigned int bytes_done, int error)
{
	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private);
	mddev_t *mddev = r1_bio->mddev;
	conf_t *conf = mddev_to_conf(mddev);
	int i;
	int mirror=0;

	if (bio->bi_size)
		return 1;

	/* find which mirror this bio belongs to; mirror stays 0 if no entry matches */
	for (i = 0; i < conf->raid_disks; i++)
		if (r1_bio->bios[i] == bio) {
			mirror = i;
			break;
		}
	if (!uptodate)
		md_error(mddev, conf->mirrors[mirror].rdev);
	update_head_pos(mirror, r1_bio);

	if (atomic_dec_and_test(&r1_bio->remaining)) {
		md_done_sync(mddev, r1_bio->sectors, uptodate);
		put_buf(r1_bio);
	}
	/* uses only conf/mirror/mddev, not r1_bio, after the put_buf() above */
	rdev_dec_pending(conf->mirrors[mirror].rdev, mddev);
	return 0;
}

/*
 * sync_request_write - issue the write half of a resync request.
 *
 * If the preceding read did not set R1BIO_Uptodate, the sync is reported
 * as failed and the buffer released.  Otherwise every bio in
 * r1_bio->bios[] whose bi_end_io is end_sync_write is submitted.
 */
static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
{
	conf_t *conf = mddev_to_conf(mddev);
	int i;
	int disks = conf->raid_disks;
	struct bio *bio, *wbio;

	bio = r1_bio->bios[r1_bio->read_disk];

	/*
	 * schedule writes
	 */
	if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) {
		/*
		 * There is no point trying a read-for-reconstruct as
		 * reconstruct is about to be aborted
		 */
		char b[BDEVNAME_SIZE];
		printk(KERN_ALERT "raid1: %s: unrecoverable I/O read error"
			" for block %llu\n",
			bdevname(bio->bi_bdev,b),
			(unsigned long long)r1_bio->sector);
		md_done_sync(mddev, r1_bio->sectors, 0);
		put_buf(r1_bio);
		return;
	}

	/*
	 * 'remaining' starts at 1 and is raised once per submitted write;
	 * the initial count is dropped after the loop.
	 */
	atomic_set(&r1_bio->remaining, 1);
	for (i = 0; i < disks ; i++) {
		wbio = r1_bio->bios[i];
		/* only bios set up with end_sync_write as their handler are written */
		if (wbio->bi_end_io != end_sync_write)
			continue;

		atomic_inc(&conf->mirrors[i].rdev->nr_pending);
		atomic_inc(&r1_bio->remaining);
		md_sync_acct(conf->mirrors[i].rdev->bdev, wbio->bi_size >> 9);
		generic_make_request(wbio);
	}

	/* drop the initial count; if it reaches zero the sync is reported done here */
	if (atomic_dec_and_test(&r1_bio->remaining)) {
		md_done_sync(mddev, r1_bio->sectors, 1);
		put_buf(r1_bio);
	}
}

/*
 * This is a kernel thread which:
 *
 *	1.	Retries failed read operations on working mirrors.
 *	2.	Updates the raid superblock when problems are encountered.
 *	3.	Performs writes following reads for array synchronising.
 *
 * NOTE(review): only the beginning of this function is visible in this
 * excerpt; the remainder (including the release of retry_list_lock after
 * the loop is left via 'break') is not shown here.
 */
static void raid1d(mddev_t *mddev)
{
	struct list_head *head = &retry_list_head;
	r1bio_t *r1_bio;
	struct bio *bio;
	unsigned long flags;
	conf_t *conf = mddev_to_conf(mddev);
	int unplug=0;
	mdk_rdev_t *rdev;

	md_check_recovery(mddev);
	md_handle_safemode(mddev);

	for (;;) {
		char b[BDEVNAME_SIZE];
		spin_lock_irqsave(&retry_list_lock, flags);
		/* leaves the loop with retry_list_lock still held */
		if (list_empty(head))
			break;
		/* take the entry at the tail of the retry list */
		r1_bio = list_entry(head->prev, r1bio_t, retry_list);
		list_del(head->prev);
		spin_unlock_irqrestore(&retry_list_lock, flags);

		/* the queued request may belong to a different array than the caller's */
		mddev = r1_bio->mddev;
		conf = mddev_to_conf(mddev);
		if (test_bit(R1BIO_IsSync, &r1_bio->state)) {
			sync_request_write(mddev, r1_bio);
			unplug = 1;
		} else {
			int disk;
			bio = r1_bio->bios[r1_bio->read_disk];
			if ((disk=read_balance(conf, r1_bio)) == -1) {
				printk(KERN_ALERT "raid1: %s: unrecoverable I/O"
				       " read error for block %llu\n",
				       bdevname(bio->bi_bdev,b),
				       (unsigned long long)r1_bio->sector);
				raid_end_bio_io(r1_bio);
			} else {
				/* move the bio from the old read_disk slot to the newly chosen one */
				r1_bio->bios[r1_bio->read_disk] = NULL;
				r1_bio->read_disk = disk;
				r1_bio->bios[r1_bio->read_disk] = bio;
				rdev = conf->mirrors[disk].rdev;
				if (printk_ratelimit())
					printk(KERN_ERR "raid1: %s: redirecting sector %llu to"
					       " another mirror\n",
					       bdevname(rdev->bdev,b),
					       (unsigned long long)r1_bio->sector);
				bio->bi_bdev = rdev->bdev;
				bio->bi_sector = r1_bio->sector + rdev->data_offset;

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?