raid1.c
		close_sync(conf);
		sdisk = conf->mirrors + spare_disk;
		sdisk->operational = 0;
		sdisk->write_only = 0;
		break;
	/*
	 * Activate (mark read-write) the (now sync) spare disk,
	 * which means we switch its 'raid position' (->raid_disk)
	 * with the failed disk. (only the first 'conf->nr_disks'
	 * slots are used for 'real' disks and we must preserve this
	 * property)
	 */
	case DISKOP_SPARE_ACTIVE:
		close_sync(conf);
		sdisk = conf->mirrors + spare_disk;
		fdisk = conf->mirrors + failed_disk;

		spare_desc = &sb->disks[sdisk->number];
		failed_desc = &sb->disks[fdisk->number];

		if (spare_desc != *d) {
			MD_BUG();
			err = 1;
			goto abort;
		}

		if (spare_desc->raid_disk != sdisk->raid_disk) {
			MD_BUG();
			err = 1;
			goto abort;
		}

		if (sdisk->raid_disk != spare_disk) {
			MD_BUG();
			err = 1;
			goto abort;
		}

		if (failed_desc->raid_disk != fdisk->raid_disk) {
			MD_BUG();
			err = 1;
			goto abort;
		}

		if (fdisk->raid_disk != failed_disk) {
			MD_BUG();
			err = 1;
			goto abort;
		}

		/*
		 * do the switch finally
		 */
		xchg_values(*spare_desc, *failed_desc);
		xchg_values(*fdisk, *sdisk);

		/*
		 * (careful, 'failed' and 'spare' are switched from now on)
		 *
		 * we want to preserve linear numbering and we want to
		 * give the proper raid_disk number to the now activated
		 * disk. (this means we switch back these values)
		 */
		xchg_values(spare_desc->raid_disk, failed_desc->raid_disk);
		xchg_values(sdisk->raid_disk, fdisk->raid_disk);
		xchg_values(spare_desc->number, failed_desc->number);
		xchg_values(sdisk->number, fdisk->number);

		*d = failed_desc;

		if (sdisk->dev == MKDEV(0,0))
			sdisk->used_slot = 0;
		/*
		 * this really activates the spare.
		 */
		fdisk->spare = 0;
		fdisk->write_only = 0;

		/*
		 * if we activate a spare, we definitely replace a
		 * non-operational disk slot in the 'low' area of
		 * the disk array.
		 */
		conf->working_disks++;

		break;

	case DISKOP_HOT_REMOVE_DISK:
		rdisk = conf->mirrors + removed_disk;

		if (rdisk->spare && (removed_disk < conf->raid_disks)) {
			MD_BUG();
			err = 1;
			goto abort;
		}
		rdisk->dev = MKDEV(0,0);
		rdisk->used_slot = 0;
		conf->nr_disks--;
		break;

	case DISKOP_HOT_ADD_DISK:
		adisk = conf->mirrors + added_disk;
		added_desc = *d;

		if (added_disk != added_desc->number) {
			MD_BUG();
			err = 1;
			goto abort;
		}

		adisk->number = added_desc->number;
		adisk->raid_disk = added_desc->raid_disk;
		adisk->dev = MKDEV(added_desc->major, added_desc->minor);

		adisk->operational = 0;
		adisk->write_only = 0;
		adisk->spare = 1;
		adisk->used_slot = 1;
		adisk->head_position = 0;
		conf->nr_disks++;

		break;

	default:
		MD_BUG();
		err = 1;
		goto abort;
	}
abort:
	md_spin_unlock_irq(&conf->device_lock);
	if (state == DISKOP_SPARE_ACTIVE || state == DISKOP_SPARE_INACTIVE)
		/* should move to "END_REBUILD" when such exists */
		raid1_shrink_buffers(conf);

	print_raid1_conf(conf);
	return err;
}
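/*
 * For reference: xchg_values() is defined earlier in raid1.c and is not
 * part of this excerpt. A minimal sketch of the swap it performs
 * (assuming GCC's __typeof__, which the rest of the driver relies on):
 *
 *	#define xchg_values(x, y) do {		\
 *		__typeof__(x) __tmp = x;	\
 *		x = y;				\
 *		y = __tmp;			\
 *	} while (0)
 *
 * DISKOP_SPARE_ACTIVE above swaps the whole descriptors first, then
 * swaps ->raid_disk and ->number back, so the array keeps its linear
 * slot numbering while the activated disk takes over the failed slot.
 */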
#define IO_ERROR KERN_ALERT \
"raid1: %s: unrecoverable I/O read error for block %lu\n"

#define REDIRECT_SECTOR KERN_ERR \
"raid1: %s: redirecting sector %lu to another mirror\n"

/*
 * This is a kernel thread which:
 *
 *	1.	Retries failed read operations on working mirrors.
 *	2.	Updates the raid superblock when problems are encountered.
 *	3.	Performs writes following reads for array synchronising.
 */
static void end_sync_write(struct buffer_head *bh, int uptodate);
static void end_sync_read(struct buffer_head *bh, int uptodate);

static void raid1d (void *data)
{
	struct raid1_bh *r1_bh;
	struct buffer_head *bh;
	unsigned long flags;
	mddev_t *mddev;
	kdev_t dev;

	for (;;) {
		md_spin_lock_irqsave(&retry_list_lock, flags);
		r1_bh = raid1_retry_list;
		if (!r1_bh)
			break;
		raid1_retry_list = r1_bh->next_r1;
		md_spin_unlock_irqrestore(&retry_list_lock, flags);

		mddev = r1_bh->mddev;
		if (mddev->sb_dirty) {
			printk(KERN_INFO "dirty sb detected, updating.\n");
			mddev->sb_dirty = 0;
			md_update_sb(mddev);
		}
		bh = &r1_bh->bh_req;
		switch(r1_bh->cmd) {
		case SPECIAL:
			/* have to allocate lots of bh structures and
			 * schedule writes
			 */
			if (test_bit(R1BH_Uptodate, &r1_bh->state)) {
				int i, sum_bhs = 0;
				int disks = MD_SB_DISKS;
				struct buffer_head *bhl, *mbh;
				raid1_conf_t *conf;
				int sectors = bh->b_size >> 9;

				conf = mddev_to_conf(mddev);
				/* don't really need this many */
				bhl = raid1_alloc_bh(conf, conf->raid_disks);
				for (i = 0; i < disks; i++) {
					if (!conf->mirrors[i].operational)
						continue;
					if (i == conf->last_used)
						/* we read from here, no need to write */
						continue;
					if (i < conf->raid_disks
					    && !conf->resync_mirrors)
						/* don't need to write this,
						 * we are just rebuilding */
						continue;
					mbh = bhl;
					if (!mbh) {
						MD_BUG();
						break;
					}
					bhl = mbh->b_next;
					mbh->b_this_page = (struct buffer_head *)1;

					/*
					 * prepare mirrored bh (fields ordered for max mem throughput):
					 */
					mbh->b_blocknr = bh->b_blocknr;
					mbh->b_dev     = conf->mirrors[i].dev;
					mbh->b_rdev    = conf->mirrors[i].dev;
					mbh->b_rsector = bh->b_blocknr * sectors;
					mbh->b_state   = (1<<BH_Req) | (1<<BH_Dirty) |
							 (1<<BH_Mapped) | (1<<BH_Lock);
					atomic_set(&mbh->b_count, 1);
					mbh->b_size    = bh->b_size;
					mbh->b_page    = bh->b_page;
					mbh->b_data    = bh->b_data;
					mbh->b_list    = BUF_LOCKED;
					mbh->b_end_io  = end_sync_write;
					mbh->b_private = r1_bh;

					mbh->b_next = r1_bh->mirror_bh_list;
					r1_bh->mirror_bh_list = mbh;

					sum_bhs++;
				}
				md_atomic_set(&r1_bh->remaining, sum_bhs);
				if (bhl)
					raid1_free_bh(conf, bhl);
				mbh = r1_bh->mirror_bh_list;

				while (mbh) {
					struct buffer_head *bh1 = mbh;
					mbh = mbh->b_next;
					generic_make_request(WRITE, bh1);
					md_sync_acct(bh1->b_dev, bh1->b_size/512);
				}
			} else {
				dev = bh->b_dev;
				raid1_map (mddev, &bh->b_dev, bh->b_size >> 9);
				if (bh->b_dev == dev) {
					printk (IO_ERROR, partition_name(bh->b_dev), bh->b_blocknr);
					md_done_sync(mddev, bh->b_size>>10, 0);
				} else {
					printk (REDIRECT_SECTOR,
						partition_name(bh->b_dev), bh->b_blocknr);
					bh->b_rdev = bh->b_dev;
					generic_make_request(READ, bh);
				}
			}
			break;
		case READ:
		case READA:
			dev = bh->b_dev;
			raid1_map (mddev, &bh->b_dev, bh->b_size >> 9);
			if (bh->b_dev == dev) {
				printk (IO_ERROR, partition_name(bh->b_dev), bh->b_blocknr);
				raid1_end_bh_io(r1_bh, 0);
			} else {
				printk (REDIRECT_SECTOR,
					partition_name(bh->b_dev), bh->b_blocknr);
				bh->b_rdev = bh->b_dev;
				generic_make_request (r1_bh->cmd, bh);
			}
			break;
		}
	}
	md_spin_unlock_irqrestore(&retry_list_lock, flags);
}
#undef IO_ERROR
#undef REDIRECT_SECTOR

/*
 * Private kernel thread to reconstruct mirrors after an unclean
 * shutdown.
 */
static void raid1syncd (void *data)
{
	raid1_conf_t *conf = data;
	mddev_t *mddev = conf->mddev;

	if (!conf->resync_mirrors)
		return;
	if (conf->resync_mirrors == 2)
		return;
	down(&mddev->recovery_sem);
	if (!md_do_sync(mddev, NULL)) {
		/*
		 * Only if everything went Ok.
		 */
		conf->resync_mirrors = 0;
	}

	close_sync(conf);

	up(&mddev->recovery_sem);
	raid1_shrink_buffers(conf);
}
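/*
 * Context (not part of this excerpt): raid1syncd is started through the
 * generic 2.4 md thread API from the array setup path, roughly:
 *
 *	conf->resync_thread = md_register_thread(raid1syncd, conf,
 *						 "raid1syncd");
 *
 * The exact call site is in raid1_run(), elsewhere in this file.
 */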
/*
 * perform a "sync" on one "block"
 *
 * We need to make sure that no normal I/O request - particularly write
 * requests - conflict with active sync requests.
 * This is achieved by conceptually dividing the device space into a
 * number of sections:
 *  DONE:    0 .. a-1    These blocks are in-sync
 *  ACTIVE:  a .. b-1    These blocks may have active sync requests, but
 *                       no normal IO requests
 *  READY:   b .. c-1    These blocks have no normal IO requests - sync
 *                       request may be happening
 *  PENDING: c .. d-1    These blocks may have IO requests, but no new
 *                       ones will be added
 *  FUTURE:  d .. end    These blocks are not to be considered yet. IO may
 *                       be happening, but not sync
 *
 * We keep a
 *  phase    which flips (0 or 1) each time d moves and
 * a count of:
 *  z = active io requests in FUTURE since d moved - marked with
 *      current phase
 *  y = active io requests in FUTURE before d moved, or PENDING -
 *      marked with previous phase
 *  x = active sync requests in READY
 *  w = active sync requests in ACTIVE
 *  v = active io requests in DONE
 *
 * Normally, a=b=c=d=0 and z= active io requests
 *   or a=b=c=d=END and v= active io requests
 * Allowed changes to a,b,c,d:
 *  A: c==d && y==0 -> d+=window, y=z, z=0, phase=!phase
 *  B: y==0 -> c=d
 *  C: b=c, w+=x, x=0
 *  D: w==0 -> a=b
 *  E: a==b==c==d==end -> a=b=c=d=0, z=v, v=0
 *
 * At start of sync we apply A.
 * When y reaches 0, we apply B then A then begin sync requests.
 * When the sync point reaches c-1, we wait for y==0 and w==0, and
 * then apply B then A then D then C.
 * Finally, we apply E.
 *
 * The sync request simply issues a "read" against a working drive.
 * This is marked so that on completion the raid1d thread is woken to
 * issue suitable write requests.
 */
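/*
 * Worked example of the transitions above (window = 32, counts
 * illustrative, not from the source): at the start of a sync, a=b=c=d=0
 * and z holds all active IO. Applying A sets d=32, y=z, z=0 and flips
 * the phase, making blocks 0-31 PENDING. When the old-phase requests
 * drain (y==0), B sets c=32 so blocks 0-31 become READY and sync reads
 * can be issued there (counted in x); another A then advances d to 64.
 * C slides b up to c (x moves into w), and once those sync writes
 * finish (w==0), D slides a up to b, marking blocks 0-31 DONE. E
 * finally resets everything when d reaches the end of the device.
 */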
static int raid1_sync_request (mddev_t *mddev, unsigned long block_nr)
{
	raid1_conf_t *conf = mddev_to_conf(mddev);
	struct mirror_info *mirror;
	struct raid1_bh *r1_bh;
	struct buffer_head *bh;
	int bsize;
	int disk;

	spin_lock_irq(&conf->segment_lock);
	if (!block_nr) {
		/* initialize ... */
		int buffs;
		conf->start_active = 0;
		conf->start_ready = 0;
		conf->start_pending = 0;
		conf->start_future = 0;
		conf->phase = 0;
		/* we want enough buffers to hold twice the window of 128 */
		buffs = 128 * 2 / (PAGE_SIZE>>9);
		buffs = raid1_grow_buffers(conf, buffs);
		if (buffs < 2)
			goto nomem;

		conf->window = buffs*(PAGE_SIZE>>9)/2;
		conf->cnt_future += conf->cnt_done+conf->cnt_pending;
		conf->cnt_done = conf->cnt_pending = 0;
		if (conf->cnt_ready || conf->cnt_active)
			MD_BUG();
	}
	while ((block_nr<<1) >= conf->start_pending) {
		PRINTK("wait .. sect=%lu start_active=%d ready=%d pending=%d future=%d, cnt_done=%d active=%d ready=%d pending=%d future=%d\n",
			block_nr<<1, conf->start_active, conf->start_ready,
			conf->start_pending, conf->start_future,
			conf->cnt_done, conf->cnt_active, conf->cnt_ready,
			conf->cnt_pending, conf->cnt_future);
		wait_event_lock_irq(conf->wait_done,
					!conf->cnt_active,
					conf->segment_lock);
		wait_event_lock_irq(conf->wait_ready,
					!conf->cnt_pending,
					conf->segment_lock);
		conf->start_active = conf->start_ready;
		conf->start_ready = conf->start_pending;
		conf->start_pending = conf->start_future;
		conf->start_future = conf->start_future+conf->window;
		// Note: falling off the end is not a problem
		conf->phase = conf->phase ^ 1;
		conf->cnt_active = conf->cnt_ready;
		conf->cnt_ready = 0;
		conf->cnt_pending = conf->cnt_future;
		conf->cnt_future = 0;
		wake_up(&conf->wait_done);
	}
	conf->cnt_ready++;
	spin_unlock_irq(&conf->segment_lock);

	/* If reconstructing, and >1 working disc,
	 * could dedicate one to rebuild and others to
	 * service read requests ..
	 */
	disk = conf->last_used;
	/* make sure disk is operational */
	while (!conf->mirrors[disk].operational) {
		if (disk <= 0)
			disk = conf->raid_disks;
		disk--;
		if (disk == conf->last_used)
			break;
	}
	conf->last_used = disk;

	mirror = conf->mirrors + conf->last_used;

	r1_bh = raid1_alloc_buf (conf);
	r1_bh->master_bh = NULL;
	r1_bh->mddev = mddev;
	r1_bh->cmd = SPECIAL;
	bh = &r1_bh->bh_req;

	bh->b_blocknr = block_nr;
	bsize = 1024;
	while (!(bh->b_blocknr & 1) && bsize < PAGE_SIZE
			&& (bh->b_blocknr+2)*(bsize>>10) < mddev->sb->size) {
		bh->b_blocknr >>= 1;
		bsize <<= 1;
	}
	bh->b_size = bsize;
	bh->b_list = BUF_LOCKED;
	bh->b_dev = mirror->dev;
	bh->b_rdev = mirror->dev;
	bh->b_state = (1<<BH_Req) | (1<<BH_Mapped) | (1<<BH_Lock);
	if (!bh->b_page)
		BUG();
	if (!bh->b_data)
		BUG();
	if (bh->b_data != page_address(bh->b_page))
		BUG();
	bh->b_end_io = end_sync_read;
	bh->b_private = r1_bh;
	bh->b_rsector = block_nr<<1;
	init_waitqueue_head(&bh->b_wait);

	generic_make_request(READ, bh);
	md_sync_acct(bh->b_dev, bh->b_size/512);

	return (bsize >> 10);

nomem:
	raid1_shrink_buffers(conf);
	spin_unlock_irq(&conf->segment_lock);
	return -ENOMEM;
}
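A note on the size-coalescing loop in raid1_sync_request(): while the block
number is even, the buffer size is below PAGE_SIZE, and the enlarged request
still fits within the device, the loop halves b_blocknr and doubles bsize, so
one read covers the largest aligned chunk possible. A minimal user-space
sketch of the same arithmetic (block_nr and size are made-up example values,
and PAGE_SIZE is fixed at 4096 here purely for illustration):

#include <stdio.h>

#define PAGE_SIZE 4096UL	/* assumed page size for the example */

int main(void)
{
	unsigned long block_nr = 8;	/* starting 1K block number (example) */
	unsigned long size = 1024;	/* device size in 1K blocks (example) */
	unsigned long blocknr = block_nr;
	unsigned long bsize = 1024;	/* start with a 1K buffer */

	/* same condition as the kernel loop: keep doubling while the
	 * block number stays aligned, the buffer is under a page, and
	 * the larger request still fits inside the device */
	while (!(blocknr & 1) && bsize < PAGE_SIZE
			&& (blocknr + 2) * (bsize >> 10) < size) {
		blocknr >>= 1;
		bsize <<= 1;
	}
	/* for block_nr=8 this prints: blocknr 2, bsize 4096 -- the same
	 * byte offset (8192), addressed in 4K units instead of 1K */
	printf("block %lu -> blocknr %lu, bsize %lu\n",
	       block_nr, blocknr, bsize);
	return 0;
}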