/*
 * 📄 raid5.c
 * 字号:
 * (Code-viewer page header retained above for provenance; it is not
 *  part of the original kernel source.)
 */
/* NOTE(review): this excerpt opens mid-function -- the two braces below
 * close scopes begun before the visible chunk, and conf, disk, sb,
 * mddev and dev are declared in the unseen function head. */
		}
	}
	/*
	 * handle errors in spares (during reconstruction)
	 */
	if (conf->spare) {
		disk = conf->spare;
		if (disk->dev == dev) {
			printk (KERN_ALERT "raid5: Disk failure on spare %s\n", partition_name (dev));
			if (!conf->spare->operational) {
				/* probably a SET_DISK_FAULTY ioctl */
				return -EIO;
			}
			/* Take the spare out of service, record the failure in
			 * the superblock counters, and wake the raid5 thread
			 * so it can notice the state change. */
			disk->operational = 0;
			disk->write_only = 0;
			conf->spare = NULL;
			mark_disk_faulty(sb->disks+disk->number);
			mark_disk_nonsync(sb->disks+disk->number);
			mark_disk_inactive(sb->disks+disk->number);
			sb->spare_disks--;
			sb->working_disks--;
			sb->failed_disks++;
			mddev->sb_dirty = 1;
			md_wakeup_thread(conf->thread);
			return 0;
		}
	}
	/* dev matched neither an active member (above, unseen) nor the spare */
	MD_BUG();
	return -EIO;
}

/*
 * Input: a 'big' sector number,
 * Output: index of the data and parity disk, and the sector # in them.
 *
 * Maps a linear array sector onto (data-disk index *dd_idx, parity-disk
 * index *pd_idx, per-disk sector).  For level 4 the parity disk is fixed
 * at the last slot; for level 5 it rotates per conf->algorithm.
 */
static unsigned long raid5_compute_sector(unsigned long r_sector, unsigned int raid_disks,
			unsigned int data_disks, unsigned int * dd_idx,
			unsigned int * pd_idx, raid5_conf_t *conf)
{
	unsigned long stripe;
	unsigned long chunk_number;
	unsigned int chunk_offset;
	unsigned long new_sector;
	int sectors_per_chunk = conf->chunk_size >> 9;	/* chunk_size is bytes; >>9 gives 512-byte sectors */

	/* First compute the information on this sector */

	/*
	 * Compute the chunk number and the sector offset inside the chunk
	 */
	chunk_number = r_sector / sectors_per_chunk;
	chunk_offset = r_sector % sectors_per_chunk;

	/*
	 * Compute the stripe number
	 */
	stripe = chunk_number / data_disks;

	/*
	 * Compute the data disk and parity disk indexes inside the stripe
	 */
	*dd_idx = chunk_number % data_disks;

	/*
	 * Select the parity disk based on the user selected algorithm.
	 */
	if (conf->level == 4)
		*pd_idx = data_disks;	/* RAID4: parity always on the last disk */
	else switch (conf->algorithm) {
		case ALGORITHM_LEFT_ASYMMETRIC:
			*pd_idx = data_disks - stripe % raid_disks;
			/* dd_idx counts data slots only, so step over the
			 * parity slot when it falls at or before us */
			if (*dd_idx >= *pd_idx)
				(*dd_idx)++;
			break;
		case ALGORITHM_RIGHT_ASYMMETRIC:
			*pd_idx = stripe % raid_disks;
			if (*dd_idx >= *pd_idx)
				(*dd_idx)++;
			break;
		case ALGORITHM_LEFT_SYMMETRIC:
			*pd_idx = data_disks - stripe % raid_disks;
			/* symmetric layouts: data disks follow the parity
			 * disk cyclically around the stripe */
			*dd_idx = (*pd_idx + 1 + *dd_idx) % raid_disks;
			break;
		case ALGORITHM_RIGHT_SYMMETRIC:
			*pd_idx = stripe % raid_disks;
			*dd_idx = (*pd_idx + 1 + *dd_idx) % raid_disks;
			break;
		default:
			printk ("raid5: unsupported algorithm %d\n", conf->algorithm);
	}

	/*
	 * Finally, compute the new sector number
	 */
	new_sector = stripe * sectors_per_chunk + chunk_offset;
	return new_sector;
}

#if 0
/* Inverse of raid5_compute_sector(): given a stripe and a cache slot i,
 * recover the array block number.  Compiled out; it cross-checks itself
 * by running the forward mapping and comparing. */
static unsigned long compute_blocknr(struct stripe_head *sh, int i)
{
	raid5_conf_t *conf = sh->raid_conf;
	int raid_disks = conf->raid_disks, data_disks = raid_disks - 1;
	unsigned long new_sector = sh->sector, check;
	int sectors_per_chunk = conf->chunk_size >> 9;
	unsigned long stripe = new_sector / sectors_per_chunk;
	int chunk_offset = new_sector % sectors_per_chunk;
	int chunk_number, dummy1, dummy2, dd_idx = i;
	unsigned long r_sector, blocknr;

	/* undo the per-algorithm dd_idx adjustment made by the forward map */
	switch (conf->algorithm) {
		case ALGORITHM_LEFT_ASYMMETRIC:
		case ALGORITHM_RIGHT_ASYMMETRIC:
			if (i > sh->pd_idx)
				i--;
			break;
		case ALGORITHM_LEFT_SYMMETRIC:
		case ALGORITHM_RIGHT_SYMMETRIC:
			if (i < sh->pd_idx)
				i += raid_disks;
			i -= (sh->pd_idx + 1);
			break;
		default:
			printk ("raid5: unsupported algorithm %d\n", conf->algorithm);
	}

	chunk_number = stripe * data_disks + i;
	r_sector = chunk_number * sectors_per_chunk + chunk_offset;
	blocknr = r_sector / (sh->size >> 9);

	/* sanity check: forward mapping must reproduce our position */
	check = raid5_compute_sector (r_sector, raid_disks, data_disks, &dummy1, &dummy2, conf);
	if (check != sh->sector || dummy1 != dd_idx || dummy2 != sh->pd_idx) {
		printk("compute_blocknr: map not correct\n");
		return 0;
	}
	return blocknr;
}
#endif

/* Flush the accumulated XOR sources once bh_ptr[] is full.  Relies on
 * the caller's local variables `count' and `bh_ptr'; bh_ptr[0] is the
 * destination and stays put, so count restarts at 1 after a flush. */
#define check_xor()	do {						\
				if (count == MAX_XOR_BLOCKS) {		\
					xor_block(count, bh_ptr);	\
count = 1;	\
				}				\
			} while(0)

/*
 * compute_block(): reconstruct cache block dd_idx of a stripe by XOR-ing
 * together all other up-to-date cached blocks, then mark it uptodate.
 * The printk flags any source block that is unexpectedly not present.
 */
static void compute_block(struct stripe_head *sh, int dd_idx)
{
	raid5_conf_t *conf = sh->raid_conf;
	int i, count, disks = conf->raid_disks;
	struct buffer_head *bh_ptr[MAX_XOR_BLOCKS], *bh;

	PRINTK("compute_block, stripe %lu, idx %d\n", sh->sector, dd_idx);

	/* destination starts zeroed so the XOR accumulation is clean */
	memset(sh->bh_cache[dd_idx]->b_data, 0, sh->size);
	bh_ptr[0] = sh->bh_cache[dd_idx];
	count = 1;
	for (i = disks ; i--; ) {
		if (i == dd_idx)
			continue;
		bh = sh->bh_cache[i];
		if (buffer_uptodate(bh))
			bh_ptr[count++] = bh;
		else
			printk("compute_block() %d, stripe %lu, %d not present\n", dd_idx, sh->sector, i);
		check_xor();
	}
	if (count != 1)
		xor_block(count, bh_ptr);
	set_bit(BH_Uptodate, &sh->bh_cache[dd_idx]->b_state);
}

/*
 * compute_parity(): (re)compute the parity block of a stripe.
 *
 * method selects the strategy:
 *   READ_MODIFY_WRITE  - XOR old data out of the existing parity, then
 *                        XOR the new data in (old parity must be uptodate);
 *   RECONSTRUCT_WRITE  - zero the parity and XOR every data block in;
 *   CHECK_PARITY       - XOR all data into the existing parity buffer.
 * For the write methods, pending requests are moved from bh_write[] to
 * bh_written[] (via chosen[]) and their data copied into the cache.
 */
static void compute_parity(struct stripe_head *sh, int method)
{
	raid5_conf_t *conf = sh->raid_conf;
	int i, pd_idx = sh->pd_idx, disks = conf->raid_disks, count;
	struct buffer_head *bh_ptr[MAX_XOR_BLOCKS];
	struct buffer_head *chosen[MD_SB_DISKS];

	PRINTK("compute_parity, stripe %lu, method %d\n", sh->sector, method);
	memset(chosen, 0, sizeof(chosen));

	count = 1;
	bh_ptr[0] = sh->bh_cache[pd_idx];	/* parity block is the XOR destination */
	switch(method) {
	case READ_MODIFY_WRITE:
		/* r-m-w requires the old parity to be valid */
		if (!buffer_uptodate(sh->bh_cache[pd_idx]))
			BUG();
		for (i=disks ; i-- ;) {
			if (i==pd_idx)
				continue;
			if (sh->bh_write[i] && buffer_uptodate(sh->bh_cache[i])) {
				/* XOR the old data out of the parity, and move
				 * the head pending write onto bh_written[] */
				bh_ptr[count++] = sh->bh_cache[i];
				chosen[i] = sh->bh_write[i];
				sh->bh_write[i] = sh->bh_write[i]->b_reqnext;
				chosen[i]->b_reqnext = sh->bh_written[i];
				sh->bh_written[i] = chosen[i];
				check_xor();
			}
		}
		break;
	case RECONSTRUCT_WRITE:
		/* parity rebuilt from scratch: start from zeroes */
		memset(sh->bh_cache[pd_idx]->b_data, 0, sh->size);
		for (i= disks; i-- ;)
			if (i!=pd_idx && sh->bh_write[i]) {
				chosen[i] = sh->bh_write[i];
				sh->bh_write[i] = sh->bh_write[i]->b_reqnext;
				chosen[i]->b_reqnext = sh->bh_written[i];
				sh->bh_written[i] = chosen[i];
			}
		break;
	case CHECK_PARITY:
		break;
	}
	if (count>1) {
		xor_block(count, bh_ptr);
		count = 1;
	}

	/* copy new data from the chosen request buffers into the cache and
	 * lock those cache buffers for writing out */
	for (i = disks; i--;)
		if (chosen[i]) {
			struct buffer_head *bh = sh->bh_cache[i];
			char *bdata;
			bdata = bh_kmap(chosen[i]);
			memcpy(bh->b_data, bdata,sh->size);
			bh_kunmap(chosen[i]);
			set_bit(BH_Lock, &bh->b_state);
			mark_buffer_uptodate(bh, 1);
		}

	switch(method) {
	case RECONSTRUCT_WRITE:
	case CHECK_PARITY:
		/* XOR every data block into the parity destination */
		for (i=disks; i--;)
			if (i != pd_idx) {
				bh_ptr[count++] = sh->bh_cache[i];
				check_xor();
			}
		break;
	case READ_MODIFY_WRITE:
		/* XOR the freshly-copied new data back in */
		for (i = disks; i--;)
			if (chosen[i]) {
				bh_ptr[count++] = sh->bh_cache[i];
				check_xor();
			}
	}
	if (count != 1)
		xor_block(count, bh_ptr);

	if (method != CHECK_PARITY) {
		mark_buffer_uptodate(sh->bh_cache[pd_idx], 1);
		set_bit(BH_Lock, &sh->bh_cache[pd_idx]->b_state);
	} else
		/* NOTE(review): for CHECK_PARITY the buffer now holds
		 * parity XOR data -- presumably all-zero iff consistent,
		 * and the caller inspects it; confirm against the caller. */
		mark_buffer_uptodate(sh->bh_cache[pd_idx], 0);
}

/*
 * add_stripe_bh(): queue an incoming read or write request bh on the
 * per-disk request list (bh_read[] or bh_write[]) of a stripe, linked
 * through b_reqnext.  Both the stripe lock and the device lock are held
 * while the list is modified.
 */
static void add_stripe_bh (struct stripe_head *sh, struct buffer_head *bh, int dd_idx, int rw)
{
	struct buffer_head **bhp;
	raid5_conf_t *conf = sh->raid_conf;

	PRINTK("adding bh b#%lu to stripe s#%lu\n", bh->b_blocknr, sh->sector);

	spin_lock(&sh->lock);
	spin_lock_irq(&conf->device_lock);
	bh->b_reqnext = NULL;
	if (rw == READ)
		bhp = &sh->bh_read[dd_idx];
	else
		bhp = &sh->bh_write[dd_idx];
	/* append at the tail; multiple requests for one sector are legal
	 * but unusual enough to log */
	while (*bhp) {
		printk(KERN_NOTICE "raid5: multiple %d requests for sector %ld\n", rw, sh->sector);
		bhp = & (*bhp)->b_reqnext;
	}
	*bhp = bh;
	spin_unlock_irq(&conf->device_lock);
	spin_unlock(&sh->lock);

	PRINTK("added bh b#%lu to stripe s#%lu, disk %d.\n", bh->b_blocknr, sh->sector, dd_idx);
}

/*
 * handle_stripe - do things to a stripe.
 *
 * We lock the stripe and then examine the state of various bits
 * to see what needs to be done.
 * Possible results:
 *    return some read request which now have data
 *    return some write requests which are safely on disc
 *    schedule a read on some buffers
 *    schedule a write of some buffers
 *    return confirmation of parity correctness
 *
 * Parity calculations are done inside the stripe lock
 * buffers are taken off read_list or write_list, and bh_cache buffers
 * get BH_Lock set before the stripe lock is released.
 *
 */
static void handle_stripe(struct stripe_head *sh)
{
	raid5_conf_t *conf = sh->raid_conf;
	int disks = conf->raid_disks;
	struct buffer_head *return_ok= NULL, *return_fail = NULL;
	int action[MD_SB_DISKS];	/* per-disk I/O to schedule (e.g. READ+1) */
	int i;
	int syncing;
	int locked=0, uptodate=0, to_read=0, to_write=0, failed=0, written=0;
	int failed_num=0;
	struct buffer_head *bh;

	PRINTK("handling stripe %ld, cnt=%d, pd_idx=%d\n", sh->sector, atomic_read(&sh->count), sh->pd_idx);
	memset(action, 0, sizeof(action));

	spin_lock(&sh->lock);
	clear_bit(STRIPE_HANDLE, &sh->state);
	clear_bit(STRIPE_DELAYED, &sh->state);

	syncing = test_bit(STRIPE_SYNCING, &sh->state);

	/* Now to look around and see what can be done */
	for (i=disks; i--; ) {
		bh = sh->bh_cache[i];
		PRINTK("check %d: state 0x%lx read %p write %p written %p\n", i, bh->b_state, sh->bh_read[i], sh->bh_write[i], sh->bh_written[i]);
		/* maybe we can reply to a read */
		if (buffer_uptodate(bh) && sh->bh_read[i]) {
			struct buffer_head *rbh, *rbh2;
			PRINTK("Return read for disc %d\n", i);
			/* detach the whole read list under the device lock,
			 * then copy data and chain requests onto return_ok */
			spin_lock_irq(&conf->device_lock);
			rbh = sh->bh_read[i];
			sh->bh_read[i] = NULL;
			spin_unlock_irq(&conf->device_lock);
			while (rbh) {
				char *bdata;
				bdata = bh_kmap(rbh);
				memcpy(bdata, bh->b_data, bh->b_size);
				bh_kunmap(rbh);
				rbh2 = rbh->b_reqnext;
				rbh->b_reqnext = return_ok;
				return_ok = rbh;
				rbh = rbh2;
			}
		}
		/* now count some things */
		if (buffer_locked(bh))
			locked++;
		if (buffer_uptodate(bh))
			uptodate++;
		if (sh->bh_read[i])
			to_read++;
		if (sh->bh_write[i])
			to_write++;
		if (sh->bh_written[i])
			written++;
		if (!conf->disks[i].operational) {
			failed++;
			failed_num = i;	/* only meaningful when failed == 1 */
		}
	}
	PRINTK("locked=%d uptodate=%d to_read=%d to_write=%d failed=%d failed_num=%d\n", locked, uptodate, to_read, to_write, failed, failed_num);

	/* check if the array has lost two devices and, if so, some requests might
	 * need to be failed
	 */
	if (failed > 1 && to_read+to_write) {
		for (i=disks; i--; ) {
			/* fail all writes first */
			if (sh->bh_write[i])
				to_write--;
			while ((bh = sh->bh_write[i])) {
				sh->bh_write[i] = bh->b_reqnext;
				bh->b_reqnext = return_fail;
				return_fail = bh;
			}
			/* fail any reads if this device is non-operational */
			if (!conf->disks[i].operational) {
				spin_lock_irq(&conf->device_lock);
				if (sh->bh_read[i])
					to_read--;
				while ((bh = sh->bh_read[i])) {
					sh->bh_read[i] = bh->b_reqnext;
					bh->b_reqnext = return_fail;
					return_fail = bh;
				}
				spin_unlock_irq(&conf->device_lock);
			}
		}
	}
	if (failed > 1 && syncing) {
		/* with two failures the stripe cannot be resynced */
		md_done_sync(conf->mddev, (sh->size>>9) - sh->sync_redone,0);
		clear_bit(STRIPE_SYNCING, &sh->state);
		syncing = 0;
	}

	/* might be able to return some write requests if the parity block
	 * is safe, or on a failed drive
	 */
	bh = sh->bh_cache[sh->pd_idx];
	if ( written &&
	     ( (conf->disks[sh->pd_idx].operational && !buffer_locked(bh) && buffer_uptodate(bh))
	       || (failed == 1 && failed_num == sh->pd_idx)) ) {
		/* any written block on a uptodate or failed drive can be returned */
		for (i=disks; i--; )
			if (sh->bh_written[i]) {
				bh = sh->bh_cache[i];
				if (!conf->disks[sh->pd_idx].operational ||
				    (!buffer_locked(bh) && buffer_uptodate(bh)) ) {
					/* maybe we can return some write requests */
					struct buffer_head *wbh, *wbh2;
					PRINTK("Return write for disc %d\n", i);
					wbh = sh->bh_written[i];
					sh->bh_written[i] = NULL;
					while (wbh) {
						wbh2 = wbh->b_reqnext;
						wbh->b_reqnext = return_ok;
						return_ok = wbh;
						wbh = wbh2;
					}
				}
			}
	}

	/* Now we might consider reading some blocks, either to check/generate
	 * parity, or to satisfy requests
	 */
	if (to_read || (syncing && (uptodate+failed < disks))) {
		for (i=disks; i--;) {
			bh = sh->bh_cache[i];
			if (!buffer_locked(bh) && !buffer_uptodate(bh) &&
			    (sh->bh_read[i] || syncing ||
			     (failed && sh->bh_read[failed_num]))) {
				/* we would like to get this block, possibly
				 * by computing it, but we might not be able to
				 */
				if (uptodate == disks-1) {
					/* all other blocks present: reconstruct by XOR */
					PRINTK("Computing block %d\n", i);
					compute_block(sh, i);
					uptodate++;
				} else if (conf->disks[i].operational) {
					set_bit(BH_Lock, &bh->b_state);
					action[i] = READ+1;
					/* if I am just reading this block and we don't
					   have a failed drive, or any pending writes then
					   sidestep the cache */
					if (sh->bh_page[i])
						BUG();
					if (sh->bh_read[i] && !sh->bh_read[i]->b_reqnext &&
					    ! syncing && !failed && !to_write) {
						/* read straight into the requester's page;
						 * bh_page[] remembers the cache page to restore */
						sh->bh_page[i] = sh->bh_cache[i]->b_page;
						sh->bh_cache[i]->b_page = sh->bh_read[i]->b_page;
						sh->bh_cache[i]->b_data = sh->bh_read[i]->b_data;
					}
					locked++;
					PRINTK("Reading block %d (sync=%d)\n", i, syncing);
					if (syncing)
						md_sync_acct(conf->disks[i].dev, bh->b_size>>9);
				}
			}
		}
		set_bit(STRIPE_HANDLE, &sh->state);
	}

	/* now to consider writing and what else, if anything should be read */
	if (to_write) {
		int rmw=0, rcw=0;
		/* cost both strategies: read-modify-write needs the old data
		 * and parity; reconstruct-write needs every other data block */
		for (i=disks ; i--;) {
			/* would I have to read this buffer for read_modify_write */
			bh = sh->bh_cache[i];
			if ((sh->bh_write[i] || i == sh->pd_idx) &&
			    (!buffer_locked(bh) || sh->bh_page[i]) &&
			    !buffer_uptodate(bh)) {
				if (conf->disks[i].operational
					/* && !(conf->resync_parity && i == sh->pd_idx) */
					)
					rmw++;
				else
					rmw += 2*disks; /* cannot read it */
			}
			/* Would I have to read this buffer for reconstruct_write */
			if (!sh->bh_write[i] && i != sh->pd_idx &&
			    (!buffer_locked(bh) || sh->bh_page[i]) &&
			    !buffer_uptodate(bh)) {
				if (conf->disks[i].operational)
					rcw++;
				else
					rcw += 2*disks;	/* cannot read it */
			}
		}
		PRINTK("for sector %ld, rmw=%d rcw=%d\n", sh->sector, rmw, rcw);
		set_bit(STRIPE_HANDLE, &sh->state);
		if (rmw < rcw && rmw > 0)
			/* prefer read-modify-write, but need to get some data */
			for (i=disks; i--;) {
				bh = sh->bh_cache[i];
				if ((sh->bh_write[i] || i == sh->pd_idx) &&
				    !buffer_locked(bh) && !buffer_uptodate(bh) &&
				    conf->disks[i].operational) {
					if (test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
						PRINTK("Read_old block %d for r-m-w\n", i);
						set_bit(BH_Lock, &bh->b_state);
						action[i] = READ+1;
						locked++;
/* NOTE(review): the excerpt ends here -- the remainder of handle_stripe
 * (rcw pre-reads, write scheduling, parity check/sync completion and the
 * return of return_ok/return_fail lists) lies outside this chunk. */
/*
 * Code-viewer page footer (keyboard-shortcut help), not part of the
 * original kernel source; retained, commented out, for provenance:
 *   ⌨️ 快捷键说明 (keyboard shortcuts)
 *   复制代码 (copy code)        Ctrl + C
 *   搜索代码 (search code)      Ctrl + F
 *   全屏模式 (full screen)      F11
 *   切换主题 (toggle theme)     Ctrl + Shift + D
 *   显示快捷键 (show shortcuts) ?
 *   增大字号 (larger font)      Ctrl + =
 *   减小字号 (smaller font)     Ctrl + -
 */