raid5.c

Source: Linux kernel source code · C · 2,325 lines total · page 1 of 5

C
2,325
Font size
	if (grow_buffers(sh, conf->raid_disks)) {
		/* page allocation failed: undo and report failure */
		shrink_buffers(sh, conf->raid_disks);
		kmem_cache_free(conf->slab_cache, sh);
		return 0;
	}
	sh->disks = conf->raid_disks;
	/* we just created an active stripe so... */
	atomic_set(&sh->count, 1);
	atomic_inc(&conf->active_stripes);
	INIT_LIST_HEAD(&sh->lru);
	release_stripe(sh);
	return 1;
}

/*
 * Create the initial pool of 'num' stripe_heads backed by a fresh
 * kmem_cache sized for conf->raid_disks devices per stripe.  The
 * (devs-1) in the size calculation suggests stripe_head ends in a
 * one-element r5dev array (struct declared elsewhere — see header).
 * Two cache names are prepared so resize_stripes() can later create a
 * second, differently-named cache while this one still exists.
 *
 * Returns 0 on success, 1 on any allocation failure.
 */
static int grow_stripes(raid5_conf_t *conf, int num)
{
	struct kmem_cache *sc;
	int devs = conf->raid_disks;

	sprintf(conf->cache_name[0], "raid5-%s", mdname(conf->mddev));
	sprintf(conf->cache_name[1], "raid5-%s-alt", mdname(conf->mddev));
	conf->active_name = 0;
	sc = kmem_cache_create(conf->cache_name[conf->active_name],
			       sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev),
			       0, 0, NULL);
	if (!sc)
		return 1;
	conf->slab_cache = sc;
	conf->pool_size = devs;
	while (num--)
		if (!grow_one_stripe(conf))
			return 1;
	return 0;
}

#ifdef CONFIG_MD_RAID5_RESHAPE
static int resize_stripes(raid5_conf_t *conf, int newsize)
{
	/* Make all the stripes able to hold 'newsize' devices.
	 * New slots in each stripe get 'page' set to a new page.
	 *
	 * This happens in stages:
	 * 1/ create a new kmem_cache and allocate the required number of
	 *    stripe_heads.
	 * 2/ gather all the old stripe_heads and transfer the pages across
	 *    to the new stripe_heads.  This will have the side effect of
	 *    freezing the array as once all stripe_heads have been collected,
	 *    no IO will be possible.  Old stripe heads are freed once their
	 *    pages have been transferred over, and the old kmem_cache is
	 *    freed when all stripes are done.
	 * 3/ reallocate conf->disks to be suitably bigger.  If this fails,
	 *    we simply return a failure status - no need to clean anything up.
	 * 4/ allocate new pages for the new slots in the new stripe_heads.
	 *    If this fails, we don't bother trying to shrink the
	 *    stripe_heads down again, we just leave them as they are.
	 *    As each stripe_head is processed the new one is released into
	 *    active service.
	 *
	 * Once step2 is started, we cannot afford to wait for a write,
	 * so we use GFP_NOIO allocations.
	 */
	struct stripe_head *osh, *nsh;
	LIST_HEAD(newstripes);
	struct disk_info *ndisks;
	int err = 0;
	struct kmem_cache *sc;
	int i;

	if (newsize <= conf->pool_size)
		return 0; /* never bother to shrink */

	md_allow_write(conf->mddev);

	/* Step 1 */
	sc = kmem_cache_create(conf->cache_name[1-conf->active_name],
			       sizeof(struct stripe_head)+(newsize-1)*sizeof(struct r5dev),
			       0, 0, NULL);
	if (!sc)
		return -ENOMEM;

	for (i = conf->max_nr_stripes; i; i--) {
		nsh = kmem_cache_alloc(sc, GFP_KERNEL);
		if (!nsh)
			break;

		memset(nsh, 0, sizeof(*nsh) + (newsize-1)*sizeof(struct r5dev));

		nsh->raid_conf = conf;
		spin_lock_init(&nsh->lock);

		list_add(&nsh->lru, &newstripes);
	}
	if (i) {
		/* didn't get enough, give up */
		while (!list_empty(&newstripes)) {
			nsh = list_entry(newstripes.next, struct stripe_head, lru);
			list_del(&nsh->lru);
			kmem_cache_free(sc, nsh);
		}
		kmem_cache_destroy(sc);
		return -ENOMEM;
	}
	/* Step 2 - Must use GFP_NOIO now.
	 * OK, we have enough stripes, start collecting inactive
	 * stripes and copying them over
	 */
	list_for_each_entry(nsh, &newstripes, lru) {
		spin_lock_irq(&conf->device_lock);
		/* Sleep until an inactive stripe_head is available; unplug
		 * the member devices while waiting so pending IO completes. */
		wait_event_lock_irq(conf->wait_for_stripe,
				    !list_empty(&conf->inactive_list),
				    conf->device_lock,
				    unplug_slaves(conf->mddev)
			);
		osh = get_free_stripe(conf);
		spin_unlock_irq(&conf->device_lock);
		atomic_set(&nsh->count, 1);
		/* Steal the old stripe's pages; new slots get no page yet
		 * (filled in at Step 4). */
		for(i=0; i<conf->pool_size; i++)
			nsh->dev[i].page = osh->dev[i].page;
		for( ; i<newsize; i++)
			nsh->dev[i].page = NULL;
		kmem_cache_free(conf->slab_cache, osh);
	}
	kmem_cache_destroy(conf->slab_cache);

	/* Step 3.
	 * At this point, we are holding all the stripes so the array
	 * is completely stalled, so now is a good time to resize
	 * conf->disks.
	 */
	ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO);
	if (ndisks) {
		for (i=0; i<conf->raid_disks; i++)
			ndisks[i] = conf->disks[i];
		kfree(conf->disks);
		conf->disks = ndisks;
	} else
		err = -ENOMEM;

	/* Step 4, return new stripes to service */
	while(!list_empty(&newstripes)) {
		nsh = list_entry(newstripes.next, struct stripe_head, lru);
		list_del_init(&nsh->lru);
		for (i=conf->raid_disks; i < newsize; i++)
			if (nsh->dev[i].page == NULL) {
				struct page *p = alloc_page(GFP_NOIO);
				nsh->dev[i].page = p;
				if (!p)
					err = -ENOMEM;
			}
		release_stripe(nsh);
	}
	/* critical section passed, GFP_NOIO no longer needed */

	conf->slab_cache = sc;
	conf->active_name = 1-conf->active_name;
	conf->pool_size = newsize;
	return err;
}
#endif

/*
 * Remove one idle stripe_head from the pool and free it (buffers first,
 * then the slab object).  Returns 1 if a stripe was dropped, 0 when the
 * inactive list is empty.
 */
static int drop_one_stripe(raid5_conf_t *conf)
{
	struct stripe_head *sh;

	spin_lock_irq(&conf->device_lock);
	sh = get_free_stripe(conf);
	spin_unlock_irq(&conf->device_lock);
	if (!sh)
		return 0;
	/* a stripe on the inactive list must have no users */
	BUG_ON(atomic_read(&sh->count));
	shrink_buffers(sh, conf->pool_size);
	kmem_cache_free(conf->slab_cache, sh);
	atomic_dec(&conf->active_stripes);
	return 1;
}

/* Free the entire stripe pool and destroy its kmem_cache. */
static void shrink_stripes(raid5_conf_t *conf)
{
	while (drop_one_stripe(conf))
		;

	if (conf->slab_cache)
		kmem_cache_destroy(conf->slab_cache);
	conf->slab_cache = NULL;
}

/*
 * bio completion handler for a per-device stripe read.
 *
 * Locates which dev slot's embedded request bio completed, then either
 * marks the data up to date (clearing any prior read-error/rewrite state
 * and resetting the rdev's error counter), or on failure chooses between
 * scheduling a retry (R5_ReadError) and failing the device via md_error().
 */
static void raid5_end_read_request(struct bio * bi, int error)
{
	struct stripe_head *sh = bi->bi_private;
	raid5_conf_t *conf = sh->raid_conf;
	int disks = sh->disks, i;
	int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
	char b[BDEVNAME_SIZE];
	mdk_rdev_t *rdev;

	/* find which device's embedded bio this is */
	for (i=0 ; i<disks; i++)
		if (bi == &sh->dev[i].req)
			break;

	pr_debug("end_read_request %llu/%d, count: %d, uptodate %d.\n",
		(unsigned long long)sh->sector, i, atomic_read(&sh->count),
		uptodate);
	if (i == disks) {
		BUG();
		return;
	}

	if (uptodate) {
		set_bit(R5_UPTODATE, &sh->dev[i].flags);
		if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
			/* a previously failed read has now succeeded */
			rdev = conf->disks[i].rdev;
			printk(KERN_INFO "raid5:%s: read error corrected (%lu sectors at %llu on %s)\n",
			       mdname(conf->mddev), STRIPE_SECTORS,
			       (unsigned long long)sh->sector + rdev->data_offset,
			       bdevname(rdev->bdev, b));
			clear_bit(R5_ReadError, &sh->dev[i].flags);
			clear_bit(R5_ReWrite, &sh->dev[i].flags);
		}
		if (atomic_read(&conf->disks[i].rdev->read_errors))
			atomic_set(&conf->disks[i].rdev->read_errors, 0);
	} else {
		const char *bdn = bdevname(conf->disks[i].rdev->bdev, b);
		int retry = 0;
		rdev = conf->disks[i].rdev;

		clear_bit(R5_UPTODATE, &sh->dev[i].flags);
		atomic_inc(&rdev->read_errors);
		if (conf->mddev->degraded)
			/* no redundancy left to reconstruct from */
			printk(KERN_WARNING "raid5:%s: read error not correctable (sector %llu on %s).\n",
			       mdname(conf->mddev),
			       (unsigned long long)sh->sector + rdev->data_offset,
			       bdn);
		else if (test_bit(R5_ReWrite, &sh->dev[i].flags))
			/* Oh, no!!! */
			printk(KERN_WARNING "raid5:%s: read error NOT corrected!! (sector %llu on %s).\n",
			       mdname(conf->mddev),
			       (unsigned long long)sh->sector + rdev->data_offset,
			       bdn);
		else if (atomic_read(&rdev->read_errors)
			 > conf->max_nr_stripes)
			printk(KERN_WARNING
			       "raid5:%s: Too many read errors, failing device %s.\n",
			       mdname(conf->mddev), bdn);
		else
			retry = 1;
		if (retry)
			set_bit(R5_ReadError, &sh->dev[i].flags);
		else {
			/* give up on this device */
			clear_bit(R5_ReadError, &sh->dev[i].flags);
			clear_bit(R5_ReWrite, &sh->dev[i].flags);
			md_error(conf->mddev, rdev);
		}
	}
	rdev_dec_pending(conf->disks[i].rdev, conf->mddev);
	clear_bit(R5_LOCKED, &sh->dev[i].flags);
	set_bit(STRIPE_HANDLE, &sh->state);
	release_stripe(sh);
}

/*
 * bio completion handler for a per-device stripe write.
 * A failed write immediately fails the device (md_error); the stripe is
 * then re-queued for handling.
 */
static void raid5_end_write_request (struct bio *bi, int error)
{
	struct stripe_head *sh = bi->bi_private;
	raid5_conf_t *conf = sh->raid_conf;
	int disks = sh->disks, i;
	int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);

	/* find which device's embedded bio this is */
	for (i=0 ; i<disks; i++)
		if (bi == &sh->dev[i].req)
			break;

	pr_debug("end_write_request %llu/%d, count %d, uptodate: %d.\n",
		(unsigned long long)sh->sector, i, atomic_read(&sh->count),
		uptodate);
	if (i == disks) {
		BUG();
		return;
	}

	if (!uptodate)
		md_error(conf->mddev, conf->disks[i].rdev);

	rdev_dec_pending(conf->disks[i].rdev, conf->mddev);

	clear_bit(R5_LOCKED, &sh->dev[i].flags);
	set_bit(STRIPE_HANDLE, &sh->state);
	release_stripe(sh);
}


static sector_t compute_blocknr(struct stripe_head *sh, int i);

/*
 * Initialise the per-device bio and biovec embedded in sh->dev[i] so the
 * single-segment request points at that device's cached page for this
 * stripe's sector.
 */
static void raid5_build_block (struct stripe_head *sh, int i)
{
	struct r5dev *dev = &sh->dev[i];

	bio_init(&dev->req);
	dev->req.bi_io_vec = &dev->vec;
	dev->req.bi_vcnt++;
	dev->req.bi_max_vecs++;
	dev->vec.bv_page = dev->page;
	dev->vec.bv_len = STRIPE_SIZE;
	dev->vec.bv_offset = 0;

	dev->req.bi_sector = sh->sector;
	dev->req.bi_private = sh;

	dev->flags = 0;
	dev->sector = compute_blocknr(sh, i);
}

/*
 * md error handler: mark a member device Faulty.  If it was In_sync,
 * increase mddev->degraded under device_lock and abort any running
 * recovery.
 */
static void error(mddev_t *mddev, mdk_rdev_t *rdev)
{
	char b[BDEVNAME_SIZE];
	raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
	pr_debug("raid5: error called\n");

	if (!test_bit(Faulty, &rdev->flags)) {
		set_bit(MD_CHANGE_DEVS, &mddev->flags);
		if (test_and_clear_bit(In_sync, &rdev->flags)) {
			unsigned long flags;
			spin_lock_irqsave(&conf->device_lock, flags);
			mddev->degraded++;
			spin_unlock_irqrestore(&conf->device_lock, flags);
			/*
			 * if recovery was running, make sure it aborts.
			 */
			set_bit(MD_RECOVERY_ERR, &mddev->recovery);
		}
		set_bit(Faulty, &rdev->flags);
		printk (KERN_ALERT
			"raid5: Disk failure on %s, disabling device."
			" Operation continuing on %d devices\n",
			bdevname(rdev->bdev,b), conf->raid_disks - mddev->degraded);
	}
}

/*
 * Input: a 'big' sector number,
 * Output: index of the data and parity disk, and the sector # in them.
 *
 * Maps a logical array sector to (device sector, data-disk index,
 * parity-disk index) according to conf->level (4/5/6) and the layout
 * algorithm.  For RAID6, pd_idx is the P disk and Q is assumed to
 * follow it (see the D D P Q D comments).
 */
static sector_t raid5_compute_sector(sector_t r_sector, unsigned int raid_disks,
			unsigned int data_disks, unsigned int * dd_idx,
			unsigned int * pd_idx, raid5_conf_t *conf)
{
	long stripe;
	unsigned long chunk_number;
	unsigned int chunk_offset;
	sector_t new_sector;
	int sectors_per_chunk = conf->chunk_size >> 9;

	/* First compute the information on this sector */

	/*
	 * Compute the chunk number and the sector offset inside the chunk
	 */
	chunk_offset = sector_div(r_sector, sectors_per_chunk);
	chunk_number = r_sector;
	/* catch truncation of the 64-bit quotient on 32-bit hosts */
	BUG_ON(r_sector != chunk_number);

	/*
	 * Compute the stripe number
	 */
	stripe = chunk_number / data_disks;

	/*
	 * Compute the data disk and parity disk indexes inside the stripe
	 */
	*dd_idx = chunk_number % data_disks;

	/*
	 * Select the parity disk based on the user selected algorithm.
	 */
	switch(conf->level) {
	case 4:
		/* RAID4: parity is always the last disk */
		*pd_idx = data_disks;
		break;
	case 5:
		switch (conf->algorithm) {
		case ALGORITHM_LEFT_ASYMMETRIC:
			*pd_idx = data_disks - stripe % raid_disks;
			if (*dd_idx >= *pd_idx)
				(*dd_idx)++;
			break;
		case ALGORITHM_RIGHT_ASYMMETRIC:
			*pd_idx = stripe % raid_disks;
			if (*dd_idx >= *pd_idx)
				(*dd_idx)++;
			break;
		case ALGORITHM_LEFT_SYMMETRIC:
			*pd_idx = data_disks - stripe % raid_disks;
			*dd_idx = (*pd_idx + 1 + *dd_idx) % raid_disks;
			break;
		case ALGORITHM_RIGHT_SYMMETRIC:
			*pd_idx = stripe % raid_disks;
			*dd_idx = (*pd_idx + 1 + *dd_idx) % raid_disks;
			break;
		default:
			printk(KERN_ERR "raid5: unsupported algorithm %d\n",
				conf->algorithm);
		}
		break;
	case 6:
		/**** FIX THIS ****/
		switch (conf->algorithm) {
		case ALGORITHM_LEFT_ASYMMETRIC:
			*pd_idx = raid_disks - 1 - (stripe % raid_disks);
			if (*pd_idx == raid_disks-1)
				(*dd_idx)++; 	/* Q D D D P */
			else if (*dd_idx >= *pd_idx)
				(*dd_idx) += 2; /* D D P Q D */
			break;
		case ALGORITHM_RIGHT_ASYMMETRIC:
			*pd_idx = stripe % raid_disks;
			if (*pd_idx == raid_disks-1)
				(*dd_idx)++; 	/* Q D D D P */
			else if (*dd_idx >= *pd_idx)
				(*dd_idx) += 2; /* D D P Q D */
			break;
		case ALGORITHM_LEFT_SYMMETRIC:
			*pd_idx = raid_disks - 1 - (stripe % raid_disks);
			*dd_idx = (*pd_idx + 2 + *dd_idx) % raid_disks;
			break;
		case ALGORITHM_RIGHT_SYMMETRIC:
			*pd_idx = stripe % raid_disks;
			*dd_idx = (*pd_idx + 2 + *dd_idx) % raid_disks;
			break;
		default:
			printk (KERN_CRIT "raid6: unsupported algorithm %d\n",
				conf->algorithm);
		}
		break;
	}

	/*
	 * Finally, compute the new sector number
	 */
	new_sector = (sector_t)stripe * sectors_per_chunk + chunk_offset;
	return new_sector;
}

/*
 * Inverse of raid5_compute_sector: given a stripe_head and a device
 * index, recover the logical array sector stored there (0 for the
 * parity device).  NOTE(review): definition continues past this
 * excerpt; only the preamble and the start of the level switch are
 * visible here.
 */
static sector_t compute_blocknr(struct stripe_head *sh, int i)
{
	raid5_conf_t *conf = sh->raid_conf;
	int raid_disks = sh->disks;
	int data_disks = raid_disks - conf->max_degraded;
	sector_t new_sector = sh->sector, check;
	int sectors_per_chunk = conf->chunk_size >> 9;
	sector_t stripe;
	int chunk_offset;
	int chunk_number, dummy1, dummy2, dd_idx = i;
	sector_t r_sector;

	chunk_offset = sector_div(new_sector, sectors_per_chunk);
	stripe = new_sector;
	/* catch truncation of the 64-bit quotient on 32-bit hosts */
	BUG_ON(new_sector != stripe);

	if (i == sh->pd_idx)
		return 0;
	switch(conf->level) {
	case 4: break;

⌨️ Keyboard shortcuts

Copy code: Ctrl + C
Search code: Ctrl + F
Full-screen mode: F11
Increase font size: Ctrl + =
Decrease font size: Ctrl + -
Show shortcuts: ?