raid5-stripe-by-stripe-handling-rhel5.patch

来自「lustre 1.6.5 source code」· PATCH 代码 · 共 285 行

PATCH
285
字号
diff -pur linux-2.6.18-53.orig/drivers/md/raid5.c linux-2.6.18-53/drivers/md/raid5.c--- linux-2.6.18-53.orig/drivers/md/raid5.c	2007-12-28 14:55:08.000000000 +0800+++ linux-2.6.18-53/drivers/md/raid5.c	2007-12-28 18:52:08.000000000 +0800@@ -2626,6 +2626,35 @@ static int raid5_issue_flush(request_que 	return ret; } +static inline int raid5_expanding_overlap(raid5_conf_t *conf, struct bio *bi)+{+	sector_t first_sector, last_sector;++	if (likely(conf->expand_progress == MaxSector))+		return 0;++	first_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1);+	last_sector = bi->bi_sector + (bi->bi_size>>9);++	return (first_sector < conf->expand_progress &&+		last_sector >= conf->expand_lo);+}++static inline int raid5_redo_bio(raid5_conf_t *conf, struct bio *bi, int disks, sector_t sector)+{+	int redo = 0;++	if (likely(conf->expand_progress == MaxSector))+		return 0;++	spin_lock_irq(&conf->device_lock);+	redo = (raid5_expanding_overlap(conf, bi) ||+		(unlikely(sector < conf->expand_progress) &&+		disks == conf->previous_raid_disks));+	spin_unlock_irq(&conf->device_lock);+	return redo;+}+ static int make_request(request_queue_t *q, struct bio * bi) { 	mddev_t *mddev = q->queuedata;@@ -2636,6 +2665,14 @@ static int make_request(request_queue_t  	struct stripe_head *sh; 	const int rw = bio_data_dir(bi); 	int remaining;+	sector_t stripe, sectors, block, r_sector, b_sector;+	int sectors_per_chunk = conf->chunk_size >> 9;+	int stripes_per_chunk, sectors_per_block;+	int sectors_per_stripe;+	int i, j;++	DEFINE_WAIT(w);+	int disks, data_disks;  	atomic_inc(&conf->in_reqs_in_queue); @@ -2653,105 +2690,136 @@ static int make_request(request_queue_t  	else 		atomic_inc(&conf->reads_in); - 	logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1); 	last_sector = bi->bi_sector + (bi->bi_size>>9); 	bi->bi_next = NULL; 	bi->bi_phys_segments = 1;	/* over-loaded to count active stripes */ -	for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {-		DEFINE_WAIT(w);-		
int disks, data_disks;--	retry:-		prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);-		if (likely(conf->expand_progress == MaxSector))-			disks = conf->raid_disks;-		else {-			/* spinlock is needed as expand_progress may be-			 * 64bit on a 32bit platform, and so it might be-			 * possible to see a half-updated value-			 * Ofcourse expand_progress could change after-			 * the lock is dropped, so once we get a reference-			 * to the stripe that we think it is, we will have-			 * to check again.-			 */-			spin_lock_irq(&conf->device_lock);-			disks = conf->raid_disks;-			if (logical_sector >= conf->expand_progress)-				disks = conf->previous_raid_disks;-			else {-				if (logical_sector >= conf->expand_lo) {-					spin_unlock_irq(&conf->device_lock);-					schedule();-					goto retry;-				}-			}-			spin_unlock_irq(&conf->device_lock);-		}-		data_disks = disks - conf->max_degraded;+	sectors = bi->bi_size >> 9;+	stripes_per_chunk = conf->chunk_size / STRIPE_SIZE; - 		new_sector = raid5_compute_sector(logical_sector, disks, data_disks,-						  &dd_idx, &pd_idx, conf);-		PRINTK("raid5: make_request, sector %llu logical %llu\n",-			(unsigned long long)new_sector, -			(unsigned long long)logical_sector);+redo_bio:+	/* stripe by stripe handle needs a stable raid layout, so if this+	 * request covers the expanding region, wait until it is over. +	 * Furthermore, we may get here with partial request handled, so+	 * wait for the bi_phys_segments to be 1 also. 
-jay */+	spin_lock_irq(&conf->device_lock);+	wait_event_lock_irq(conf->wait_for_overlap,+			(bi->bi_phys_segments == 1) &&+			!raid5_expanding_overlap(conf, bi),+			conf->device_lock,+			(unplug_slaves(conf->mddev), atomic_inc(&conf->expanding_overlap)));++	disks = conf->raid_disks;+	if (unlikely(logical_sector >= conf->expand_progress))+		disks = conf->previous_raid_disks;+	data_disks = disks - conf->max_degraded;+	spin_unlock_irq(&conf->device_lock); -		sh = get_active_stripe(conf, new_sector, disks, pd_idx, (bi->bi_rw&RWA_MASK));-		if (sh) {-			if (unlikely(conf->expand_progress != MaxSector)) {-				/* expansion might have moved on while waiting for a-				 * stripe, so we must do the range check again.-				 * Expansion could still move past after this-				 * test, but as we are holding a reference to-				 * 'sh', we know that if that happens,-				 *  STRIPE_EXPANDING will get set and the expansion-				 * won't proceed until we finish with the stripe.-				 */-				int must_retry = 0;-				spin_lock_irq(&conf->device_lock);-				if (logical_sector <  conf->expand_progress &&-				    disks == conf->previous_raid_disks)-					/* mismatch, need to try again */-					must_retry = 1;-				spin_unlock_irq(&conf->device_lock);-				if (must_retry) {-					release_stripe(sh);-					goto retry;+	/* compute the block # */+	sectors_per_stripe = STRIPE_SECTORS * data_disks;+	sectors_per_block = stripes_per_chunk * sectors_per_stripe;++	block = logical_sector & ~((sector_t)sectors_per_block - 1);+	sector_div(block, sectors_per_block);++repeat:+	stripe = block * (sectors_per_block / data_disks);+	b_sector = stripe * data_disks;+	/* iterate through all stripes in this block,+	 * where block is a set of internal stripes+	 * which covers chunk */++	for (i = 0; i < stripes_per_chunk && sectors > 0; i++) {+		r_sector = b_sector + (i * STRIPE_SECTORS);+		sh = NULL;+		/* iterate through all pages in the stripe */+		for (j = 0; j < data_disks && sectors > 0; j++) {+			DEFINE_WAIT(w);++			if 
(r_sector + STRIPE_SECTORS <= bi->bi_sector ||+			    r_sector >= last_sector) {+				r_sector += sectors_per_chunk;+				continue;+			}++retry:+			prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);+			new_sector = raid5_compute_sector(r_sector, disks,+							data_disks, &dd_idx,+							&pd_idx, conf);+			if (sh == NULL) {+				sh = get_active_stripe(conf, new_sector, disks, pd_idx,+							(bi->bi_rw&RWA_MASK));+				if (sh) {+					/* we're handling the bio stripe by stripe, so when we find+					 * the raid layout has been changed, we have to redo the +					 * whole bio because we don't know which sectors in it have+					 * been done and which have not. -jay */+					if (raid5_redo_bio(conf, bi, disks, logical_sector))+						goto redo_bio;++					if (test_bit(STRIPE_EXPANDING, &sh->state)) {+						/* Stripe is busy expanding or+						 * add failed due to overlap.  Flush everything+						 * and wait a while+						 */+						release_stripe(sh);+						sh = NULL;+						raid5_unplug_device(mddev->queue);+						schedule();+						goto retry;+					}+				} else {+					/* cannot get stripe for read-ahead, just give-up */+					finish_wait(&conf->wait_for_overlap, &w);+					clear_bit(BIO_UPTODATE, &bi->bi_flags);+					sectors = 0;+					break; 				} 			}+ 			/* FIXME what if we get a false positive because these 			 * are being updated. 			 */-			if (logical_sector >= mddev->suspend_lo &&-			    logical_sector < mddev->suspend_hi) {+			if (r_sector >= mddev->suspend_lo &&+			    r_sector < mddev->suspend_hi) {+				handle_stripe(sh, NULL); 				release_stripe(sh);+				sh = NULL; 				schedule(); 				goto retry; 			} -			if (test_bit(STRIPE_EXPANDING, &sh->state) ||-			    !add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK))) {-				/* Stripe is busy expanding or-				 * add failed due to overlap.  
Flush everything-				 * and wait a while-				 */-				raid5_unplug_device(mddev->queue);+			if (!add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK))) {+				handle_stripe(sh, NULL); 				release_stripe(sh);+				sh = NULL;+				raid5_unplug_device(mddev->queue); 				schedule(); 				goto retry; 			} 			finish_wait(&conf->wait_for_overlap, &w);++			BUG_ON (new_sector != stripe);+			sectors -= STRIPE_SECTORS;+			if (bi->bi_sector > r_sector)+				sectors += bi->bi_sector - r_sector;+			if (r_sector + STRIPE_SECTORS > last_sector)+				sectors += r_sector + STRIPE_SECTORS - last_sector;+			r_sector += sectors_per_chunk;+		}+		if (sh) { 			handle_stripe(sh, NULL); 			release_stripe(sh);-		} else {-			/* cannot get stripe for read-ahead, just give-up */-			clear_bit(BIO_UPTODATE, &bi->bi_flags);-			finish_wait(&conf->wait_for_overlap, &w);-			break;+			sh = NULL; 		}-			+		stripe += STRIPE_SECTORS; 	}+	block++;+	if (sectors > 0)+		goto repeat;+ 	spin_lock_irq(&conf->device_lock); 	remaining = --bi->bi_phys_segments; 	spin_unlock_irq(&conf->device_lock);@@ -3439,6 +3507,8 @@ static void status (struct seq_file *seq 			atomic_read(&conf->active_stripes), 			atomic_read(&conf->in_reqs_in_queue), 			atomic_read(&conf->out_reqs_in_queue));+	seq_printf (seq, "\t\t%u expanding overlap\n",+			atomic_read(&conf->expanding_overlap)); #if RAID5_DEBUG 	seq_printf (seq, "\n"); 	printall(seq, conf);diff -pur linux-2.6.18-53.orig/include/linux/raid/raid5.h linux-2.6.18-53/include/linux/raid/raid5.h--- linux-2.6.18-53.orig/include/linux/raid/raid5.h	2007-12-28 14:55:08.000000000 +0800+++ linux-2.6.18-53/include/linux/raid/raid5.h	2007-12-28 18:09:37.000000000 +0800@@ -278,6 +278,7 @@ struct raid5_private_data { 	atomic_t		bit_delayed; 	atomic_t		in_reqs_in_queue; 	atomic_t		out_reqs_in_queue;+	atomic_t		expanding_overlap; };  typedef struct raid5_private_data raid5_conf_t;

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?