raid5-stripe-by-stripe-handling-rhel5.patch
From "lustre 1.6.5 source code" · PATCH code · 285 lines total
PATCH
285 lines
diff -pur linux-2.6.18-53.orig/drivers/md/raid5.c linux-2.6.18-53/drivers/md/raid5.c--- linux-2.6.18-53.orig/drivers/md/raid5.c 2007-12-28 14:55:08.000000000 +0800+++ linux-2.6.18-53/drivers/md/raid5.c 2007-12-28 18:52:08.000000000 +0800@@ -2626,6 +2626,35 @@ static int raid5_issue_flush(request_que return ret; } +static inline int raid5_expanding_overlap(raid5_conf_t *conf, struct bio *bi)+{+ sector_t first_sector, last_sector;++ if (likely(conf->expand_progress == MaxSector))+ return 0;++ first_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1);+ last_sector = bi->bi_sector + (bi->bi_size>>9);++ return (first_sector < conf->expand_progress &&+ last_sector >= conf->expand_lo);+}++static inline int raid5_redo_bio(raid5_conf_t *conf, struct bio *bi, int disks, sector_t sector)+{+ int redo = 0;++ if (likely(conf->expand_progress == MaxSector))+ return 0;++ spin_lock_irq(&conf->device_lock);+ redo = (raid5_expanding_overlap(conf, bi) ||+ (unlikely(sector < conf->expand_progress) &&+ disks == conf->previous_raid_disks));+ spin_unlock_irq(&conf->device_lock);+ return redo;+}+ static int make_request(request_queue_t *q, struct bio * bi) { mddev_t *mddev = q->queuedata;@@ -2636,6 +2665,14 @@ static int make_request(request_queue_t struct stripe_head *sh; const int rw = bio_data_dir(bi); int remaining;+ sector_t stripe, sectors, block, r_sector, b_sector;+ int sectors_per_chunk = conf->chunk_size >> 9;+ int stripes_per_chunk, sectors_per_block;+ int sectors_per_stripe;+ int i, j;++ DEFINE_WAIT(w);+ int disks, data_disks; atomic_inc(&conf->in_reqs_in_queue); @@ -2653,105 +2690,136 @@ static int make_request(request_queue_t else atomic_inc(&conf->reads_in); - logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1); last_sector = bi->bi_sector + (bi->bi_size>>9); bi->bi_next = NULL; bi->bi_phys_segments = 1; /* over-loaded to count active stripes */ - for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {- DEFINE_WAIT(w);- int disks, data_disks;-- 
retry:- prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);- if (likely(conf->expand_progress == MaxSector))- disks = conf->raid_disks;- else {- /* spinlock is needed as expand_progress may be- * 64bit on a 32bit platform, and so it might be- * possible to see a half-updated value- * Ofcourse expand_progress could change after- * the lock is dropped, so once we get a reference- * to the stripe that we think it is, we will have- * to check again.- */- spin_lock_irq(&conf->device_lock);- disks = conf->raid_disks;- if (logical_sector >= conf->expand_progress)- disks = conf->previous_raid_disks;- else {- if (logical_sector >= conf->expand_lo) {- spin_unlock_irq(&conf->device_lock);- schedule();- goto retry;- }- }- spin_unlock_irq(&conf->device_lock);- }- data_disks = disks - conf->max_degraded;+ sectors = bi->bi_size >> 9;+ stripes_per_chunk = conf->chunk_size / STRIPE_SIZE; - new_sector = raid5_compute_sector(logical_sector, disks, data_disks,- &dd_idx, &pd_idx, conf);- PRINTK("raid5: make_request, sector %llu logical %llu\n",- (unsigned long long)new_sector, - (unsigned long long)logical_sector);+redo_bio:+ /* stripe by stripe handle needs a stable raid layout, so if this+ * reuqest covers the expanding region, wait it over. + * Furthermore, we may get here with partial request handled, so+ * wait for the bi_phys_segment to be 1 also. 
-jay */+ spin_lock_irq(&conf->device_lock);+ wait_event_lock_irq(conf->wait_for_overlap,+ (bi->bi_phys_segments == 1) &&+ !raid5_expanding_overlap(conf, bi),+ conf->device_lock,+ (unplug_slaves(conf->mddev), atomic_inc(&conf->expanding_overlap)));++ disks = conf->raid_disks;+ if (unlikely(logical_sector >= conf->expand_progress))+ disks = conf->previous_raid_disks;+ data_disks = disks - conf->max_degraded;+ spin_unlock_irq(&conf->device_lock); - sh = get_active_stripe(conf, new_sector, disks, pd_idx, (bi->bi_rw&RWA_MASK));- if (sh) {- if (unlikely(conf->expand_progress != MaxSector)) {- /* expansion might have moved on while waiting for a- * stripe, so we must do the range check again.- * Expansion could still move past after this- * test, but as we are holding a reference to- * 'sh', we know that if that happens,- * STRIPE_EXPANDING will get set and the expansion- * won't proceed until we finish with the stripe.- */- int must_retry = 0;- spin_lock_irq(&conf->device_lock);- if (logical_sector < conf->expand_progress &&- disks == conf->previous_raid_disks)- /* mismatch, need to try again */- must_retry = 1;- spin_unlock_irq(&conf->device_lock);- if (must_retry) {- release_stripe(sh);- goto retry;+ /* compute the block # */+ sectors_per_stripe = STRIPE_SECTORS * data_disks;+ sectors_per_block = stripes_per_chunk * sectors_per_stripe;++ block = logical_sector & ~((sector_t)sectors_per_block - 1);+ sector_div(block, sectors_per_block);++repeat:+ stripe = block * (sectors_per_block / data_disks);+ b_sector = stripe * data_disks;+ /* iterate through all stripes in this block,+ * where block is a set of internal stripes+ * which covers chunk */++ for (i = 0; i < stripes_per_chunk && sectors > 0; i++) {+ r_sector = b_sector + (i * STRIPE_SECTORS);+ sh = NULL;+ /* iterrate through all pages in the stripe */+ for (j = 0; j < data_disks && sectors > 0; j++) {+ DEFINE_WAIT(w);++ if (r_sector + STRIPE_SECTORS <= bi->bi_sector ||+ r_sector >= last_sector) {+ r_sector += 
sectors_per_chunk;+ continue;+ }++retry:+ prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);+ new_sector = raid5_compute_sector(r_sector, disks,+ data_disks, &dd_idx,+ &pd_idx, conf);+ if (sh == NULL) {+ sh = get_active_stripe(conf, new_sector, disks, pd_idx,+ (bi->bi_rw&RWA_MASK));+ if (sh) {+ /* we're handling the bio stripe by stripe, so when we found+ * the raid layout has been changed, we have to redo the + * whole bio because we don't which sectors in it has been+ * done, and which is not done. -jay */+ if (raid5_redo_bio(conf, bi, disks, logical_sector))+ goto redo_bio;++ if (test_bit(STRIPE_EXPANDING, &sh->state)) {+ /* Stripe is busy expanding or+ * add failed due to overlap. Flush everything+ * and wait a while+ */+ release_stripe(sh);+ sh = NULL;+ raid5_unplug_device(mddev->queue);+ schedule();+ goto retry;+ }+ } else {+ /* cannot get stripe for read-ahead, just give-up */+ finish_wait(&conf->wait_for_overlap, &w);+ clear_bit(BIO_UPTODATE, &bi->bi_flags);+ sectors = 0;+ break; } }+ /* FIXME what if we get a false positive because these * are being updated. */- if (logical_sector >= mddev->suspend_lo &&- logical_sector < mddev->suspend_hi) {+ if (r_sector >= mddev->suspend_lo &&+ r_sector < mddev->suspend_hi) {+ handle_stripe(sh, NULL); release_stripe(sh);+ sh = NULL; schedule(); goto retry; } - if (test_bit(STRIPE_EXPANDING, &sh->state) ||- !add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK))) {- /* Stripe is busy expanding or- * add failed due to overlap. 
Flush everything- * and wait a while- */- raid5_unplug_device(mddev->queue);+ if (!add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK))) {+ handle_stripe(sh, NULL); release_stripe(sh);+ sh = NULL;+ raid5_unplug_device(mddev->queue); schedule(); goto retry; } finish_wait(&conf->wait_for_overlap, &w);++ BUG_ON (new_sector != stripe);+ sectors -= STRIPE_SECTORS;+ if (bi->bi_sector > r_sector)+ sectors += bi->bi_sector - r_sector;+ if (r_sector + STRIPE_SECTORS > last_sector)+ sectors += r_sector + STRIPE_SECTORS - last_sector;+ r_sector += sectors_per_chunk;+ }+ if (sh) { handle_stripe(sh, NULL); release_stripe(sh);- } else {- /* cannot get stripe for read-ahead, just give-up */- clear_bit(BIO_UPTODATE, &bi->bi_flags);- finish_wait(&conf->wait_for_overlap, &w);- break;+ sh = NULL; }- + stripe += STRIPE_SECTORS; }+ block++;+ if (sectors > 0)+ goto repeat;+ spin_lock_irq(&conf->device_lock); remaining = --bi->bi_phys_segments; spin_unlock_irq(&conf->device_lock);@@ -3439,6 +3507,8 @@ static void status (struct seq_file *seq atomic_read(&conf->active_stripes), atomic_read(&conf->in_reqs_in_queue), atomic_read(&conf->out_reqs_in_queue));+ seq_printf (seq, "\t\t%u expanding overlap\n",+ atomic_read(&conf->expanding_overlap)); #if RAID5_DEBUG seq_printf (seq, "\n"); printall(seq, conf);diff -pur linux-2.6.18-53.orig/include/linux/raid/raid5.h linux-2.6.18-53/include/linux/raid/raid5.h--- linux-2.6.18-53.orig/include/linux/raid/raid5.h 2007-12-28 14:55:08.000000000 +0800+++ linux-2.6.18-53/include/linux/raid/raid5.h 2007-12-28 18:09:37.000000000 +0800@@ -278,6 +278,7 @@ struct raid5_private_data { atomic_t bit_delayed; atomic_t in_reqs_in_queue; atomic_t out_reqs_in_queue;+ atomic_t expanding_overlap; }; typedef struct raid5_private_data raid5_conf_t;
⌨️ Keyboard shortcuts
Copy code: Ctrl + C
Search code: Ctrl + F
Fullscreen mode: F11
Increase font size: Ctrl + =
Decrease font size: Ctrl + -
Show shortcuts: ?