📄 raid5-serialize-ovelapping-reqs.patch
字号:
RAID5 wasn't designed to support overlapping requests because
in Linux all I/Os are serialized by the page/buffer lock. As Lustre
doesn't use the page cache on the server, we need to serialize I/Os in RAID5.

Index: linux-2.6.9/include/linux/raid/raid5.h
===================================================================
--- linux-2.6.9.orig/include/linux/raid/raid5.h	2006-05-22 00:11:21.000000000 +0400
+++ linux-2.6.9/include/linux/raid/raid5.h	2006-05-22 00:11:21.000000000 +0400
@@ -134,6 +134,7 @@ struct stripe_head {
 	unsigned long state; /* state flags */
 	atomic_t count; /* nr of active thread/requests */
 	spinlock_t lock;
+	wait_queue_head_t wait; /* waitchan for overlapped bio's */
 	struct r5dev {
 		struct bio req;
 		struct bio_vec vec;
Index: linux-2.6.9/drivers/md/raid5.c
===================================================================
--- linux-2.6.9.orig/drivers/md/raid5.c	2006-05-22 00:11:21.000000000 +0400
+++ linux-2.6.9/drivers/md/raid5.c	2006-05-22 00:19:27.000000000 +0400
@@ -308,6 +308,7 @@ static int grow_stripes(raid5_conf_t *co
 		memset(sh, 0, sizeof(*sh) + (devs-1)*sizeof(struct r5dev));
 		sh->raid_conf = conf;
 		sh->lock = SPIN_LOCK_UNLOCKED;
+		init_waitqueue_head(&sh->wait);
 
 		if (grow_buffers(sh, conf->raid_disks)) {
 			shrink_buffers(sh, conf->raid_disks);
@@ -878,6 +879,9 @@ static void compute_parity(struct stripe
 		set_bit(R5_LOCKED, &sh->dev[pd_idx].flags);
 	} else
 		clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
+
+	/* probably someone waits for our completion? */
+	wake_up(&sh->wait);
 }
 
 /*
@@ -885,7 +889,7 @@ static void compute_parity(struct stripe
  * toread/towrite point to the first in a chain.
  * The bi_next chain must be in order.
  */
-static void add_stripe_bio (struct stripe_head *sh, struct bio *bi, int dd_idx, int forwrite)
+static int add_stripe_bio (struct stripe_head *sh, struct bio *bi, int dd_idx, int forwrite)
 {
 	struct bio **bip;
 	raid5_conf_t *conf = sh->raid_conf;
@@ -894,13 +898,21 @@ static void add_stripe_bio (struct strip
 		(unsigned long long)bi->bi_sector,
 		(unsigned long long)sh->sector);
 
-
 	spin_lock(&sh->lock);
 	spin_lock_irq(&conf->device_lock);
 	if (forwrite)
 		bip = &sh->dev[dd_idx].towrite;
 	else
 		bip = &sh->dev[dd_idx].toread;
+
+#if 1
+	if (*bip) {
+		/* overlapping bio, let's wait till first one is completed */
+		spin_unlock_irq(&conf->device_lock);
+		spin_unlock(&sh->lock);
+		return 1;
+	}
+#else
 	while (*bip && (*bip)->bi_sector < bi->bi_sector) {
 		BUG_ON((*bip)->bi_sector + ((*bip)->bi_size >> 9) > bi->bi_sector);
 		bip = & (*bip)->bi_next;
@@ -910,6 +922,7 @@ static void add_stripe_bio (struct strip
 		BUG();
 	if (*bip)
 		bi->bi_next = *bip;
+#endif
 	*bip = bi;
 	bi->bi_phys_segments ++;
 	spin_unlock_irq(&conf->device_lock);
@@ -932,6 +945,7 @@ static void add_stripe_bio (struct strip
 		if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS)
 			set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags);
 	}
+	return 0;
 }
 
 /*
@@ -1014,6 +1028,7 @@ static void handle_stripe(struct stripe_
 			rbi = dev->toread;
 			dev->toread = NULL;
 			spin_unlock_irq(&conf->device_lock);
+			wake_up(&sh->wait);
 			while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) {
 				copy_data(0, rbi, dev->page, dev->sector);
 				rbi2 = r5_next_bio(rbi, dev->sector);
@@ -1059,6 +1074,7 @@ static void handle_stripe(struct stripe_
 			bi = sh->dev[i].towrite;
 			sh->dev[i].towrite = NULL;
 			if (bi) to_write--;
+			wake_up(&sh->wait);
 
 			while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS){
 				struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
@@ -1511,6 +1527,16 @@ static inline void raid5_plug_device(rai
 	spin_unlock_irq(&conf->device_lock);
 }
 
+static inline void raid5_wait_stripe(struct stripe_head *sh, int dd_idx, int forwrite)
+{
+	struct bio **bip;
+	if (forwrite)
+		bip = &sh->dev[dd_idx].towrite;
+	else
+		bip = &sh->dev[dd_idx].toread;
+	wait_event(sh->wait, *bip == NULL);
+}
+
 static int make_request (request_queue_t *q, struct bio * bi)
 {
 	mddev_t *mddev = q->queuedata;
@@ -1580,6 +1606,7 @@ repeat:
 			 * if we can't, then it's time to submit
 			 * all collected bio's in order to free
 			 * some space in the cache -bzzz */
+try_stripe:
 			sh = get_active_stripe(conf, new_sector, pd_idx, 1);
 			if (!sh && !(bi->bi_rw&RWA_MASK)) {
 				raid5_flush_bios(conf, bios, raid_disks);
@@ -1587,7 +1614,11 @@ repeat:
 			}
 		}
 		if (sh) {
-			add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK));
+			if (add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK))) {
+				release_stripe(sh);
+				raid5_wait_stripe(sh, dd_idx, bi->bi_rw&RW_MASK);
+				goto try_stripe;
+			}
 		} else {
 			/* cannot get stripe for read-ahead, just give-up */
 			clear_bit(BIO_UPTODATE, &bi->bi_flags);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -