📄 raid5-optimize-memcpy.patch
字号:
In case of full-stripe writes don't copy data into internal cache. This optimization reduces CPU load by roughly 30%. Index: linux-2.6.9/include/linux/raid/raid5.h===================================================================--- linux-2.6.9.orig/include/linux/raid/raid5.h 2006-05-21 17:57:25.000000000 +0400+++ linux-2.6.9/include/linux/raid/raid5.h 2006-05-22 00:10:04.000000000 +0400@@ -152,6 +152,7 @@ struct stripe_head { #define R5_Wantread 4 /* want to schedule a read */ #define R5_Wantwrite 5 #define R5_Syncio 6 /* this io need to be accounted as resync io */+#define R5_Direct 7 /* use page from passed bio to avoid memcpy */ /* * Write methodIndex: linux-2.6.9/drivers/md/raid5.c===================================================================--- linux-2.6.9.orig/drivers/md/raid5.c 2006-05-22 00:10:01.000000000 +0400+++ linux-2.6.9/drivers/md/raid5.c 2006-05-22 00:10:04.000000000 +0400@@ -411,6 +411,8 @@ static int raid5_end_read_request (struc clear_buffer_uptodate(bh); } #endif+ if (test_bit(R5_Direct, &sh->dev[i].flags))+ printk("R5_Direct for READ ?!\n"); clear_bit(R5_LOCKED, &sh->dev[i].flags); set_bit(STRIPE_HANDLE, &sh->state); release_stripe(sh);@@ -449,6 +451,10 @@ static int raid5_end_write_request (stru rdev_dec_pending(conf->disks[i].rdev, conf->mddev); + if (test_bit(R5_Direct, &sh->dev[i].flags)) {+ BUG_ON(sh->dev[i].req.bi_io_vec[0].bv_page == sh->dev[i].page);+ sh->dev[i].req.bi_io_vec[0].bv_page = sh->dev[i].page;+ } clear_bit(R5_LOCKED, &sh->dev[i].flags); set_bit(STRIPE_HANDLE, &sh->state); __release_stripe(conf, sh);@@ -673,6 +679,49 @@ static void copy_data(int frombio, struc } } +static struct page *zero_copy_data(struct bio *bio, sector_t sector)+{+ struct bio_vec *bvl;+ int i;++ for (;bio && bio->bi_sector < sector+STRIPE_SECTORS;+ bio = r5_next_bio(bio, sector) ) {+ int page_offset;+ if (bio->bi_sector >= sector)+ page_offset = (signed)(bio->bi_sector - sector) * 512;+ else + page_offset = (signed)(sector - bio->bi_sector) * -512;+ 
bio_for_each_segment(bvl, bio, i) {+ int len = bio_iovec_idx(bio,i)->bv_len;+ int clen;+ int b_offset = 0; ++ if (page_offset < 0) {+ b_offset = -page_offset;+ page_offset += b_offset;+ len -= b_offset;+ }++ if (len > 0 && page_offset + len > STRIPE_SIZE)+ clen = STRIPE_SIZE - page_offset; + else clen = len;+ + if (clen > 0) {+ BUG_ON(clen < STRIPE_SIZE);+ /*printk(" sector %lu: page %p from index %u\n",+ (unsigned long) sector,+ bio_iovec_idx(bio, i)->bv_page, i);*/+ return bio_iovec_idx(bio, i)->bv_page;+ } + if (clen < len) /* hit end of page */+ break;+ page_offset += len;+ }+ }+ BUG();+ return NULL;+}+ #define check_xor() do { \ if (count == MAX_XOR_BLOCKS) { \ xor_block(count, STRIPE_SIZE, ptr); \@@ -717,6 +766,8 @@ static void compute_parity(struct stripe int i, pd_idx = sh->pd_idx, disks = conf->raid_disks, count; void *ptr[MAX_XOR_BLOCKS]; struct bio *chosen;+ struct page *page;+ int zerocopy = 0; PRINTK("compute_parity, stripe %llu, method %d\n", (unsigned long long)sh->sector, method);@@ -743,13 +794,17 @@ static void compute_parity(struct stripe break; case RECONSTRUCT_WRITE: memset(ptr[0], 0, STRIPE_SIZE);- for (i= disks; i-- ;)+ zerocopy = 1;+ for (i= disks; i-- ;) {+ if (i != pd_idx && !sh->dev[i].towrite)+ zerocopy = 0; if (i!=pd_idx && sh->dev[i].towrite) { chosen = sh->dev[i].towrite; sh->dev[i].towrite = NULL; if (sh->dev[i].written) BUG(); sh->dev[i].written = chosen; }+ } break; case CHECK_PARITY: break;@@ -759,34 +814,62 @@ static void compute_parity(struct stripe count = 1; } - for (i = disks; i--;)- if (sh->dev[i].written) {- sector_t sector = sh->dev[i].sector;- struct bio *wbi = sh->dev[i].written;- while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) {- copy_data(1, wbi, sh->dev[i].page, sector);- wbi = r5_next_bio(wbi, sector);- }+ for (i = disks; i--;) {+ struct bio *wbi = sh->dev[i].written;+ sector_t sector;++ if (!wbi)+ continue;++ sector = sh->dev[i].sector;+ set_bit(R5_LOCKED, &sh->dev[i].flags);+ BUG_ON(test_bit(R5_Direct, 
&sh->dev[i].flags));++ /* check if it's covered by a single page+ and whole stripe is written at once.+ * in this case we can avoid memcpy() */+ if (zerocopy && wbi && wbi->bi_next == NULL && + test_bit(R5_OVERWRITE, &sh->dev[i].flags)) {+ page = zero_copy_data(wbi, sector);+ BUG_ON(PageHighMem(page));+ sh->dev[i].req.bi_io_vec[0].bv_page = page;+ set_bit(R5_Direct, &sh->dev[i].flags);+ clear_bit(R5_UPTODATE, &sh->dev[i].flags);+ continue;+ } - set_bit(R5_LOCKED, &sh->dev[i].flags);- set_bit(R5_UPTODATE, &sh->dev[i].flags);+ set_bit(R5_UPTODATE, &sh->dev[i].flags);+ while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) {+ copy_data(1, wbi, sh->dev[i].page, sector);+ wbi = r5_next_bio(wbi, sector); }+ } switch(method) { case RECONSTRUCT_WRITE: case CHECK_PARITY:- for (i=disks; i--;)- if (i != pd_idx) {- ptr[count++] = page_address(sh->dev[i].page);- check_xor();- }+ for (i=disks; i--;) {+ if (i == pd_idx)+ continue;+ if (test_bit(R5_Direct, &sh->dev[i].flags))+ page = sh->dev[i].req.bi_io_vec[0].bv_page;+ else+ page = sh->dev[i].page;+ ptr[count++] = page_address(page);+ check_xor();+ } break; case READ_MODIFY_WRITE:- for (i = disks; i--;)- if (sh->dev[i].written) {- ptr[count++] = page_address(sh->dev[i].page);- check_xor();- }+ for (i = disks; i--;) {+ if (!sh->dev[i].written)+ continue;+ if (test_bit(R5_Direct, &sh->dev[i].flags))+ page = sh->dev[i].req.bi_io_vec[0].bv_page;+ else+ page = sh->dev[i].page;+ ptr[count++] = page_address(page);+ check_xor();+ } } if (count != 1) xor_block(count, STRIPE_SIZE, ptr);@@ -1012,7 +1094,7 @@ static void handle_stripe(struct stripe_ dev = &sh->dev[sh->pd_idx]; if ( written && ( (test_bit(R5_Insync, &dev->flags) && !test_bit(R5_LOCKED, &dev->flags) &&- test_bit(R5_UPTODATE, &dev->flags))+ (test_bit(R5_UPTODATE, &dev->flags) || test_bit(R5_Direct, &dev->flags))) || (failed == 1 && failed_num == sh->pd_idx)) ) { /* any written block on an uptodate or failed drive can be returned.@@ -1023,13 +1105,16 @@ static void 
handle_stripe(struct stripe_ if (sh->dev[i].written) { dev = &sh->dev[i]; if (!test_bit(R5_LOCKED, &dev->flags) &&- test_bit(R5_UPTODATE, &dev->flags) ) {+ (test_bit(R5_UPTODATE, &dev->flags) ||+ test_bit(R5_Direct, &dev->flags)) ) { /* We can return any write requests */ struct bio *wbi, *wbi2; PRINTK("Return write for disc %d\n", i); spin_lock_irq(&conf->device_lock); wbi = dev->written; dev->written = NULL;+ if (test_bit(R5_Direct, &dev->flags))+ clear_bit(R5_Direct, &dev->flags); while (wbi && wbi->bi_sector < dev->sector + STRIPE_SECTORS) { wbi2 = r5_next_bio(wbi, dev->sector); if (--wbi->bi_phys_segments == 0) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -