📄 raid5-zerocopy-rhel5.patch
字号:
raid5: zero-copy writes for full-page stripe overwrites (linux-2.6.18-53)

What the hunks below do:

  * zero_copy_data() (new): walks the segments of a write bio, remembers the
    page of the segment that starts exactly at the stripe sector, and returns
    that page if the segment in which the bio reaches sector + STRIPE_SECTORS
    uses the same page and PageConstant() is true for it.  Otherwise it
    returns NULL.

  * compute_parity5() / compute_parity6(): for each device with pending
    writes, R5_LOCKED is set.  If there is exactly one write bio and both
    R5_OVERWRITE and R5_Insync are set, zero_copy_data() is tried.  On
    success dev->req.bi_io_vec[0].bv_page is pointed at the bio's page,
    R5_Direct is set, R5_UPTODATE and R5_OVERWRITE are cleared and
    conf->writes_zcopy is incremented.  Otherwise the data is copied into
    the stripe cache page with copy_data() as before and
    conf->writes_copied is incremented.

  * compute_parity5(): a highmem source page is XORed into the parity page
    immediately through a kmap_atomic() mapping instead of being queued in
    ptr[].  compute_parity6() does not use zero-copy for highmem pages.

  * raid5_end_write_request(): when R5_Direct is set, bv_page is pointed
    back at the stripe cache page.  raid5_end_read_request() asserts that
    R5_Direct is not set.

R5_Direct, PageConstant() and the writes_zcopy / writes_copied counters are
not defined by the hunks shown here; presumably they come from companion
changes (TODO confirm).

Fix relative to the previous revision of this patch: kunmap_atomic() is now
given the mapped address returned by kmap_atomic() (h_ptr[1]) rather than
the struct page pointer.

NOTE(review): this copy was recovered from a whitespace-collapsed paste.
Indentation has been rebuilt with tabs, so context lines may need
"patch -l" to match.  The final hunk (compute_parity6, -1127,18 +1210,47)
is cut off after the writes_copied increment and is incomplete here.

diff -pur linux-2.6.18-53.orig/drivers/md/raid5.c linux-2.6.18-53/drivers/md/raid5.c
--- linux-2.6.18-53.orig/drivers/md/raid5.c	2007-12-28 19:09:20.000000000 +0800
+++ linux-2.6.18-53/drivers/md/raid5.c	2007-12-28 19:09:32.000000000 +0800
@@ -633,6 +633,7 @@ static int raid5_end_read_request(struct
 		clear_buffer_uptodate(bh);
 	}
 #endif
+	BUG_ON(test_bit(R5_Direct, &sh->dev[i].flags));
 	clear_bit(R5_LOCKED, &sh->dev[i].flags);
 	set_bit(STRIPE_HANDLE, &sh->state);
 	release_stripe(sh);
@@ -671,6 +672,10 @@ static int raid5_end_write_request (stru
 
 	rdev_dec_pending(conf->disks[i].rdev, conf->mddev);
 
+	if (test_bit(R5_Direct, &sh->dev[i].flags)) {
+		BUG_ON(sh->dev[i].req.bi_io_vec[0].bv_page == sh->dev[i].page);
+		sh->dev[i].req.bi_io_vec[0].bv_page = sh->dev[i].page;
+	}
 	clear_bit(R5_LOCKED, &sh->dev[i].flags);
 	set_bit(STRIPE_HANDLE, &sh->state);
 	__release_stripe(conf, sh);
@@ -911,7 +916,27 @@ static sector_t compute_blocknr(struct s
 	return r_sector;
 }
 
+static struct page *zero_copy_data(struct bio *bio, sector_t sector)
+{
+	sector_t bi_sector = bio->bi_sector;
+	struct page *page = NULL;
+	struct bio_vec *bvl;
+	int i;
 
+	bio_for_each_segment(bvl, bio, i) {
+		if (sector == bi_sector)
+			page = bio_iovec_idx(bio, i)->bv_page;
+		bi_sector += bio_iovec_idx(bio, i)->bv_len >> 9;
+		if (bi_sector >= sector + STRIPE_SECTORS) {
+			/* check if the stripe is covered by one page */
+			if (page == bio_iovec_idx(bio, i)->bv_page &&
+			    PageConstant(page))
+				return page;
+			return NULL;
+		}
+	}
+	return NULL;
+}
 
 /*
  * Copy data between a page in the stripe cache, and one or more bion
@@ -1003,8 +1028,9 @@ static void compute_parity5(struct strip
 {
 	raid5_conf_t *conf = sh->raid_conf;
 	int i, pd_idx = sh->pd_idx, disks = sh->disks, count;
-	void *ptr[MAX_XOR_BLOCKS];
+	void *ptr[MAX_XOR_BLOCKS], *h_ptr[2];
 	struct bio *chosen;
+	struct page *page;
 
 	PRINTK("compute_parity5, stripe %llu, method %d\n",
 		(unsigned long long)sh->sector, method);
@@ -1054,34 +1080,90 @@ static void compute_parity5(struct strip
 		count = 1;
 	}
 
-	for (i = disks; i--;)
-		if (sh->dev[i].written) {
-			sector_t sector = sh->dev[i].sector;
-			struct bio *wbi = sh->dev[i].written;
-			while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) {
-				copy_data(1, wbi, sh->dev[i].page, sector);
-				wbi = r5_next_bio(wbi, sector);
+	for (i = disks; i--;) {
+		struct r5dev *dev = &sh->dev[i];
+		struct bio *wbi = dev->written;
+		sector_t sector;
+
+		if (!wbi)
+			continue;
+
+		sector = dev->sector;
+		set_bit(R5_LOCKED, &sh->dev[i].flags);
+		BUG_ON(test_bit(R5_Direct, &dev->flags));
+
+		/* check if it's covered by a single page
+		 * and whole stripe is written at once.
+		 * in this case we can avoid memcpy() */
+		if (!wbi->bi_next && test_bit(R5_OVERWRITE, &dev->flags) &&
+		    test_bit(R5_Insync, &dev->flags)) {
+			page = zero_copy_data(wbi, sector);
+			if (page) {
+				atomic_inc(&conf->writes_zcopy);
+				dev->req.bi_io_vec[0].bv_page = page;
+				set_bit(R5_Direct, &dev->flags);
+				clear_bit(R5_UPTODATE, &sh->dev[i].flags);
+				clear_bit(R5_OVERWRITE, &sh->dev[i].flags);
+				continue;
 			}
+		}
 
-			set_bit(R5_LOCKED, &sh->dev[i].flags);
-			set_bit(R5_UPTODATE, &sh->dev[i].flags);
+		/* do copy write */
+		atomic_inc(&conf->writes_copied);
+		clear_bit(R5_OVERWRITE, &sh->dev[i].flags);
+		set_bit(R5_UPTODATE, &sh->dev[i].flags);
+		while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) {
+			copy_data(1, wbi, sh->dev[i].page, sector);
+			wbi = r5_next_bio(wbi, sector);
 		}
+	}
 
+	h_ptr[0] = ptr[0];
 	switch(method) {
 	case RECONSTRUCT_WRITE:
 	case CHECK_PARITY:
-		for (i=disks; i--;)
-			if (i != pd_idx) {
-				ptr[count++] = page_address(sh->dev[i].page);
-				check_xor();
+		for (i=disks; i--;) {
+			if (i == pd_idx)
+				continue;
+			if (test_bit(R5_Direct, &sh->dev[i].flags))
+				page = sh->dev[i].req.bi_io_vec[0].bv_page;
+			else
+				page = sh->dev[i].page;
+
+			/* have to compute the parity immediately for
+			 * a highmem page. it would happen for zerocopy. -jay
+			 */
+			if (PageHighMem(page)) {
+				h_ptr[1] = kmap_atomic(page, KM_USER0);
+				xor_block(2, STRIPE_SIZE, h_ptr);
+				kunmap_atomic(h_ptr[1], KM_USER0);
+			} else {
+				ptr[count++] = page_address(page);
 			}
+			check_xor();
+		}
 		break;
 	case READ_MODIFY_WRITE:
-		for (i = disks; i--;)
-			if (sh->dev[i].written) {
-				ptr[count++] = page_address(sh->dev[i].page);
-				check_xor();
+		for (i = disks; i--;) {
+			if (!sh->dev[i].written)
+				continue;
+			if (test_bit(R5_Direct, &sh->dev[i].flags))
+				page = sh->dev[i].req.bi_io_vec[0].bv_page;
+			else
+				page = sh->dev[i].page;
+
+			/* have to compute the parity immediately for
+			 * a highmem page. it would happen for zerocopy. -jay
+			 */
+			if (PageHighMem(page)) {
+				h_ptr[1] = kmap_atomic(page, KM_USER0);
+				xor_block(2, STRIPE_SIZE, h_ptr);
+				kunmap_atomic(h_ptr[1], KM_USER0);
+			} else {
+				ptr[count++] = page_address(page);
 			}
+			check_xor();
+		}
 	}
 	if (count != 1)
 		xor_block(count, STRIPE_SIZE, ptr);
@@ -1098,6 +1180,7 @@ static void compute_parity6(struct strip
 	raid6_conf_t *conf = sh->raid_conf;
 	int i, pd_idx = sh->pd_idx, qd_idx, d0_idx, disks = conf->raid_disks, count;
 	struct bio *chosen;
+	struct page *page;
 	/**** FIX THIS: This could be very bad if disks is close to 256 ****/
 	void *ptrs[disks];
 
@@ -1127,18 +1210,47 @@ static void compute_parity6(struct strip
 		BUG();		/* Not implemented yet */
 	}
 
-	for (i = disks; i--;)
-		if (sh->dev[i].written) {
-			sector_t sector = sh->dev[i].sector;
-			struct bio *wbi = sh->dev[i].written;
-			while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) {
-				copy_data(1, wbi, sh->dev[i].page, sector);
-				wbi = r5_next_bio(wbi, sector);
+	for (i = disks; i--;) {
+		struct r5dev *dev = &sh->dev[i];
+		struct bio *wbi = dev->written;
+		sector_t sector;
+
+		if (!wbi)
+			continue;
+
+		sector = sh->dev[i].sector;
+		set_bit(R5_LOCKED, &sh->dev[i].flags);
+		BUG_ON(test_bit(R5_Direct, &sh->dev[i].flags));
+
+		/* check if it's covered by a single page
+		 * and whole stripe is written at once.
+		 * in this case we can avoid memcpy() */
+		if (!wbi->bi_next && test_bit(R5_Insync, &sh->dev[i].flags) &&
+		    test_bit(R5_OVERWRITE, &sh->dev[i].flags)) {
+			page = zero_copy_data(wbi, sector);
+			/* we don't do zerocopy on a HighMem page. RAID6 tends
+			 * to prepare all of the pages' content to be accessed
+			 * before computing PQ parity. If we need to support HighMem
+			 * page also, we have to modify the gen_syndrome()
+			 * algorithm. -jay */
+			if (page && !PageHighMem(page)) {
+				atomic_inc(&conf->writes_zcopy);
+				sh->dev[i].req.bi_io_vec[0].bv_page = page;
+				set_bit(R5_Direct, &sh->dev[i].flags);
+				clear_bit(R5_UPTODATE, &sh->dev[i].flags);
+				clear_bit(R5_OVERWRITE, &sh->dev[i].flags);
+				continue;
 			}
+		}
 
-			set_bit(R5_LOCKED, &sh->dev[i].flags);
-			set_bit(R5_UPTODATE, &sh->dev[i].flags);
+		atomic_inc(&conf->writes_copied);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -