📄 raid5-zerocopy-rhel5.patch
字号:
+ clear_bit(R5_OVERWRITE, &sh->dev[i].flags);+ set_bit(R5_UPTODATE, &sh->dev[i].flags);+ while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) {+ copy_data(1, wbi, sh->dev[i].page, sector);+ wbi = r5_next_bio(wbi, sector); }+ } // switch(method) { // case RECONSTRUCT_WRITE:@@ -1149,8 +1261,12 @@ static void compute_parity6(struct strip count = 0; i = d0_idx; do {- ptrs[count++] = page_address(sh->dev[i].page);- if (count <= disks-2 && !test_bit(R5_UPTODATE, &sh->dev[i].flags))+ if (test_bit(R5_Direct, &sh->dev[i].flags))+ ptrs[count++] = page_address(sh->dev[i].req.bi_io_vec[0].bv_page);+ else+ ptrs[count++] = page_address(sh->dev[i].page);+ if (count <= disks-2 && !test_bit(R5_UPTODATE, &sh->dev[i].flags) &&+ !test_bit(R5_Direct, &sh->dev[i].flags)) printk("block %d/%d not uptodate on parity calc\n", i,count); i = raid6_next_disk(i, disks); } while ( i != d0_idx );@@ -1597,7 +1713,8 @@ static void handle_stripe5(struct stripe if (sh->dev[i].written) { dev = &sh->dev[i]; if (!test_bit(R5_LOCKED, &dev->flags) &&- test_bit(R5_UPTODATE, &dev->flags) ) {+ (test_bit(R5_UPTODATE, &dev->flags) ||+ test_bit(R5_Direct, &dev->flags)) ) { /* We can return any write requests */ struct bio *wbi, *wbi2; int bitmap_end = 0;@@ -1605,6 +1722,7 @@ static void handle_stripe5(struct stripe spin_lock_irq(&conf->device_lock); wbi = dev->written; dev->written = NULL;+ clear_bit(R5_Direct, &dev->flags); while (wbi && wbi->bi_sector < dev->sector + STRIPE_SECTORS) { wbi2 = r5_next_bio(wbi, dev->sector); if (--wbi->bi_phys_segments == 0) {@@ -2173,7 +2291,8 @@ static void handle_stripe6(struct stripe if (sh->dev[i].written) { dev = &sh->dev[i]; if (!test_bit(R5_LOCKED, &dev->flags) &&- test_bit(R5_UPTODATE, &dev->flags) ) {+ (test_bit(R5_UPTODATE, &dev->flags) ||+ test_bit(R5_Direct, &dev->flags)) ) { /* We can return any write requests */ int bitmap_end = 0; struct bio *wbi, *wbi2;@@ -2182,6 +2301,7 @@ static void handle_stripe6(struct stripe spin_lock_irq(&conf->device_lock); wbi = 
dev->written; dev->written = NULL;+ clear_bit(R5_Direct, &dev->flags); while (wbi && wbi->bi_sector < dev->sector + STRIPE_SECTORS) { wbi2 = r5_next_bio(wbi, dev->sector); if (--wbi->bi_phys_segments == 0) {@@ -3450,6 +3570,9 @@ static int run(mddev_t *mddev) mddev->queue->max_phys_segments = conf->chunk_size * conf->previous_raid_disks >> PAGE_SHIFT; mddev->queue->max_hw_segments = conf->chunk_size * conf->previous_raid_disks >> PAGE_SHIFT;; + /* raid5 device is able to do zcopy right now. */+ mddev->queue->backing_dev_info.capabilities |= BDI_CAP_PAGE_CONSTANT_WRITE;+ return 0; abort: if (conf) {@@ -3536,9 +3659,11 @@ static void status (struct seq_file *seq atomic_read(&conf->handled_in_raid5d), atomic_read(&conf->out_of_stripes), atomic_read(&conf->handle_called));- seq_printf (seq, "\n\t\treads: %u for rmw, %u for rcw",+ seq_printf (seq, "\n\t\treads: %u for rmw, %u for rcw. zcopy writes: %u, copied writes: %u", atomic_read(&conf->reads_for_rmw),- atomic_read(&conf->reads_for_rcw));+ atomic_read(&conf->reads_for_rcw),+ atomic_read(&conf->writes_zcopy),+ atomic_read(&conf->writes_copied)); seq_printf (seq, "\n\t\t%u delayed, %u bit delayed, %u active, queues: %u in, %u out\n", atomic_read(&conf->delayed), atomic_read(&conf->bit_delayed), atomic_read(&conf->active_stripes),diff -pur linux-2.6.18-53.orig/include/linux/backing-dev.h linux-2.6.18-53/include/linux/backing-dev.h--- linux-2.6.18-53.orig/include/linux/backing-dev.h 2007-12-28 14:49:26.000000000 +0800+++ linux-2.6.18-53/include/linux/backing-dev.h 2007-12-28 19:09:32.000000000 +0800@@ -48,6 +48,7 @@ struct backing_dev_info { #define BDI_CAP_READ_MAP 0x00000010 /* Can be mapped for reading */ #define BDI_CAP_WRITE_MAP 0x00000020 /* Can be mapped for writing */ #define BDI_CAP_EXEC_MAP 0x00000040 /* Can be mapped for execution */+#define BDI_CAP_PAGE_CONSTANT_WRITE 0x00000080 /* Zcopy write - for raid5 */ #define BDI_CAP_VMFLAGS \ (BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP) @@ -94,11 +95,18 
@@ static inline int bdi_rw_congested(struc #define bdi_cap_account_dirty(bdi) \ (!((bdi)->capabilities & BDI_CAP_NO_ACCT_DIRTY)) +#define bdi_cap_page_constant_write(bdi) \+ ((bdi)->capabilities & BDI_CAP_PAGE_CONSTANT_WRITE)+ #define mapping_cap_writeback_dirty(mapping) \ bdi_cap_writeback_dirty((mapping)->backing_dev_info) #define mapping_cap_account_dirty(mapping) \ bdi_cap_account_dirty((mapping)->backing_dev_info) +#define mapping_cap_page_constant_write(mapping) \+ bdi_cap_page_constant_write((mapping)->backing_dev_info)+ + #endif /* _LINUX_BACKING_DEV_H */diff -pur linux-2.6.18-53.orig/include/linux/page-flags.h linux-2.6.18-53/include/linux/page-flags.h--- linux-2.6.18-53.orig/include/linux/page-flags.h 2007-12-28 14:49:26.000000000 +0800+++ linux-2.6.18-53/include/linux/page-flags.h 2007-12-28 19:09:32.000000000 +0800@@ -86,6 +86,7 @@ #define PG_reclaim 17 /* To be reclaimed asap */ #define PG_nosave_free 18 /* Free, should not be written */ #define PG_buddy 19 /* Page is free, on buddy lists */+#define PG_constant 20 /* Page is constant: writes to it are blocked */ /* PG_owner_priv_1 users should have descriptive aliases */ #define PG_checked PG_owner_priv_1 /* Used by some filesystems */@@ -252,6 +253,14 @@ struct page; /* forward declaration */ +#define PageConstant(page) test_bit(PG_constant, &(page)->flags)+#define SetPageConstant(page) set_bit(PG_constant, &(page)->flags)+#define ClearPageConstant(page) clear_bit(PG_constant, &(page)->flags)+#define TestSetPageConstant(page) test_and_set_bit(PG_constant, &(page)->flags)++extern int set_page_constant(struct page *page);+extern void clear_page_constant(struct page *page);+ int test_clear_page_dirty(struct page *page); int test_clear_page_writeback(struct page *page); int test_set_page_writeback(struct page *page);diff -pur linux-2.6.18-53.orig/include/linux/raid/raid5.h linux-2.6.18-53/include/linux/raid/raid5.h--- linux-2.6.18-53.orig/include/linux/raid/raid5.h 2007-12-28 18:55:24.000000000 +0800+++ 
linux-2.6.18-53/include/linux/raid/raid5.h 2007-12-28 19:09:32.000000000 +0800@@ -156,8 +156,9 @@ struct stripe_head { #define R5_Overlap 7 /* There is a pending overlapping request on this block */ #define R5_ReadError 8 /* seen a read error here recently */ #define R5_ReWrite 9 /* have tried to over-write the readerror */- #define R5_Expanded 10 /* This block now has post-expand data */+#define R5_Direct 11 /* Use the pages in bio to do the write directly. */+ /* * Write method */diff -pur linux-2.6.18-53.orig/mm/filemap.c linux-2.6.18-53/mm/filemap.c--- linux-2.6.18-53.orig/mm/filemap.c 2007-12-28 14:49:26.000000000 +0800+++ linux-2.6.18-53/mm/filemap.c 2007-12-28 19:09:32.000000000 +0800@@ -30,6 +30,7 @@ #include <linux/security.h> #include <linux/syscalls.h> #include <linux/cpuset.h>+#include <linux/rmap.h> #include "filemap.h" #include "internal.h" @@ -566,11 +567,55 @@ void end_page_writeback(struct page *pag if (!test_clear_page_writeback(page)) BUG(); }+ clear_page_constant(page); smp_mb__after_clear_bit(); wake_up_page(page, PG_writeback); } EXPORT_SYMBOL(end_page_writeback); +/* Make a page constant: `constant' means any write to this page will+ * be blocked until clear_page_constant() is called.+ * The page lock must be held. Returns SWAP_SUCCESS or SWAP_FAIL.+ */+int set_page_constant(struct page *page)+{+ BUG_ON(!PageLocked(page));++ /* If it's an anonymous page that hasn't been added to the swap cache,+ * return directly because we have no way to swap this page.+ */+ if (page_mapping(page) == NULL)+ return SWAP_FAIL;++ BUG_ON(!PageUptodate(page));++ /* I have to clear page uptodate before trying to remove+ * it from user's page table because otherwise, the page may be+ * reinstalled by a page access which happens between try_to_unmap()+ * and ClearPageUptodate(). 
-jay+ */+ ClearPageUptodate(page);+ if (page_mapped(page) && try_to_unmap(page, 0) != SWAP_SUCCESS) {+ SetPageUptodate(page);+ return SWAP_FAIL;+ }+ SetPageConstant(page);+ return SWAP_SUCCESS;+}++void clear_page_constant(struct page *page)+{+ if (PageConstant(page)) {+ BUG_ON(!PageLocked(page));+ BUG_ON(PageUptodate(page));+ ClearPageConstant(page);+ SetPageUptodate(page);+ unlock_page(page);+ }+}+EXPORT_SYMBOL(set_page_constant);+EXPORT_SYMBOL(clear_page_constant);+ /** * __lock_page - get a lock on the page, assuming we need to sleep to get it * @page: the page to lock
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -