⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 raid5-zerocopy.patch

📁 非常经典的一个分布式系统
💻 PATCH
字号:
diff -pru linux-2.6.9.orig/drivers/md/raid5.c linux-2.6.9/drivers/md/raid5.c--- linux-2.6.9.orig/drivers/md/raid5.c	2007-07-09 02:43:33.000000000 -0600+++ linux-2.6.9/drivers/md/raid5.c	2007-07-13 00:39:15.000000000 -0600@@ -412,6 +412,7 @@ static int raid5_end_read_request (struc 		clear_buffer_uptodate(bh); 	} #endif+	BUG_ON(test_bit(R5_Direct, &sh->dev[i].flags)); 	clear_bit(R5_LOCKED, &sh->dev[i].flags); 	set_bit(STRIPE_HANDLE, &sh->state); 	release_stripe(sh);@@ -450,6 +451,10 @@ static int raid5_end_write_request (stru  	rdev_dec_pending(conf->disks[i].rdev, conf->mddev); 	+	if (test_bit(R5_Direct, &sh->dev[i].flags)) {+		BUG_ON(sh->dev[i].req.bi_io_vec[0].bv_page == sh->dev[i].page);+		sh->dev[i].req.bi_io_vec[0].bv_page = sh->dev[i].page;+	} 	clear_bit(R5_LOCKED, &sh->dev[i].flags); 	set_bit(STRIPE_HANDLE, &sh->state); 	__release_stripe(conf, sh);@@ -621,6 +626,25 @@ static sector_t compute_blocknr(struct s }  +static struct page *zero_copy_data(struct bio *bio, sector_t sector)+{+	sector_t bi_sector = bio->bi_sector;+	struct page *page;+	struct bio_vec *bvl;+	int i;++	bio_for_each_segment(bvl, bio, i) {+		if (sector > bi_sector) {+			bi_sector += bio_iovec_idx(bio, i)->bv_len >> 9;+			continue;+		}+		BUG_ON(sector != bi_sector);+		page = bio_iovec_idx(bio, i)->bv_page;+		return PageConstant(page) ? page : NULL;+	}+	BUG();+	return NULL;+}  /*  * Copy data between a page in the stripe cache, and one or more bion@@ -716,8 +740,9 @@ static void compute_parity(struct stripe { 	raid5_conf_t *conf = sh->raid_conf; 	int i, pd_idx = sh->pd_idx, disks = conf->raid_disks, count;-	void *ptr[MAX_XOR_BLOCKS];+	void *ptr[MAX_XOR_BLOCKS], *h_ptr[2]; 	struct bio *chosen;+	struct page *page;  	PRINTK("compute_parity, stripe %llu, method %d\n", 		(unsigned long long)sh->sector, method);@@ -744,13 +769,14 @@ static void compute_parity(struct stripe 		break; 	case RECONSTRUCT_WRITE: 		memset(ptr[0], 0, STRIPE_SIZE);-		for (i= disks; i-- ;)+		for (i= disks; i-- ;) { 			if (i!=pd_idx && sh->dev[i].towrite) { 				chosen = sh->dev[i].towrite; 				sh->dev[i].towrite = NULL; 				if (sh->dev[i].written) BUG(); 				sh->dev[i].written = chosen; 			}+		} 		break; 	case CHECK_PARITY: 		break;@@ -760,34 +786,88 @@ static void compute_parity(struct stripe 		count = 1; 	} 	-	for (i = disks; i--;)-		if (sh->dev[i].written) {-			sector_t sector = sh->dev[i].sector;-			struct bio *wbi = sh->dev[i].written;-			while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) {-				copy_data(1, wbi, sh->dev[i].page, sector);-				wbi = r5_next_bio(wbi, sector);+	for (i = disks; i--;) {+		struct bio *wbi = sh->dev[i].written;+		sector_t sector;++		if (!wbi)+			continue;++		sector = sh->dev[i].sector;+		set_bit(R5_LOCKED, &sh->dev[i].flags);+		BUG_ON(test_bit(R5_Direct, &sh->dev[i].flags));++		/* check if it's covered by a single page+		   and whole stripe is written at once.+		 * in this case we can avoid memcpy() */+		if (!wbi->bi_next && test_bit(R5_OVERWRITE, &sh->dev[i].flags) &&+		    test_bit(R5_Insync, &sh->dev[i].flags)) {+			page = zero_copy_data(wbi, sector);+			if (page) {+				atomic_inc(&conf->writes_zcopy);+				sh->dev[i].req.bi_io_vec[0].bv_page = page;+				set_bit(R5_Direct, &sh->dev[i].flags);+				clear_bit(R5_UPTODATE, &sh->dev[i].flags);+				clear_bit(R5_OVERWRITE, &sh->dev[i].flags);+				continue; 			}+		} -			set_bit(R5_LOCKED, &sh->dev[i].flags);-			set_bit(R5_UPTODATE, &sh->dev[i].flags);+		atomic_inc(&conf->writes_copied);+		test_and_clear_bit(R5_OVERWRITE, &sh->dev[i].flags);+		set_bit(R5_UPTODATE, &sh->dev[i].flags);+		while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) {+			copy_data(1, wbi, sh->dev[i].page, sector);+			wbi = r5_next_bio(wbi, sector); 		}+	} +	h_ptr[0] = ptr[0]; 	switch(method) { 	case RECONSTRUCT_WRITE: 	case CHECK_PARITY:-		for (i=disks; i--;)-			if (i != pd_idx) {-				ptr[count++] = page_address(sh->dev[i].page);-				check_xor();+		for (i=disks; i--;) {+			if (i == pd_idx)+				continue;+			if (test_bit(R5_Direct, &sh->dev[i].flags))+				page = sh->dev[i].req.bi_io_vec[0].bv_page;+			else+				page = sh->dev[i].page;++			/* have to compute the parity immediately for+			 * a highmem page. it would happen for zerocopy. -jay+			 */+			if (PageHighMem(page)) {+				h_ptr[1] = kmap_atomic(page, KM_USER0);+				xor_block(2, STRIPE_SIZE, h_ptr);+				kunmap_atomic(page, KM_USER0);+			} else {+				ptr[count++] = page_address(page); 			}+			check_xor();+		} 		break; 	case READ_MODIFY_WRITE:-		for (i = disks; i--;)-			if (sh->dev[i].written) {-				ptr[count++] = page_address(sh->dev[i].page);-				check_xor();+		for (i = disks; i--;) {+			if (!sh->dev[i].written)+				continue;+			if (test_bit(R5_Direct, &sh->dev[i].flags))+				page = sh->dev[i].req.bi_io_vec[0].bv_page;+			else+				page = sh->dev[i].page;++			/* have to compute the parity immediately for+			 * a highmem page. it would happen for zerocopy. -jay+			 */+			if (PageHighMem(page)) {+				h_ptr[1] = kmap_atomic(page, KM_USER0);+				xor_block(2, STRIPE_SIZE, h_ptr);+				kunmap_atomic(page, KM_USER0);+			} else {+				ptr[count++] = page_address(page); 			}+			check_xor();+		} 	} 	if (count != 1) 		xor_block(count, STRIPE_SIZE, ptr);@@ -1059,13 +1139,15 @@ static void handle_stripe(struct stripe_ 		if (sh->dev[i].written) { 		    dev = &sh->dev[i]; 		    if (!test_bit(R5_LOCKED, &dev->flags) &&-			 test_bit(R5_UPTODATE, &dev->flags) ) {+			 (test_bit(R5_UPTODATE, &dev->flags) ||+			  	test_bit(R5_Direct, &dev->flags)) ) { 			/* We can return any write requests */ 			    struct bio *wbi, *wbi2; 			    PRINTK("Return write for disc %d\n", i); 			    spin_lock_irq(&conf->device_lock); 			    wbi = dev->written; 			    dev->written = NULL;+			    test_and_clear_bit(R5_Direct, &dev->flags); 			    while (wbi && wbi->bi_sector < dev->sector + STRIPE_SECTORS) { 				    wbi2 = r5_next_bio(wbi, dev->sector); 				    if (--wbi->bi_phys_segments == 0) {@@ -1831,6 +1913,7 @@ memory = conf->max_nr_stripes * (sizeof( 		if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe) 			mddev->queue->backing_dev_info.ra_pages = 2 * stripe; 	}+	mddev->queue->backing_dev_info.capabilities |= BDI_CAP_PAGE_CONST_WRITE;  	/* Ok, everything is just fine now */ 	mddev->array_size =  mddev->size * (mddev->raid_disks - 1);@@ -1918,9 +2001,11 @@ static void status (struct seq_file *seq 			atomic_read(&conf->handled_in_raid5d), 			atomic_read(&conf->out_of_stripes), 			atomic_read(&conf->handle_called));-	seq_printf (seq, "\n\t\treads: %u for rmw, %u for rcw",+	seq_printf (seq, "\n\t\treads: %u for rmw, %u for rcw. zcopy writes: %u, copied writes: %u", 			atomic_read(&conf->reads_for_rmw),-			atomic_read(&conf->reads_for_rcw));+			atomic_read(&conf->reads_for_rcw),+			atomic_read(&conf->writes_zcopy),+			atomic_read(&conf->writes_copied)); 	seq_printf (seq, "\n\t\t%u delayed, %u active, queues: %u in, %u out\n", 			atomic_read(&conf->delayed), 			atomic_read(&conf->active_stripes),diff -pru linux-2.6.9.orig/include/linux/backing-dev.h linux-2.6.9/include/linux/backing-dev.h--- linux-2.6.9.orig/include/linux/backing-dev.h	2004-10-18 15:53:46.000000000 -0600+++ linux-2.6.9/include/linux/backing-dev.h	2007-07-13 00:12:46.000000000 -0600@@ -30,8 +30,11 @@ struct backing_dev_info { 	void *congested_data;	/* Pointer to aux data for congested func */ 	void (*unplug_io_fn)(struct backing_dev_info *, struct page *); 	void *unplug_io_data;+	unsigned int capabilities; }; +#define BDI_CAP_PAGE_CONST_WRITE      0x00000001+ extern struct backing_dev_info default_backing_dev_info; void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page); @@ -62,4 +65,7 @@ static inline int bdi_rw_congested(struc 				  (1 << BDI_write_congested)); } +#define mapping_cap_page_constant_write(mapping) \+	((mapping)->backing_dev_info->capabilities & BDI_CAP_PAGE_CONST_WRITE)+ #endif		/* _LINUX_BACKING_DEV_H */diff -pru linux-2.6.9.orig/include/linux/page-flags.h linux-2.6.9/include/linux/page-flags.h--- linux-2.6.9.orig/include/linux/page-flags.h	2004-10-18 15:54:39.000000000 -0600+++ linux-2.6.9/include/linux/page-flags.h	2007-07-13 00:12:46.000000000 -0600@@ -74,6 +74,7 @@ #define PG_swapcache		16	/* Swap page: swp_entry_t in private */ #define PG_mappedtodisk		17	/* Has blocks allocated on-disk */ #define PG_reclaim		18	/* To be reclaimed asap */+#define PG_constant		19  /* To mark the page is constant */   /*@@ -298,6 +299,11 @@ extern unsigned long __read_page_state(u #define PageSwapCache(page)	0 #endif +#define PageConstant(page) test_bit(PG_constant, &(page)->flags)+#define SetPageConstant(page) set_bit(PG_constant, &(page)->flags)+#define ClearPageConstant(page) clear_bit(PG_constant, &(page->flags))+#define TestSetPageConstant(page) test_and_set_bit(PG_constant, &(page)->flags)+ struct page;	/* forward declaration */  int test_clear_page_dirty(struct page *page);diff -pru linux-2.6.9.orig/include/linux/pagemap.h linux-2.6.9/include/linux/pagemap.h--- linux-2.6.9.orig/include/linux/pagemap.h	2004-10-18 15:53:06.000000000 -0600+++ linux-2.6.9/include/linux/pagemap.h	2007-07-13 00:12:46.000000000 -0600@@ -191,6 +191,19 @@ static inline void wait_on_page_writebac  extern void end_page_writeback(struct page *page); +extern int set_page_constant(struct page *page);+extern void clear_page_constant(struct page *);+static inline int set_page_constant_lock(struct page *page)+{+        BUG_ON(PageLocked(page));+        lock_page(page);+        if (set_page_constant(page)) {+                unlock_page(page);+                return 1;+        }+        return 0;+}+ /*  * Fault a userspace page into pagetables.  Return non-zero on a fault.  *diff -pru linux-2.6.9.orig/include/linux/raid/raid5.h linux-2.6.9/include/linux/raid/raid5.h--- linux-2.6.9.orig/include/linux/raid/raid5.h	2007-07-09 02:43:33.000000000 -0600+++ linux-2.6.9/include/linux/raid/raid5.h	2007-07-13 00:39:15.000000000 -0600@@ -153,6 +153,7 @@ struct stripe_head { #define	R5_Wantread	4	/* want to schedule a read */ #define	R5_Wantwrite	5 #define	R5_Syncio	6	/* this io need to be accounted as resync io */+#define	R5_Direct	7	/* use page from passed bio to avoid memcpy */  /*  * Write method@@ -234,6 +235,8 @@ struct raid5_private_data { 	atomic_t		out_of_stripes; 	atomic_t		reads_for_rmw; 	atomic_t		reads_for_rcw;+	atomic_t 		writes_zcopy;+	atomic_t		writes_copied; 	atomic_t		handle_called; 	atomic_t		delayed; 	atomic_t		in_reqs_in_queue;diff -pru linux-2.6.9.orig/mm/filemap.c linux-2.6.9/mm/filemap.c--- linux-2.6.9.orig/mm/filemap.c	2007-07-09 02:43:33.000000000 -0600+++ linux-2.6.9/mm/filemap.c	2007-07-13 00:12:46.000000000 -0600@@ -27,6 +27,8 @@ #include <linux/pagevec.h> #include <linux/blkdev.h> #include <linux/security.h>+#include <linux/rmap.h>+ /*  * This is needed for the following functions:  *  - try_to_release_page@@ -486,11 +488,52 @@ void end_page_writeback(struct page *pag 			BUG(); 		smp_mb__after_clear_bit(); 	}+	clear_page_constant(page); 	wake_up_page(page); }  EXPORT_SYMBOL(end_page_writeback); +/* Mark a page in bio to be constant, page must be locked */+int set_page_constant(struct page *page)+{+	BUG_ON(!PageLocked(page));++	/* If it's an anonymous page and haven't been added to swap cache, +	 * do it here.+	 */+	if (PageAnon(page) && !PageSwapCache(page))+		return 1;++	BUG_ON(!PageUptodate(page));++	/* I have to clear page uptodate before trying to remove+	 * it from user's page table because otherwise, the page may be+	 * reinstalled by a page access which happens between try_to_unmap()+	 * and ClearPageUptodate(). -jay+	 */+	ClearPageUptodate(page);+	if (page_mapped(page) && try_to_unmap(page) != SWAP_SUCCESS) {+		SetPageUptodate(page);+		return 1;+	}+	SetPageConstant(page);+	return 0;+}++void clear_page_constant(struct page *page)+{+	if (PageConstant(page)) {+		BUG_ON(!PageLocked(page));+		BUG_ON(PageUptodate(page));+		ClearPageConstant(page);+		SetPageUptodate(page);+		unlock_page(page);+	}+}+EXPORT_SYMBOL(set_page_constant);+EXPORT_SYMBOL(clear_page_constant);+ /*  * Get a lock on the page, assuming we need to sleep to get it.  *

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -