⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 raid5-optimize-memcpy.patch

📁 非常经典的一个分布式系统
💻 PATCH
字号:
In case of full-stripe writes, don't copy data into the internal cache.
This optimization reduces CPU load by roughly 30%.

Index: linux-2.6.9/include/linux/raid/raid5.h
===================================================================
--- linux-2.6.9.orig/include/linux/raid/raid5.h	2006-05-21 17:57:25.000000000 +0400
+++ linux-2.6.9/include/linux/raid/raid5.h	2006-05-22 00:10:04.000000000 +0400
@@ -152,6 +152,7 @@ struct stripe_head {
 #define	R5_Wantread	4	/* want to schedule a read */
 #define	R5_Wantwrite	5
 #define	R5_Syncio	6	/* this io need to be accounted as resync io */
+#define	R5_Direct	7	/* use page from passed bio to avoid memcpy */
 
 /*
  * Write method
Index: linux-2.6.9/drivers/md/raid5.c
===================================================================
--- linux-2.6.9.orig/drivers/md/raid5.c	2006-05-22 00:10:01.000000000 +0400
+++ linux-2.6.9/drivers/md/raid5.c	2006-05-22 00:10:04.000000000 +0400
@@ -411,6 +411,8 @@ static int raid5_end_read_request (struc
 		clear_buffer_uptodate(bh);
 	}
 #endif
+	if (test_bit(R5_Direct, &sh->dev[i].flags))
+		printk("R5_Direct for READ ?!\n");
 	clear_bit(R5_LOCKED, &sh->dev[i].flags);
 	set_bit(STRIPE_HANDLE, &sh->state);
 	release_stripe(sh);
@@ -449,6 +451,10 @@ static int raid5_end_write_request (stru
 
 	rdev_dec_pending(conf->disks[i].rdev, conf->mddev);
 	
+	if (test_bit(R5_Direct, &sh->dev[i].flags)) {
+		BUG_ON(sh->dev[i].req.bi_io_vec[0].bv_page == sh->dev[i].page);
+		sh->dev[i].req.bi_io_vec[0].bv_page = sh->dev[i].page;
+	}
 	clear_bit(R5_LOCKED, &sh->dev[i].flags);
 	set_bit(STRIPE_HANDLE, &sh->state);
 	__release_stripe(conf, sh);
@@ -673,6 +679,49 @@ static void copy_data(int frombio, struc
 	}
 }
 
+static struct page *zero_copy_data(struct bio *bio, sector_t sector)
+{
+	struct bio_vec *bvl;
+	int i;
+
+	for (;bio && bio->bi_sector < sector+STRIPE_SECTORS;
+	      bio = r5_next_bio(bio, sector) ) {
+		int page_offset;
+		if (bio->bi_sector >= sector)
+			page_offset = (signed)(bio->bi_sector - sector) * 512;
+		else 
+			page_offset = (signed)(sector - bio->bi_sector) * -512;
+		bio_for_each_segment(bvl, bio, i) {
+			int len = bio_iovec_idx(bio,i)->bv_len;
+			int clen;
+			int b_offset = 0;			
+
+			if (page_offset < 0) {
+				b_offset = -page_offset;
+				page_offset += b_offset;
+				len -= b_offset;
+			}
+
+			if (len > 0 && page_offset + len > STRIPE_SIZE)
+				clen = STRIPE_SIZE - page_offset;	
+			else clen = len;
+			
+			if (clen > 0) {
+				BUG_ON(clen < STRIPE_SIZE);
+				/*printk("  sector %lu: page %p from index %u\n",
+					(unsigned long) sector,
+					bio_iovec_idx(bio, i)->bv_page, i);*/
+				return bio_iovec_idx(bio, i)->bv_page;
+			}	
+			if (clen < len) /* hit end of page */
+				break;
+			page_offset +=  len;
+		}
+	}
+	BUG();
+	return NULL;
+}
+
 #define check_xor() 	do { 						\
 			   if (count == MAX_XOR_BLOCKS) {		\
 				xor_block(count, STRIPE_SIZE, ptr);	\
@@ -717,6 +766,8 @@ static void compute_parity(struct stripe
 	int i, pd_idx = sh->pd_idx, disks = conf->raid_disks, count;
 	void *ptr[MAX_XOR_BLOCKS];
 	struct bio *chosen;
+	struct page *page;
+	int zerocopy = 0;
 
 	PRINTK("compute_parity, stripe %llu, method %d\n",
 		(unsigned long long)sh->sector, method);
@@ -743,13 +794,17 @@ static void compute_parity(struct stripe
 		break;
 	case RECONSTRUCT_WRITE:
 		memset(ptr[0], 0, STRIPE_SIZE);
-		for (i= disks; i-- ;)
+		zerocopy = 1;
+		for (i= disks; i-- ;) {
+			if (i != pd_idx && !sh->dev[i].towrite)
+				zerocopy = 0;
 			if (i!=pd_idx && sh->dev[i].towrite) {
 				chosen = sh->dev[i].towrite;
 				sh->dev[i].towrite = NULL;
 				if (sh->dev[i].written) BUG();
 				sh->dev[i].written = chosen;
 			}
+		}
 		break;
 	case CHECK_PARITY:
 		break;
@@ -759,34 +814,62 @@ static void compute_parity(struct stripe
 		count = 1;
 	}
 	
-	for (i = disks; i--;)
-		if (sh->dev[i].written) {
-			sector_t sector = sh->dev[i].sector;
-			struct bio *wbi = sh->dev[i].written;
-			while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) {
-				copy_data(1, wbi, sh->dev[i].page, sector);
-				wbi = r5_next_bio(wbi, sector);
-			}
+	for (i = disks; i--;) {
+		struct bio *wbi = sh->dev[i].written;
+		sector_t sector;
+
+		if (!wbi)
+			continue;
+
+		sector = sh->dev[i].sector;
+		set_bit(R5_LOCKED, &sh->dev[i].flags);
+		BUG_ON(test_bit(R5_Direct, &sh->dev[i].flags));
+
+		/* check if it's covered by a single page
+		   and whole stripe is written at once.
+		 * in this case we can avoid memcpy() */
+		if (zerocopy && wbi && wbi->bi_next == NULL && 
+				test_bit(R5_OVERWRITE, &sh->dev[i].flags)) {
+			page = zero_copy_data(wbi, sector);
+			BUG_ON(PageHighMem(page));
+			sh->dev[i].req.bi_io_vec[0].bv_page = page;
+			set_bit(R5_Direct, &sh->dev[i].flags);
+			clear_bit(R5_UPTODATE, &sh->dev[i].flags);
+			continue;
+		}
 
-			set_bit(R5_LOCKED, &sh->dev[i].flags);
-			set_bit(R5_UPTODATE, &sh->dev[i].flags);
+		set_bit(R5_UPTODATE, &sh->dev[i].flags);
+		while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) {
+			copy_data(1, wbi, sh->dev[i].page, sector);
+			wbi = r5_next_bio(wbi, sector);
 		}
+	}
 
 	switch(method) {
 	case RECONSTRUCT_WRITE:
 	case CHECK_PARITY:
-		for (i=disks; i--;)
-			if (i != pd_idx) {
-				ptr[count++] = page_address(sh->dev[i].page);
-				check_xor();
-			}
+		for (i=disks; i--;) {
+			if (i == pd_idx)
+				continue;
+			if (test_bit(R5_Direct, &sh->dev[i].flags))
+				page = sh->dev[i].req.bi_io_vec[0].bv_page;
+			else
+				page = sh->dev[i].page;
+			ptr[count++] = page_address(page);
+			check_xor();
+		}
 		break;
 	case READ_MODIFY_WRITE:
-		for (i = disks; i--;)
-			if (sh->dev[i].written) {
-				ptr[count++] = page_address(sh->dev[i].page);
-				check_xor();
-			}
+		for (i = disks; i--;) {
+			if (!sh->dev[i].written)
+				continue;
+			if (test_bit(R5_Direct, &sh->dev[i].flags))
+				page = sh->dev[i].req.bi_io_vec[0].bv_page;
+			else
+				page = sh->dev[i].page;
+			ptr[count++] = page_address(page);
+			check_xor();
+		}
 	}
 	if (count != 1)
 		xor_block(count, STRIPE_SIZE, ptr);
@@ -1012,7 +1094,7 @@ static void handle_stripe(struct stripe_
 	dev = &sh->dev[sh->pd_idx];
 	if ( written &&
 	     ( (test_bit(R5_Insync, &dev->flags) && !test_bit(R5_LOCKED, &dev->flags) &&
-		test_bit(R5_UPTODATE, &dev->flags))
+		(test_bit(R5_UPTODATE, &dev->flags) || test_bit(R5_Direct, &dev->flags)))
 	       || (failed == 1 && failed_num == sh->pd_idx))
 	    ) {
 	    /* any written block on an uptodate or failed drive can be returned.
@@ -1023,13 +1105,16 @@ static void handle_stripe(struct stripe_
 		if (sh->dev[i].written) {
 		    dev = &sh->dev[i];
 		    if (!test_bit(R5_LOCKED, &dev->flags) &&
-			 test_bit(R5_UPTODATE, &dev->flags) ) {
+			 (test_bit(R5_UPTODATE, &dev->flags) ||
+			  	test_bit(R5_Direct, &dev->flags)) ) {
 			/* We can return any write requests */
 			    struct bio *wbi, *wbi2;
 			    PRINTK("Return write for disc %d\n", i);
 			    spin_lock_irq(&conf->device_lock);
 			    wbi = dev->written;
 			    dev->written = NULL;
+			    if (test_bit(R5_Direct, &dev->flags))
+				    clear_bit(R5_Direct, &dev->flags);
 			    while (wbi && wbi->bi_sector < dev->sector + STRIPE_SECTORS) {
 				    wbi2 = r5_next_bio(wbi, dev->sector);
 				    if (--wbi->bi_phys_segments == 0) {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -