raid5.c

From "Linux kernel source code" · C code · 2,325 lines total · page 1 of 5

C
2,325
Font size
/*
 * Copy data between a stripe-cache page and the segments of a bio.
 * @frombio: 1 = drain bio payload into the cache page (write path),
 *           0 = fill the bio from the cache page (read path).
 * @sector:  starting sector of the cache page, used to locate the bio's
 *           overlap with the page.
 * @tx:      descriptor to chain after, so the async engine preserves order.
 * Returns the descriptor of the last copy submitted (callers keep chaining).
 * NOTE(review): the return-type line precedes this chunk; body is complete.
 */
async_copy_data(int frombio, struct bio *bio, struct page *page,
	sector_t sector, struct dma_async_tx_descriptor *tx)
{
	struct bio_vec *bvl;
	struct page *bio_page;
	int i;
	int page_offset;

	/* Signed byte offset of the bio's start relative to the cache page;
	 * negative means the bio begins before this page.
	 */
	if (bio->bi_sector >= sector)
		page_offset = (signed)(bio->bi_sector - sector) * 512;
	else
		page_offset = (signed)(sector - bio->bi_sector) * -512;
	bio_for_each_segment(bvl, bio, i) {
		int len = bio_iovec_idx(bio, i)->bv_len;
		int clen;
		int b_offset = 0;

		/* Skip the part of this segment that lies before the page. */
		if (page_offset < 0) {
			b_offset = -page_offset;
			page_offset += b_offset;
			len -= b_offset;
		}

		/* Clip the copy so it does not run past the end of the page. */
		if (len > 0 && page_offset + len > STRIPE_SIZE)
			clen = STRIPE_SIZE - page_offset;
		else
			clen = len;

		if (clen > 0) {
			b_offset += bio_iovec_idx(bio, i)->bv_offset;
			bio_page = bio_iovec_idx(bio, i)->bv_page;
			if (frombio)
				tx = async_memcpy(page, bio_page, page_offset,
					b_offset, clen,
					ASYNC_TX_DEP_ACK,
					tx, NULL, NULL);
			else
				tx = async_memcpy(bio_page, page, b_offset,
					page_offset, clen,
					ASYNC_TX_DEP_ACK,
					tx, NULL, NULL);
		}
		if (clen < len) /* hit end of page */
			break;
		page_offset += len;
	}

	return tx;
}

/*
 * Async-engine callback after a biofill (read fill) finishes: complete the
 * read bios that were served from the stripe cache and hand the stripe back
 * to the state machine.
 */
static void ops_complete_biofill(void *stripe_head_ref)
{
	struct stripe_head *sh = stripe_head_ref;
	struct bio *return_bi = NULL;
	raid5_conf_t *conf = sh->raid_conf;
	int i;

	pr_debug("%s: stripe %llu\n", __FUNCTION__,
		(unsigned long long)sh->sector);

	/* clear completed biofills */
	for (i = sh->disks; i--; ) {
		struct r5dev *dev = &sh->dev[i];

		/* acknowledge completion of a biofill operation */
		/* and check if we need to reply to a read request,
		 * new R5_Wantfill requests are held off until
		 * !test_bit(STRIPE_OP_BIOFILL, &sh->ops.pending)
		 */
		if (test_and_clear_bit(R5_Wantfill, &dev->flags)) {
			struct bio *rbi, *rbi2;

			/* The access to dev->read is outside of the
			 * spin_lock_irq(&conf->device_lock), but is protected
			 * by the STRIPE_OP_BIOFILL pending bit
			 */
			BUG_ON(!dev->read);
			rbi = dev->read;
			dev->read = NULL;
			while (rbi && rbi->bi_sector <
				dev->sector + STRIPE_SECTORS) {
				rbi2 = r5_next_bio(rbi, dev->sector);
				spin_lock_irq(&conf->device_lock);
				/* Last stripe touching this bio: queue it for
				 * completion.
				 */
				if (--rbi->bi_phys_segments == 0) {
					rbi->bi_next = return_bi;
					return_bi = rbi;
				}
				spin_unlock_irq(&conf->device_lock);
				rbi = rbi2;
			}
		}
	}
	set_bit(STRIPE_OP_BIOFILL, &sh->ops.complete);

	return_io(return_bi);

	set_bit(STRIPE_HANDLE, &sh->state);
	release_stripe(sh);
}

/*
 * Queue async copies from the stripe-cache pages into every bio that is
 * waiting on R5_Wantfill, then chain ops_complete_biofill as the final
 * callback.  Takes a stripe reference that the callback releases.
 */
static void ops_run_biofill(struct stripe_head *sh)
{
	struct dma_async_tx_descriptor *tx = NULL;
	raid5_conf_t *conf = sh->raid_conf;
	int i;

	pr_debug("%s: stripe %llu\n", __FUNCTION__,
		(unsigned long long)sh->sector);

	for (i = sh->disks; i--; ) {
		struct r5dev *dev = &sh->dev[i];
		if (test_bit(R5_Wantfill, &dev->flags)) {
			struct bio *rbi;
			/* Move toread to read under the device lock; the
			 * completion path walks dev->read lock-free (see
			 * ops_complete_biofill).
			 */
			spin_lock_irq(&conf->device_lock);
			dev->read = rbi = dev->toread;
			dev->toread = NULL;
			spin_unlock_irq(&conf->device_lock);
			while (rbi && rbi->bi_sector <
				dev->sector + STRIPE_SECTORS) {
				tx = async_copy_data(0, rbi, dev->page,
					dev->sector, tx);
				rbi = r5_next_bio(rbi, dev->sector);
			}
		}
	}

	atomic_inc(&sh->count);
	async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx,
		ops_complete_biofill, sh);
}

/*
 * Callback when the xor that rebuilt a missing block has finished: mark the
 * target device up to date and re-queue the stripe for handling.
 */
static void ops_complete_compute5(void *stripe_head_ref)
{
	struct stripe_head *sh = stripe_head_ref;
	int target = sh->ops.target;
	struct r5dev *tgt = &sh->dev[target];

	pr_debug("%s: stripe %llu\n", __FUNCTION__,
		(unsigned long long)sh->sector);

	set_bit(R5_UPTODATE, &tgt->flags);
	BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
	clear_bit(R5_Wantcompute, &tgt->flags);
	set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete);
	set_bit(STRIPE_HANDLE, &sh->state);
	release_stripe(sh);
}

/*
 * Recompute the block at sh->ops.target by xor'ing every other disk's page
 * into it (RAID5 single-failure reconstruction).  Returns the descriptor so
 * a following postxor can be chained behind it.
 */
static struct dma_async_tx_descriptor *
ops_run_compute5(struct stripe_head *sh, unsigned long pending)
{
	/* kernel stack size limits the total number of disks */
	int disks = sh->disks;
	struct page *xor_srcs[disks];
	int target = sh->ops.target;
	struct r5dev *tgt = &sh->dev[target];
	struct page *xor_dest = tgt->page;
	int count = 0;
	struct dma_async_tx_descriptor *tx;
	int i;

	pr_debug("%s: stripe %llu block: %d\n",
		__FUNCTION__, (unsigned long long)sh->sector, target);
	BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));

	for (i = disks; i--; )
		if (i != target)
			xor_srcs[count++] = sh->dev[i].page;

	atomic_inc(&sh->count);

	/* A 2-disk array degenerates to a plain copy of the surviving block. */
	if (unlikely(count == 1))
		tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE,
			0, NULL, ops_complete_compute5, sh);
	else
		tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
			ASYNC_TX_XOR_ZERO_DST, NULL,
			ops_complete_compute5, sh);

	/* ack now if postxor is not set to be run */
	if (tx && !test_bit(STRIPE_OP_POSTXOR, &pending))
		async_tx_ack(tx);

	return tx;
}

/* Callback: record that the prexor (parity-subtract) stage has completed. */
static void ops_complete_prexor(void *stripe_head_ref)
{
	struct stripe_head *sh = stripe_head_ref;

	pr_debug("%s: stripe %llu\n", __FUNCTION__,
		(unsigned long long)sh->sector);

	set_bit(STRIPE_OP_PREXOR, &sh->ops.complete);
}

/*
 * Read-modify-write step 1: xor the old data of every to-be-written block
 * out of the existing parity page, so the new data can be xor'd in later by
 * the postxor.  Chained behind @tx; returns the new tail descriptor.
 */
static struct dma_async_tx_descriptor *
ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
{
	/* kernel stack size limits the total number of disks */
	int disks = sh->disks;
	struct page *xor_srcs[disks];
	int count = 0, pd_idx = sh->pd_idx, i;

	/* existing parity data subtracted */
	struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;

	pr_debug("%s: stripe %llu\n", __FUNCTION__,
		(unsigned long long)sh->sector);

	for (i = disks; i--; ) {
		struct r5dev *dev = &sh->dev[i];
		/* Only process blocks that are known to be uptodate */
		if (dev->towrite && test_bit(R5_Wantprexor, &dev->flags))
			xor_srcs[count++] = dev->page;
	}

	tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
		ASYNC_TX_DEP_ACK | ASYNC_TX_XOR_DROP_DST, tx,
		ops_complete_prexor, sh);

	return tx;
}

/*
 * Copy pending write-bio payloads into the stripe-cache pages ("drain"),
 * selecting blocks by rmw (prexor active) or rcw rules.  Chained behind
 * @tx; returns the new tail descriptor.
 */
static struct dma_async_tx_descriptor *
ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx,
		 unsigned long pending)
{
	int disks = sh->disks;
	int pd_idx = sh->pd_idx, i;

	/* check if prexor is active which means only process blocks
	 * that are part of a read-modify-write (Wantprexor)
	 */
	int prexor = test_bit(STRIPE_OP_PREXOR, &pending);

	pr_debug("%s: stripe %llu\n", __FUNCTION__,
		(unsigned long long)sh->sector);

	for (i = disks; i--; ) {
		struct r5dev *dev = &sh->dev[i];
		struct bio *chosen;
		int towrite;

		towrite = 0;
		if (prexor) { /* rmw */
			if (dev->towrite &&
			    test_bit(R5_Wantprexor, &dev->flags))
				towrite = 1;
		} else { /* rcw */
			if (i != pd_idx && dev->towrite &&
				test_bit(R5_LOCKED, &dev->flags))
				towrite = 1;
		}

		if (towrite) {
			struct bio *wbi;

			/* Claim the towrite list under the stripe lock and
			 * park it on dev->written for the completion path.
			 */
			spin_lock(&sh->lock);
			chosen = dev->towrite;
			dev->towrite = NULL;
			BUG_ON(dev->written);
			wbi = dev->written = chosen;
			spin_unlock(&sh->lock);

			while (wbi && wbi->bi_sector <
				dev->sector + STRIPE_SECTORS) {
				tx = async_copy_data(1, wbi, dev->page,
					dev->sector, tx);
				wbi = r5_next_bio(wbi, dev->sector);
			}
		}
	}

	return tx;
}

/*
 * Callback for a postxor that is NOT part of a write (e.g. resync parity
 * recompute): just flag completion and re-queue the stripe.
 */
static void ops_complete_postxor(void *stripe_head_ref)
{
	struct stripe_head *sh = stripe_head_ref;

	pr_debug("%s: stripe %llu\n", __FUNCTION__,
		(unsigned long long)sh->sector);

	set_bit(STRIPE_OP_POSTXOR, &sh->ops.complete);
	set_bit(STRIPE_HANDLE, &sh->state);
	release_stripe(sh);
}

/*
 * Callback for a postxor that finished a write: every drained block and the
 * parity block are now current, so mark them R5_UPTODATE and complete both
 * the biodrain and postxor stages.
 */
static void ops_complete_write(void *stripe_head_ref)
{
	struct stripe_head *sh = stripe_head_ref;
	int disks = sh->disks, i, pd_idx = sh->pd_idx;

	pr_debug("%s: stripe %llu\n", __FUNCTION__,
		(unsigned long long)sh->sector);

	for (i = disks; i--; ) {
		struct r5dev *dev = &sh->dev[i];
		if (dev->written || i == pd_idx)
			set_bit(R5_UPTODATE, &dev->flags);
	}

	set_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete);
	set_bit(STRIPE_OP_POSTXOR, &sh->ops.complete);

	set_bit(STRIPE_HANDLE, &sh->state);
	release_stripe(sh);
}

/*
 * Generate the new parity block: xor the written blocks into the prexor'd
 * parity (rmw), or xor all data blocks into a zeroed parity page (rcw).
 * Picks ops_complete_write vs ops_complete_postxor depending on whether
 * this postxor finishes a write.
 */
static void
ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx,
		unsigned long pending)
{
	/* kernel stack size limits the total number of disks */
	int disks = sh->disks;
	struct page *xor_srcs[disks];

	int count = 0, pd_idx = sh->pd_idx, i;
	struct page *xor_dest;
	int prexor = test_bit(STRIPE_OP_PREXOR, &pending);
	unsigned long flags;
	dma_async_tx_callback callback;

	pr_debug("%s: stripe %llu\n", __FUNCTION__,
		(unsigned long long)sh->sector);

	/* check if prexor is active which means only process blocks
	 * that are part of a read-modify-write (written)
	 */
	if (prexor) {
		xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
		for (i = disks; i--; ) {
			struct r5dev *dev = &sh->dev[i];
			if (dev->written)
				xor_srcs[count++] = dev->page;
		}
	} else {
		xor_dest = sh->dev[pd_idx].page;
		for (i = disks; i--; ) {
			struct r5dev *dev = &sh->dev[i];
			if (i != pd_idx)
				xor_srcs[count++] = dev->page;
		}
	}

	/* check whether this postxor is part of a write */
	callback = test_bit(STRIPE_OP_BIODRAIN, &pending) ?
		ops_complete_write : ops_complete_postxor;

	/* 1/ if we prexor'd then the dest is reused as a source
	 * 2/ if we did not prexor then we are redoing the parity
	 * set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST
	 * for the synchronous xor case
	 */
	flags = ASYNC_TX_DEP_ACK | ASYNC_TX_ACK |
		(prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST);

	atomic_inc(&sh->count);

	if (unlikely(count == 1)) {
		/* Single source degenerates to a copy; the xor dest-handling
		 * flags do not apply to memcpy.
		 */
		flags &= ~(ASYNC_TX_XOR_DROP_DST | ASYNC_TX_XOR_ZERO_DST);
		tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE,
			flags, tx, callback, sh);
	} else
		tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
			flags, tx, callback, sh);
}

/*
 * Callback after a parity check: if the zero-sum ran via DMA and the result
 * is zero, parity is consistent, so the parity block can be marked uptodate.
 */
static void ops_complete_check(void *stripe_head_ref)
{
	struct stripe_head *sh = stripe_head_ref;
	int pd_idx = sh->pd_idx;

	pr_debug("%s: stripe %llu\n", __FUNCTION__,
		(unsigned long long)sh->sector);

	if (test_and_clear_bit(STRIPE_OP_MOD_DMA_CHECK, &sh->ops.pending) &&
		sh->ops.zero_sum_result == 0)
		set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);

	set_bit(STRIPE_OP_CHECK, &sh->ops.complete);
	set_bit(STRIPE_HANDLE, &sh->state);
	release_stripe(sh);
}

/*
 * Verify stripe parity: xor-zero-sum the parity page against all data
 * pages; the result lands in sh->ops.zero_sum_result (0 == consistent).
 * STRIPE_OP_MOD_DMA_CHECK records whether the check was DMA-offloaded.
 */
static void ops_run_check(struct stripe_head *sh)
{
	/* kernel stack size limits the total number of disks */
	int disks = sh->disks;
	struct page *xor_srcs[disks];
	struct dma_async_tx_descriptor *tx;

	int count = 0, pd_idx = sh->pd_idx, i;
	struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;

	pr_debug("%s: stripe %llu\n", __FUNCTION__,
		(unsigned long long)sh->sector);

	for (i = disks; i--; ) {
		struct r5dev *dev = &sh->dev[i];
		if (i != pd_idx)
			xor_srcs[count++] = dev->page;
	}

	tx = async_xor_zero_sum(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
		&sh->ops.zero_sum_result, 0, NULL, NULL, NULL);

	if (tx)
		set_bit(STRIPE_OP_MOD_DMA_CHECK, &sh->ops.pending);
	else
		clear_bit(STRIPE_OP_MOD_DMA_CHECK, &sh->ops.pending);

	atomic_inc(&sh->count);
	tx = async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx,
		ops_complete_check, sh);
}

/*
 * Dispatch all pending stripe operations in dependency order, threading the
 * shared descriptor @tx through compute -> prexor -> biodrain -> postxor so
 * the async engine serializes them.  After a biofill or biodrain consumed
 * the queued bios, wake any writers that were blocked on R5_Overlap.
 */
static void raid5_run_ops(struct stripe_head *sh, unsigned long pending)
{
	int overlap_clear = 0, i, disks = sh->disks;
	struct dma_async_tx_descriptor *tx = NULL;

	if (test_bit(STRIPE_OP_BIOFILL, &pending)) {
		ops_run_biofill(sh);
		overlap_clear++;
	}

	if (test_bit(STRIPE_OP_COMPUTE_BLK, &pending))
		tx = ops_run_compute5(sh, pending);

	if (test_bit(STRIPE_OP_PREXOR, &pending))
		tx = ops_run_prexor(sh, tx);

	if (test_bit(STRIPE_OP_BIODRAIN, &pending)) {
		tx = ops_run_biodrain(sh, tx, pending);
		overlap_clear++;
	}

	if (test_bit(STRIPE_OP_POSTXOR, &pending))
		ops_run_postxor(sh, tx, pending);

	if (test_bit(STRIPE_OP_CHECK, &pending))
		ops_run_check(sh);

	if (test_bit(STRIPE_OP_IO, &pending))
		ops_run_io(sh);

	if (overlap_clear)
		for (i = disks; i--; ) {
			struct r5dev *dev = &sh->dev[i];
			if (test_and_clear_bit(R5_Overlap, &dev->flags))
				wake_up(&sh->raid_conf->wait_for_overlap);
		}
}

/* NOTE(review): grow_one_stripe is truncated here — this chunk is page 1 of
 * 5 of the original file; the remainder of its body lies outside this view.
 */
static int grow_one_stripe(raid5_conf_t *conf)
{
	struct stripe_head *sh;
	sh = kmem_cache_alloc(conf->slab_cache, GFP_KERNEL);
	if (!sh)
		return 0;
	memset(sh, 0, sizeof(*sh) + (conf->raid_disks-1)*sizeof(struct r5dev));
	sh->raid_conf = conf;
	spin_lock_init(&sh->lock);

⌨️ Keyboard shortcuts

Copy code: Ctrl + C
Search code: Ctrl + F
Full-screen mode: F11
Increase font size: Ctrl + =
Decrease font size: Ctrl + -
Show shortcuts: ?