raid10.c
来自「Linux Kernel 2.6.9 for OMAP1710」· C语言 代码 · 共 1,781 行 · 第 1/4 页
C
1,781 行
/*
 * raid10.c : Multiple Devices driver for Linux
 *
 * Copyright (C) 2000-2004 Neil Brown
 *
 * RAID-10 support for md.
 *
 * Based on code in raid1.c.  See raid1.c for further copyright information.
 *
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * You should have received a copy of the GNU General Public License
 * (for example /usr/src/linux/COPYING); if not, write to the Free
 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include <linux/raid/raid10.h>

/*
 * RAID10 provides a combination of RAID0 and RAID1 functionality.
 * The layout of data is defined by
 *    chunk_size
 *    raid_disks
 *    near_copies (stored in low byte of layout)
 *    far_copies (stored in second byte of layout)
 *
 * The data to be stored is divided into chunks using chunksize.
 * Each device is divided into far_copies sections.
 * In each section, chunks are laid out in a style similar to raid0, but
 * near_copies copies of each chunk are stored (each on a different drive).
 * The starting device for each section is offset near_copies from the starting
 * device of the previous section.
 * Thus there are (near_copies*far_copies) of each chunk, and each is on a different
 * drive.
 * near_copies and far_copies must be at least one, and their product is at most
 * raid_disks.
*//* * Number of guaranteed r10bios in case of extreme VM load: */#define NR_RAID10_BIOS 256static void unplug_slaves(mddev_t *mddev);static void * r10bio_pool_alloc(int gfp_flags, void *data){ conf_t *conf = data; r10bio_t *r10_bio; int size = offsetof(struct r10bio_s, devs[conf->copies]); /* allocate a r10bio with room for raid_disks entries in the bios array */ r10_bio = kmalloc(size, gfp_flags); if (r10_bio) memset(r10_bio, 0, size); else unplug_slaves(conf->mddev); return r10_bio;}static void r10bio_pool_free(void *r10_bio, void *data){ kfree(r10_bio);}#define RESYNC_BLOCK_SIZE (64*1024)//#define RESYNC_BLOCK_SIZE PAGE_SIZE#define RESYNC_SECTORS (RESYNC_BLOCK_SIZE >> 9)#define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE)#define RESYNC_WINDOW (2048*1024)/* * When performing a resync, we need to read and compare, so * we need as many pages are there are copies. * When performing a recovery, we need 2 bios, one for read, * one for write (we recover only one drive per r10buf) * */static void * r10buf_pool_alloc(int gfp_flags, void *data){ conf_t *conf = data; struct page *page; r10bio_t *r10_bio; struct bio *bio; int i, j; int nalloc; r10_bio = r10bio_pool_alloc(gfp_flags, conf); if (!r10_bio) { unplug_slaves(conf->mddev); return NULL; } if (test_bit(MD_RECOVERY_SYNC, &conf->mddev->recovery)) nalloc = conf->copies; /* resync */ else nalloc = 2; /* recovery */ /* * Allocate bios. */ for (j = nalloc ; j-- ; ) { bio = bio_alloc(gfp_flags, RESYNC_PAGES); if (!bio) goto out_free_bio; r10_bio->devs[j].bio = bio; } /* * Allocate RESYNC_PAGES data pages and attach them * where needed. 
*/ for (j = 0 ; j < nalloc; j++) { bio = r10_bio->devs[j].bio; for (i = 0; i < RESYNC_PAGES; i++) { page = alloc_page(gfp_flags); if (unlikely(!page)) goto out_free_pages; bio->bi_io_vec[i].bv_page = page; } } return r10_bio;out_free_pages: for ( ; i > 0 ; i--) __free_page(bio->bi_io_vec[i-1].bv_page); while (j--) for (i = 0; i < RESYNC_PAGES ; i++) __free_page(r10_bio->devs[j].bio->bi_io_vec[i].bv_page); j = -1;out_free_bio: while ( ++j < nalloc ) bio_put(r10_bio->devs[j].bio); r10bio_pool_free(r10_bio, conf); return NULL;}static void r10buf_pool_free(void *__r10_bio, void *data){ int i; conf_t *conf = data; r10bio_t *r10bio = __r10_bio; int j; for (j=0; j < conf->copies; j++) { struct bio *bio = r10bio->devs[j].bio; if (bio) { for (i = 0; i < RESYNC_PAGES; i++) { __free_page(bio->bi_io_vec[i].bv_page); bio->bi_io_vec[i].bv_page = NULL; } bio_put(bio); } } r10bio_pool_free(r10bio, conf);}static void put_all_bios(conf_t *conf, r10bio_t *r10_bio){ int i; for (i = 0; i < conf->copies; i++) { struct bio **bio = & r10_bio->devs[i].bio; if (*bio) bio_put(*bio); *bio = NULL; }}static inline void free_r10bio(r10bio_t *r10_bio){ unsigned long flags; conf_t *conf = mddev_to_conf(r10_bio->mddev); /* * Wake up any possible resync thread that waits for the device * to go idle. 
*/ spin_lock_irqsave(&conf->resync_lock, flags); if (!--conf->nr_pending) { wake_up(&conf->wait_idle); wake_up(&conf->wait_resume); } spin_unlock_irqrestore(&conf->resync_lock, flags); put_all_bios(conf, r10_bio); mempool_free(r10_bio, conf->r10bio_pool);}static inline void put_buf(r10bio_t *r10_bio){ conf_t *conf = mddev_to_conf(r10_bio->mddev); unsigned long flags; mempool_free(r10_bio, conf->r10buf_pool); spin_lock_irqsave(&conf->resync_lock, flags); if (!conf->barrier) BUG(); --conf->barrier; wake_up(&conf->wait_resume); wake_up(&conf->wait_idle); if (!--conf->nr_pending) { wake_up(&conf->wait_idle); wake_up(&conf->wait_resume); } spin_unlock_irqrestore(&conf->resync_lock, flags);}static void reschedule_retry(r10bio_t *r10_bio){ unsigned long flags; mddev_t *mddev = r10_bio->mddev; conf_t *conf = mddev_to_conf(mddev); spin_lock_irqsave(&conf->device_lock, flags); list_add(&r10_bio->retry_list, &conf->retry_list); spin_unlock_irqrestore(&conf->device_lock, flags); md_wakeup_thread(mddev->thread);}/* * raid_end_bio_io() is called when we have finished servicing a mirrored * operation and are ready to return a success/failure code to the buffer * cache layer. */static void raid_end_bio_io(r10bio_t *r10_bio){ struct bio *bio = r10_bio->master_bio; bio_endio(bio, bio->bi_size, test_bit(R10BIO_Uptodate, &r10_bio->state) ? 0 : -EIO); free_r10bio(r10_bio);}/* * Update disk head position estimator based on IRQ completion info. 
*/static inline void update_head_pos(int slot, r10bio_t *r10_bio){ conf_t *conf = mddev_to_conf(r10_bio->mddev); conf->mirrors[r10_bio->devs[slot].devnum].head_position = r10_bio->devs[slot].addr + (r10_bio->sectors);}static int raid10_end_read_request(struct bio *bio, unsigned int bytes_done, int error){ int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private); int slot, dev; conf_t *conf = mddev_to_conf(r10_bio->mddev); if (bio->bi_size) return 1; slot = r10_bio->read_slot; dev = r10_bio->devs[slot].devnum; /* * this branch is our 'one mirror IO has finished' event handler: */ if (!uptodate) md_error(r10_bio->mddev, conf->mirrors[dev].rdev); else /* * Set R10BIO_Uptodate in our master bio, so that * we will return a good error code to the higher * levels even if IO on some other mirrored buffer fails. * * The 'master' represents the composite IO operation to * user-side. So if something waits for IO, then it will * wait for the 'master' bio. 
*/ set_bit(R10BIO_Uptodate, &r10_bio->state); update_head_pos(slot, r10_bio); /* * we have only one bio on the read side */ if (uptodate) raid_end_bio_io(r10_bio); else { /* * oops, read error: */ char b[BDEVNAME_SIZE]; if (printk_ratelimit()) printk(KERN_ERR "raid10: %s: rescheduling sector %llu\n", bdevname(conf->mirrors[dev].rdev->bdev,b), (unsigned long long)r10_bio->sector); reschedule_retry(r10_bio); } rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev); return 0;}static int raid10_end_write_request(struct bio *bio, unsigned int bytes_done, int error){ int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private); int slot, dev; conf_t *conf = mddev_to_conf(r10_bio->mddev); if (bio->bi_size) return 1; for (slot = 0; slot < conf->copies; slot++) if (r10_bio->devs[slot].bio == bio) break; dev = r10_bio->devs[slot].devnum; /* * this branch is our 'one mirror IO has finished' event handler: */ if (!uptodate) md_error(r10_bio->mddev, conf->mirrors[dev].rdev); else /* * Set R10BIO_Uptodate in our master bio, so that * we will return a good error code for to the higher * levels even if IO on some other mirrored buffer fails. * * The 'master' represents the composite IO operation to * user-side. So if something waits for IO, then it will * wait for the 'master' bio. */ set_bit(R10BIO_Uptodate, &r10_bio->state); update_head_pos(slot, r10_bio); /* * * Let's see if all mirrored write operations have finished * already. */ if (atomic_dec_and_test(&r10_bio->remaining)) { md_write_end(r10_bio->mddev); raid_end_bio_io(r10_bio); } rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev); return 0;}/* * RAID10 layout manager * Aswell as the chunksize and raid_disks count, there are two * parameters: near_copies and far_copies. * near_copies * far_copies must be <= raid_disks. * Normally one of these will be 1. * If both are 1, we get raid0. * If near_copies == raid_disks, we get raid1. 
* * Chunks are layed out in raid0 style with near_copies copies of the * first chunk, followed by near_copies copies of the next chunk and * so on. * If far_copies > 1, then after 1/far_copies of the array has been assigned * as described above, we start again with a device offset of near_copies. * So we effectively have another copy of the whole array further down all * the drives, but with blocks on different drives. * With this layout, and block is never stored twice on the one device. * * raid10_find_phys finds the sector offset of a given virtual sector * on each device that it is on. If a block isn't on a device, * that entry in the array is set to MaxSector. * * raid10_find_virt does the reverse mapping, from a device and a * sector offset to a virtual address */static void raid10_find_phys(conf_t *conf, r10bio_t *r10bio){ int n,f; sector_t sector; sector_t chunk; sector_t stripe; int dev; int slot = 0; /* now calculate first sector/dev */ chunk = r10bio->sector >> conf->chunk_shift; sector = r10bio->sector & conf->chunk_mask; chunk *= conf->near_copies; stripe = chunk; dev = sector_div(stripe, conf->raid_disks); sector += stripe << conf->chunk_shift; /* and calculate all the others */ for (n=0; n < conf->near_copies; n++) { int d = dev; sector_t s = sector; r10bio->devs[slot].addr = sector; r10bio->devs[slot].devnum = d; slot++; for (f = 1; f < conf->far_copies; f++) { d += conf->near_copies; if (d >= conf->raid_disks) d -= conf->raid_disks; s += conf->stride; r10bio->devs[slot].devnum = d; r10bio->devs[slot].addr = s; slot++; } dev++; if (dev >= conf->raid_disks) { dev = 0; sector += (conf->chunk_mask + 1); } } BUG_ON(slot != conf->copies);}static sector_t raid10_find_virt(conf_t *conf, sector_t sector, int dev){ sector_t offset, chunk, vchunk; while (sector > conf->stride) { sector -= conf->stride; if (dev < conf->near_copies) dev += conf->raid_disks - conf->near_copies; else dev -= conf->near_copies; } offset = sector & conf->chunk_mask; chunk = sector 
>> conf->chunk_shift; vchunk = chunk * conf->raid_disks + dev; sector_div(vchunk, conf->near_copies); return (vchunk << conf->chunk_shift) + offset;}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?