raid10.c
来自「Linux Kernel 2.6.9 for OMAP1710」· C语言 代码 · 共 1,781 行 · 第 1/4 页
C
1,781 行
/* NOTE(review): this span begins mid-way through sync_request() — the
 * function head (parameters, local declarations, the "skipped:" label and
 * the early-exit paths) lies before this excerpt; confirm against the
 * full file before changing control flow here.
 */
return sectors_skipped;
	}
	if (chunks_skipped >= conf->raid_disks) {
		/* if there has been nothing to do on any drive,
		 * then there is nothing to do at all..
		 */
		sector_t sec = max_sector - sector_nr;
		md_done_sync(mddev, sec, 1);
		return sec + sectors_skipped;
	}
	/* make sure whole request will fit in a chunk - if chunks
	 * are meaningful
	 */
	if (conf->near_copies < conf->raid_disks &&
	    max_sector > (sector_nr | conf->chunk_mask))
		max_sector = (sector_nr | conf->chunk_mask) + 1;
	/*
	 * If there is non-resync activity waiting for us then
	 * put in a delay to throttle resync.
	 */
	if (!go_faster && waitqueue_active(&conf->wait_resume))
		schedule_timeout(HZ);
	device_barrier(conf, sector_nr + RESYNC_SECTORS);

	/* Again, very different code for resync and recovery.
	 * Both must result in an r10bio with a list of bios that
	 * have bi_end_io, bi_sector, bi_bdev set,
	 * and bi_private set to the r10bio.
	 * For recovery, we may actually create several r10bios
	 * with 2 bios in each, that correspond to the bios in the main one.
	 * In this case, the subordinate r10bios link back through a
	 * borrowed master_bio pointer, and the counter in the master
	 * includes a ref from each subordinate.
	 */
	/* First, we decide what to do and set ->bi_end_io
	 * To end_sync_read if we want to read, and
	 * end_sync_write if we will want to write.
	 */
	if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
		/* recovery... the complicated one */
		int i, j, k;
		r10_bio = NULL;

		for (i=0 ; i<conf->raid_disks; i++)
			if (conf->mirrors[i].rdev &&
			    !conf->mirrors[i].rdev->in_sync) {
				/* want to reconstruct this device */
				r10bio_t *rb2 = r10_bio;
				r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO);
				spin_lock_irq(&conf->resync_lock);
				conf->nr_pending++;
				/* every r10bio after the first raises the barrier;
				 * the matching drop happens when the buffer is put */
				if (rb2) conf->barrier++;
				spin_unlock_irq(&conf->resync_lock);
				atomic_set(&r10_bio->remaining, 0);

				/* chain subordinate r10bios back to the previous one
				 * through the borrowed master_bio pointer, taking a
				 * ref on the master for each subordinate */
				r10_bio->master_bio = (struct bio*)rb2;
				if (rb2)
					atomic_inc(&rb2->remaining);
				r10_bio->mddev = mddev;
				set_bit(R10BIO_IsRecover, &r10_bio->state);
				r10_bio->sector = raid10_find_virt(conf, sector_nr, i);
				raid10_find_phys(conf, r10_bio);
				for (j=0; j<conf->copies;j++) {
					int d = r10_bio->devs[j].devnum;
					if (conf->mirrors[d].rdev &&
					    conf->mirrors[d].rdev->in_sync) {
						/* This is where we read from */
						/* NOTE(review): the read bio lives in slot
						 * devs[0] while the address comes from the
						 * in-sync copy devs[j]; the devnums are
						 * patched below to match.  Verify against
						 * raid10_find_phys() before touching. */
						bio = r10_bio->devs[0].bio;
						bio->bi_next = biolist;
						biolist = bio;
						bio->bi_private = r10_bio;
						bio->bi_end_io = end_sync_read;
						bio->bi_rw = 0;
						bio->bi_sector = r10_bio->devs[j].addr +
							conf->mirrors[d].rdev->data_offset;
						bio->bi_bdev = conf->mirrors[d].rdev->bdev;
						atomic_inc(&conf->mirrors[d].rdev->nr_pending);
						atomic_inc(&r10_bio->remaining);
						/* and we write to 'i' */

						/* find the physical slot that maps to
						 * the device being reconstructed */
						for (k=0; k<conf->copies; k++)
							if (r10_bio->devs[k].devnum == i)
								break;
						bio = r10_bio->devs[1].bio;
						bio->bi_next = biolist;
						biolist = bio;
						bio->bi_private = r10_bio;
						bio->bi_end_io = end_sync_write;
						bio->bi_rw = 1;
						bio->bi_sector = r10_bio->devs[k].addr +
							conf->mirrors[i].rdev->data_offset;
						bio->bi_bdev = conf->mirrors[i].rdev->bdev;

						/* record which devices slots 0/1 now address */
						r10_bio->devs[0].devnum = d;
						r10_bio->devs[1].devnum = i;

						break;
					}
				}
				if (j == conf->copies) {
					/* no in-sync copy found to read from —
					 * cannot happen if the array is consistent */
					BUG();
				}
			}
		if (biolist == NULL) {
			/* nothing to recover: unwind the whole r10bio chain */
			while (r10_bio) {
				r10bio_t *rb2 = r10_bio;
				r10_bio = (r10bio_t*) rb2->master_bio;
				rb2->master_bio = NULL;
				put_buf(rb2);
			}
			goto giveup;
		}
	} else { /* resync. Schedule a read for every block at this virt offset */
		int count = 0;
		r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO);

		spin_lock_irq(&conf->resync_lock);
		conf->nr_pending++;
		spin_unlock_irq(&conf->resync_lock);

		r10_bio->mddev = mddev;
		atomic_set(&r10_bio->remaining, 0);

		r10_bio->master_bio = NULL;
		r10_bio->sector = sector_nr;
		set_bit(R10BIO_IsSync, &r10_bio->state);
		raid10_find_phys(conf, r10_bio);
		/* sectors remaining in the current chunk */
		r10_bio->sectors = (sector_nr | conf->chunk_mask) - sector_nr +1;

		spin_lock_irq(&conf->device_lock);
		for (i=0; i<conf->copies; i++) {
			int d = r10_bio->devs[i].devnum;
			bio = r10_bio->devs[i].bio;
			bio->bi_end_io = NULL;
			if (conf->mirrors[d].rdev == NULL ||
			    conf->mirrors[d].rdev->faulty)
				continue;
			atomic_inc(&conf->mirrors[d].rdev->nr_pending);
			atomic_inc(&r10_bio->remaining);
			bio->bi_next = biolist;
			biolist = bio;
			bio->bi_private = r10_bio;
			bio->bi_end_io = end_sync_read;
			bio->bi_rw = 0;
			bio->bi_sector = r10_bio->devs[i].addr +
				conf->mirrors[d].rdev->data_offset;
			bio->bi_bdev = conf->mirrors[d].rdev->bdev;
			count++;
		}
		spin_unlock_irq(&conf->device_lock);
		if (count < 2) {
			/* fewer than two readable copies: nothing to compare,
			 * so drop the pending refs we just took and give up */
			for (i=0; i<conf->copies; i++) {
				int d = r10_bio->devs[i].devnum;
				if (r10_bio->devs[i].bio->bi_end_io)
					atomic_dec(&conf->mirrors[d].rdev->nr_pending);
			}
			put_buf(r10_bio);
			goto giveup;
		}
	}

	/* Reset every queued bio to an empty, clean state before pages
	 * are attached; UPTODATE is preset and presumably cleared by the
	 * end_io handlers on error (NOTE(review): confirm in end_sync_*). */
	for (bio = biolist; bio ; bio=bio->bi_next) {
		bio->bi_flags &= ~(BIO_POOL_MASK - 1);
		if (bio->bi_end_io)
			bio->bi_flags |= 1 << BIO_UPTODATE;
		bio->bi_vcnt = 0;
		bio->bi_idx = 0;
		bio->bi_phys_segments = 0;
		bio->bi_hw_segments = 0;
		bio->bi_size = 0;
	}

	/* Attach pages to every bio in lock-step, one PAGE at a time, until
	 * a bio refuses a page (bio_full) or RESYNC_PAGES are attached. */
	nr_sectors = 0;
	do {
		struct page *page;
		int len = PAGE_SIZE;
		disk = 0;
		if (sector_nr + (len>>9) > max_sector)
			len = (max_sector - sector_nr) << 9;
		if (len == 0)
			break;
		for (bio= biolist ; bio ; bio=bio->bi_next) {
			page = bio->bi_io_vec[bio->bi_vcnt].bv_page;
			if (bio_add_page(bio, page, len, 0) == 0) {
				/* stop here */
				struct bio *bio2;
				bio->bi_io_vec[bio->bi_vcnt].bv_page = page;
				for (bio2 = biolist; bio2 && bio2 != bio; bio2 = bio2->bi_next) {
					/* remove last page from this bio */
					bio2->bi_vcnt--;
					bio2->bi_size -= len;
					bio2->bi_flags &= ~(1<< BIO_SEG_VALID);
				}
				goto bio_full;
			}
			/* NOTE(review): 'disk' is assigned here but appears
			 * unused afterwards — looks vestigial; confirm. */
			disk = i;
		}
		nr_sectors += len>>9;
		sector_nr += len>>9;
	} while (biolist->bi_vcnt < RESYNC_PAGES);
 bio_full:
	r10_bio->sectors = nr_sectors;

	/* Detach each bio from the list and submit the reads; writes are
	 * issued later from the read-completion path. */
	while (biolist) {
		bio = biolist;
		biolist = biolist->bi_next;

		bio->bi_next = NULL;
		r10_bio = bio->bi_private;
		r10_bio->sectors = nr_sectors;

		if (bio->bi_end_io == end_sync_read) {
			md_sync_acct(bio->bi_bdev, nr_sectors);
			generic_make_request(bio);
		}
	}

	return nr_sectors;
 giveup:
	/* There is nowhere to write, so all non-sync
	 * drives must be failed, so try the next chunk...
	 */
	{
	int sec = max_sector - sector_nr;
	sectors_skipped += sec;
	chunks_skipped ++;
	sector_nr = max_sector;
	md_done_sync(mddev, sec, 1);
	goto skipped;
	}
}

/*
 * run - assemble and activate a raid10 array (called by the md core when
 * the array is started).
 *
 * Validates the raid level and the near/far layout packed into
 * mddev->layout (near copies in bits 0-7, far copies in bits 8-15, upper
 * bits must be zero), allocates the private conf_t and per-mirror table,
 * creates the r10bio mempool, hooks the queue callbacks, registers each
 * verified rdev, starts the raid10d service thread and sizes the array
 * and read-ahead window.
 *
 * Returns 0 on success, -EIO on any failure; everything allocated here
 * is unwound via out_free_conf and ultimately freed in stop().
 */
static int run(mddev_t *mddev)
{
	conf_t *conf;
	int i, disk_idx;
	mirror_info_t *disk;
	mdk_rdev_t *rdev;
	struct list_head *tmp;
	int nc, fc;
	sector_t stride, size;

	if (mddev->level != 10) {
		printk(KERN_ERR "raid10: %s: raid level not set correctly... (%d)\n",
		       mdname(mddev), mddev->level);
		goto out;
	}
	/* decode the layout: near copies, far copies */
	nc = mddev->layout & 255;
	fc = (mddev->layout >> 8) & 255;
	if ((nc*fc) <2 || (nc*fc) > mddev->raid_disks ||
	    (mddev->layout >> 16)) {
		printk(KERN_ERR "raid10: %s: unsupported raid10 layout: 0x%8x\n",
		       mdname(mddev), mddev->layout);
		goto out;
	}
	/*
	 * copy the already verified devices into our private RAID10
	 * bookkeeping area. [whatever we allocate in run(),
	 * should be freed in stop()]
	 */
	conf = kmalloc(sizeof(conf_t), GFP_KERNEL);
	mddev->private = conf;
	if (!conf) {
		printk(KERN_ERR "raid10: couldn't allocate memory for %s\n",
		       mdname(mddev));
		goto out;
	}
	memset(conf, 0, sizeof(*conf));
	conf->mirrors = kmalloc(sizeof(struct mirror_info)*mddev->raid_disks,
				GFP_KERNEL);
	if (!conf->mirrors) {
		printk(KERN_ERR "raid10: couldn't allocate memory for %s\n",
		       mdname(mddev));
		goto out_free_conf;
	}
	memset(conf->mirrors, 0, sizeof(struct mirror_info)*mddev->raid_disks);

	conf->near_copies = nc;
	conf->far_copies = fc;
	conf->copies = nc*fc;
	/* chunk geometry in sectors; chunk_size is a power of two */
	conf->chunk_mask = (sector_t)(mddev->chunk_size>>9)-1;
	conf->chunk_shift = ffz(~mddev->chunk_size) - 9;
	/* stride: chunks spread over the far-copy sets */
	stride = mddev->size >> (conf->chunk_shift-1);
	sector_div(stride, fc);
	conf->stride = stride << conf->chunk_shift;

	conf->r10bio_pool = mempool_create(NR_RAID10_BIOS, r10bio_pool_alloc,
					   r10bio_pool_free, conf);
	if (!conf->r10bio_pool) {
		printk(KERN_ERR "raid10: couldn't allocate memory for %s\n",
		       mdname(mddev));
		goto out_free_conf;
	}
	mddev->queue->unplug_fn = raid10_unplug;

	mddev->queue->issue_flush_fn = raid10_issue_flush;

	ITERATE_RDEV(mddev, rdev, tmp) {
		disk_idx = rdev->raid_disk;
		if (disk_idx >= mddev->raid_disks
		    || disk_idx < 0)
			continue;
		disk = conf->mirrors + disk_idx;

		disk->rdev = rdev;

		blk_queue_stack_limits(mddev->queue,
				       rdev->bdev->bd_disk->queue);
		/* as we don't honour merge_bvec_fn, we must never risk
		 * violating it, so limit ->max_sector to one PAGE, as
		 * a one page request is never in violation.
		 */
		if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
		    mddev->queue->max_sectors > (PAGE_SIZE>>9))
			mddev->queue->max_sectors = (PAGE_SIZE>>9);

		disk->head_position = 0;
		if (!rdev->faulty && rdev->in_sync)
			conf->working_disks++;
	}
	conf->raid_disks = mddev->raid_disks;
	conf->mddev = mddev;
	conf->device_lock = SPIN_LOCK_UNLOCKED;
	INIT_LIST_HEAD(&conf->retry_list);

	conf->resync_lock = SPIN_LOCK_UNLOCKED;
	init_waitqueue_head(&conf->wait_idle);
	init_waitqueue_head(&conf->wait_resume);

	if (!conf->working_disks) {
		printk(KERN_ERR "raid10: no operational mirrors for %s\n",
		       mdname(mddev));
		goto out_free_conf;
	}

	/* count missing members so the array is reported as degraded */
	mddev->degraded = 0;
	for (i = 0; i < conf->raid_disks; i++) {

		disk = conf->mirrors + i;

		if (!disk->rdev) {
			disk->head_position = 0;
			mddev->degraded++;
		}
	}


	mddev->thread = md_register_thread(raid10d, mddev, "%s_raid10");
	if (!mddev->thread) {
		printk(KERN_ERR
		       "raid10: couldn't allocate thread for %s\n",
		       mdname(mddev));
		goto out_free_conf;
	}

	printk(KERN_INFO
		"raid10: raid set %s active with %d out of %d devices\n",
		mdname(mddev), mddev->raid_disks - mddev->degraded,
		mddev->raid_disks);
	/*
	 * Ok, everything is just fine now
	 */
	size = conf->stride * conf->raid_disks;
	sector_div(size, conf->near_copies);
	mddev->array_size = size/2;
	mddev->resync_max_sectors = size;

	/* Calculate max read-ahead size.
	 * We need to readahead at least twice a whole stripe....
	 * maybe...
	 */
	{
		int stripe = conf->raid_disks * mddev->chunk_size / PAGE_CACHE_SIZE;
		stripe /= conf->near_copies;
		if (mddev->queue->backing_dev_info.ra_pages < 2* stripe)
			mddev->queue->backing_dev_info.ra_pages = 2* stripe;
	}

	if (conf->near_copies < mddev->raid_disks)
		blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec);
	return 0;

out_free_conf:
	if (conf->r10bio_pool)
		mempool_destroy(conf->r10bio_pool);
	if (conf->mirrors)
		kfree(conf->mirrors);
	kfree(conf);
	mddev->private = NULL;
out:
	return -EIO;
}

/*
 * stop - tear down everything run() built: stop the raid10d thread,
 * destroy the r10bio mempool and free the private configuration.
 * Always returns 0.
 */
static int stop(mddev_t *mddev)
{
	conf_t *conf = mddev_to_conf(mddev);

	md_unregister_thread(mddev->thread);
	mddev->thread = NULL;
	if (conf->r10bio_pool)
		mempool_destroy(conf->r10bio_pool);
	if (conf->mirrors)
		kfree(conf->mirrors);
	kfree(conf);
	mddev->private = NULL;
	return 0;
}

/* method table the md core calls into for raid10 arrays */
static mdk_personality_t raid10_personality =
{
	.name		= "raid10",
	.owner		= THIS_MODULE,
	.make_request	= make_request,
	.run		= run,
	.stop		= stop,
	.status		= status,
	.error_handler	= error,
	.hot_add_disk	= raid10_add_disk,
	.hot_remove_disk= raid10_remove_disk,
	.spare_active	= raid10_spare_active,
	.sync_request	= sync_request,
};

/* module entry: register the raid10 personality with the md core */
static int __init raid_init(void)
{
	return register_md_personality(RAID10, &raid10_personality);
}

/* module exit: unregister the raid10 personality */
static void raid_exit(void)
{
	unregister_md_personality(RAID10);
}

module_init(raid_init);
module_exit(raid_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS("md-personality-9"); /* RAID10 */
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?