raid1.c
来自「linux 内核源代码」· C语言 代码 · 共 2,200 行 · 第 1/4 页
C
2,200 行
*/ if (mddev->curr_resync < max_sector) /* aborted */ bitmap_end_sync(mddev->bitmap, mddev->curr_resync, &sync_blocks, 1); else /* completed sync */ conf->fullsync = 0; bitmap_close_sync(mddev->bitmap); close_sync(conf); return 0; } if (mddev->bitmap == NULL && mddev->recovery_cp == MaxSector && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) && conf->fullsync == 0) { *skipped = 1; return max_sector - sector_nr; } /* before building a request, check if we can skip these blocks.. * This call the bitmap_start_sync doesn't actually record anything */ if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) && !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { /* We can skip this block, and probably several more */ *skipped = 1; return sync_blocks; } /* * If there is non-resync activity waiting for a turn, * and resync is going fast enough, * then let it though before starting on this new sync request. */ if (!go_faster && conf->nr_waiting) msleep_interruptible(1000); raise_barrier(conf); conf->next_resync = sector_nr; r1_bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO); rcu_read_lock(); /* * If we get a correctably read error during resync or recovery, * we might want to read from a different device. So we * flag all drives that could conceivably be read from for READ, * and any others (which will be non-In_sync devices) for WRITE. * If a read fails, we try reading from something else for which READ * is OK. */ r1_bio->mddev = mddev; r1_bio->sector = sector_nr; r1_bio->state = 0; set_bit(R1BIO_IsSync, &r1_bio->state); for (i=0; i < conf->raid_disks; i++) { mdk_rdev_t *rdev; bio = r1_bio->bios[i]; /* take from bio_init */ bio->bi_next = NULL; bio->bi_flags |= 1 << BIO_UPTODATE; bio->bi_rw = READ; bio->bi_vcnt = 0; bio->bi_idx = 0; bio->bi_phys_segments = 0; bio->bi_hw_segments = 0; bio->bi_size = 0; bio->bi_end_io = NULL; bio->bi_private = NULL; rdev = rcu_dereference(conf->mirrors[i].rdev); if (rdev == NULL || test_bit(Faulty, &rdev->flags)) { still_degraded = 1; continue; } else if (!test_bit(In_sync, &rdev->flags)) { bio->bi_rw = WRITE; bio->bi_end_io = end_sync_write; write_targets ++; } else { /* may need to read from here */ bio->bi_rw = READ; bio->bi_end_io = end_sync_read; if (test_bit(WriteMostly, &rdev->flags)) { if (wonly < 0) wonly = i; } else { if (disk < 0) disk = i; } read_targets++; } atomic_inc(&rdev->nr_pending); bio->bi_sector = sector_nr + rdev->data_offset; bio->bi_bdev = rdev->bdev; bio->bi_private = r1_bio; } rcu_read_unlock(); if (disk < 0) disk = wonly; r1_bio->read_disk = disk; if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && read_targets > 0) /* extra read targets are also write targets */ write_targets += read_targets-1; if (write_targets == 0 || read_targets == 0) { /* There is nowhere to write, so all non-sync * drives must be failed - so we are finished */ sector_t rv = max_sector - sector_nr; *skipped = 1; put_buf(r1_bio); return rv; } nr_sectors = 0; sync_blocks = 0; do { struct page *page; int len = PAGE_SIZE; if (sector_nr + (len>>9) > max_sector) len = (max_sector - sector_nr) << 9; if (len == 0) break; if (sync_blocks == 0) { if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, still_degraded) && !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) break; BUG_ON(sync_blocks < (PAGE_SIZE>>9)); if (len > (sync_blocks<<9)) len = sync_blocks<<9; } for (i=0 ; i < conf->raid_disks; i++) { bio = r1_bio->bios[i]; if (bio->bi_end_io) { page = bio->bi_io_vec[bio->bi_vcnt].bv_page; if (bio_add_page(bio, page, len, 0) == 0) { /* stop here */ bio->bi_io_vec[bio->bi_vcnt].bv_page = page; while (i > 0) { i--; bio = r1_bio->bios[i]; if (bio->bi_end_io==NULL) continue; /* remove last page from this bio */ bio->bi_vcnt--; bio->bi_size -= len; bio->bi_flags &= ~(1<< BIO_SEG_VALID); } goto bio_full; } } } nr_sectors += len>>9; sector_nr += len>>9; sync_blocks -= (len>>9); } while (r1_bio->bios[disk]->bi_vcnt < RESYNC_PAGES); bio_full: r1_bio->sectors = nr_sectors; /* For a user-requested sync, we read all readable devices and do a * compare */ if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { atomic_set(&r1_bio->remaining, read_targets); for (i=0; i<conf->raid_disks; i++) { bio = r1_bio->bios[i]; if (bio->bi_end_io == end_sync_read) { md_sync_acct(bio->bi_bdev, nr_sectors); generic_make_request(bio); } } } else { atomic_set(&r1_bio->remaining, 1); bio = r1_bio->bios[r1_bio->read_disk]; md_sync_acct(bio->bi_bdev, nr_sectors); generic_make_request(bio); } return nr_sectors;}static int run(mddev_t *mddev){ conf_t *conf; int i, j, disk_idx; mirror_info_t *disk; mdk_rdev_t *rdev; struct list_head *tmp; if (mddev->level != 1) { printk("raid1: %s: raid level not set to mirroring (%d)\n", mdname(mddev), mddev->level); goto out; } if (mddev->reshape_position != MaxSector) { printk("raid1: %s: reshape_position set but not supported\n", mdname(mddev)); goto out; } /* * copy the already verified devices into our private RAID1 * bookkeeping area. [whatever we allocate in run(), * should be freed in stop()] */ conf = kzalloc(sizeof(conf_t), GFP_KERNEL); mddev->private = conf; if (!conf) goto out_no_mem; conf->mirrors = kzalloc(sizeof(struct mirror_info)*mddev->raid_disks, GFP_KERNEL); if (!conf->mirrors) goto out_no_mem; conf->tmppage = alloc_page(GFP_KERNEL); if (!conf->tmppage) goto out_no_mem; conf->poolinfo = kmalloc(sizeof(*conf->poolinfo), GFP_KERNEL); if (!conf->poolinfo) goto out_no_mem; conf->poolinfo->mddev = mddev; conf->poolinfo->raid_disks = mddev->raid_disks; conf->r1bio_pool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc, r1bio_pool_free, conf->poolinfo); if (!conf->r1bio_pool) goto out_no_mem; ITERATE_RDEV(mddev, rdev, tmp) { disk_idx = rdev->raid_disk; if (disk_idx >= mddev->raid_disks || disk_idx < 0) continue; disk = conf->mirrors + disk_idx; disk->rdev = rdev; blk_queue_stack_limits(mddev->queue, rdev->bdev->bd_disk->queue); /* as we don't honour merge_bvec_fn, we must never risk * violating it, so limit ->max_sector to one PAGE, as * a one page request is never in violation. */ if (rdev->bdev->bd_disk->queue->merge_bvec_fn && mddev->queue->max_sectors > (PAGE_SIZE>>9)) blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); disk->head_position = 0; } conf->raid_disks = mddev->raid_disks; conf->mddev = mddev; spin_lock_init(&conf->device_lock); INIT_LIST_HEAD(&conf->retry_list); spin_lock_init(&conf->resync_lock); init_waitqueue_head(&conf->wait_barrier); bio_list_init(&conf->pending_bio_list); bio_list_init(&conf->flushing_bio_list); mddev->degraded = 0; for (i = 0; i < conf->raid_disks; i++) { disk = conf->mirrors + i; if (!disk->rdev || !test_bit(In_sync, &disk->rdev->flags)) { disk->head_position = 0; mddev->degraded++; if (disk->rdev) conf->fullsync = 1; } } if (mddev->degraded == conf->raid_disks) { printk(KERN_ERR "raid1: no operational mirrors for %s\n", mdname(mddev)); goto out_free_conf; } if (conf->raid_disks - mddev->degraded == 1) mddev->recovery_cp = MaxSector; /* * find the first working one and use it as a starting point * to read balancing. */ for (j = 0; j < conf->raid_disks && (!conf->mirrors[j].rdev || !test_bit(In_sync, &conf->mirrors[j].rdev->flags)) ; j++) /* nothing */; conf->last_used = j; mddev->thread = md_register_thread(raid1d, mddev, "%s_raid1"); if (!mddev->thread) { printk(KERN_ERR "raid1: couldn't allocate thread for %s\n", mdname(mddev)); goto out_free_conf; } printk(KERN_INFO "raid1: raid set %s active with %d out of %d mirrors\n", mdname(mddev), mddev->raid_disks - mddev->degraded, mddev->raid_disks); /* * Ok, everything is just fine now */ mddev->array_size = mddev->size; mddev->queue->unplug_fn = raid1_unplug; mddev->queue->backing_dev_info.congested_fn = raid1_congested; mddev->queue->backing_dev_info.congested_data = mddev; return 0;out_no_mem: printk(KERN_ERR "raid1: couldn't allocate memory for %s\n", mdname(mddev));out_free_conf: if (conf) { if (conf->r1bio_pool) mempool_destroy(conf->r1bio_pool); kfree(conf->mirrors); safe_put_page(conf->tmppage); kfree(conf->poolinfo); kfree(conf); mddev->private = NULL; }out: return -EIO;}static int stop(mddev_t *mddev){ conf_t *conf = mddev_to_conf(mddev); struct bitmap *bitmap = mddev->bitmap; int behind_wait = 0; /* wait for behind writes to complete */ while (bitmap && atomic_read(&bitmap->behind_writes) > 0) { behind_wait++; printk(KERN_INFO "raid1: behind writes in progress on device %s, waiting to stop (%d)\n", mdname(mddev), behind_wait); set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(HZ); /* wait a second */ /* need to kick something here to make sure I/O goes? */ } md_unregister_thread(mddev->thread); mddev->thread = NULL; blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ if (conf->r1bio_pool) mempool_destroy(conf->r1bio_pool); kfree(conf->mirrors); kfree(conf->poolinfo); kfree(conf); mddev->private = NULL; return 0;}static int raid1_resize(mddev_t *mddev, sector_t sectors){ /* no resync is happening, and there is enough space * on all devices, so we can resize. * We need to make sure resync covers any new space. * If the array is shrinking we should possibly wait until * any io in the removed space completes, but it hardly seems * worth it. */ mddev->array_size = sectors>>1; set_capacity(mddev->gendisk, mddev->array_size << 1); mddev->changed = 1; if (mddev->array_size > mddev->size && mddev->recovery_cp == MaxSector) { mddev->recovery_cp = mddev->size << 1; set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); } mddev->size = mddev->array_size; mddev->resync_max_sectors = sectors; return 0;}static int raid1_reshape(mddev_t *mddev){ /* We need to: * 1/ resize the r1bio_pool * 2/ resize conf->mirrors * * We allocate a new r1bio_pool if we can. * Then raise a device barrier and wait until all IO stops. * Then resize conf->mirrors and swap in the new r1bio pool. * * At the same time, we "pack" the devices so that all the missing * devices have the higher raid_disk numbers. */ mempool_t *newpool, *oldpool; struct pool_info *newpoolinfo; mirror_info_t *newmirrors; conf_t *conf = mddev_to_conf(mddev); int cnt, raid_disks; unsigned long flags; int d, d2; /* Cannot change chunk_size, layout, or level */ if (mddev->chunk_size != mddev->new_chunk || mddev->layout != mddev->new_layout || mddev->level != mddev->new_level) { mddev->new_chunk = mddev->chunk_size; mddev->new_layout = mddev->layout; mddev->new_level = mddev->level; return -EINVAL; } md_allow_write(mddev); raid_disks = mddev->raid_disks + mddev->delta_disks; if (raid_disks < conf->raid_disks) { cnt=0; for (d= 0; d < conf->raid_disks; d++) if (conf->mirrors[d].rdev) cnt++; if (cnt > raid_disks) return -EBUSY; } newpoolinfo = kmalloc(sizeof(*newpoolinfo), GFP_KERNEL); if (!newpoolinfo) return -ENOMEM; newpoolinfo->mddev = mddev; newpoolinfo->raid_disks = raid_disks; newpool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc, r1bio_pool_free, newpoolinfo); if (!newpool) { kfree(newpoolinfo); return -ENOMEM; } newmirrors = kzalloc(sizeof(struct mirror_info) * raid_disks, GFP_KERNEL); if (!newmirrors) { kfree(newpoolinfo); mempool_destroy(newpool); return -ENOMEM; } raise_barrier(conf); /* ok, everything is stopped */ oldpool = conf->r1bio_pool; conf->r1bio_pool = newpool; for (d = d2 = 0; d < conf->raid_disks; d++) { mdk_rdev_t *rdev = conf->mirrors[d].rdev; if (rdev && rdev->raid_disk != d2) { char nm[20]; sprintf(nm, "rd%d", rdev->raid_disk); sysfs_remove_link(&mddev->kobj, nm); rdev->raid_disk = d2; sprintf(nm, "rd%d", rdev->raid_disk); sysfs_remove_link(&mddev->kobj, nm); if (sysfs_create_link(&mddev->kobj, &rdev->kobj, nm)) printk(KERN_WARNING "md/raid1: cannot register " "%s for %s\n", nm, mdname(mddev)); } if (rdev) newmirrors[d2++].rdev = rdev; } kfree(conf->mirrors); conf->mirrors = newmirrors; kfree(conf->poolinfo); conf->poolinfo = newpoolinfo; spin_lock_irqsave(&conf->device_lock, flags); mddev->degraded += (raid_disks - conf->raid_disks); spin_unlock_irqrestore(&conf->device_lock, flags); conf->raid_disks = mddev->raid_disks = raid_disks; mddev->delta_disks = 0; conf->last_used = 0; /* just make sure it is in-range */ lower_barrier(conf); set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); md_wakeup_thread(mddev->thread); mempool_destroy(oldpool); return 0;}static void raid1_quiesce(mddev_t *mddev, int state){ conf_t *conf = mddev_to_conf(mddev); switch(state) { case 1: raise_barrier(conf); break; case 0: lower_barrier(conf); break; }}static struct mdk_personality raid1_personality ={ .name = "raid1", .level = 1, .owner = THIS_MODULE, .make_request = make_request, .run = run, .stop = stop, .status = status, .error_handler = error, .hot_add_disk = raid1_add_disk, .hot_remove_disk= raid1_remove_disk, .spare_active = raid1_spare_active, .sync_request = sync_request, .resize = raid1_resize, .check_reshape = raid1_reshape, .quiesce = raid1_quiesce,};static int __init raid_init(void){ return register_md_personality(&raid1_personality);}static void raid_exit(void){ unregister_md_personality(&raid1_personality);}module_init(raid_init);module_exit(raid_exit);MODULE_LICENSE("GPL");MODULE_ALIAS("md-personality-3"); /* RAID1 */MODULE_ALIAS("md-raid1");MODULE_ALIAS("md-level-1");
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?