raid1.c
				md_done_sync(mddev, bh->b_size>>9, 0);
			}
			break;
		case READ:
		case READA:
			dev = bh->b_dev;
			raid1_map (mddev, &bh->b_dev);
			if (bh->b_dev == dev) {
				printk (IO_ERROR,
					partition_name(bh->b_dev), bh->b_blocknr);
				raid1_end_bh_io(r1_bh, 0);
			} else {
				printk (REDIRECT_SECTOR,
					partition_name(bh->b_dev), bh->b_blocknr);
				bh->b_rdev = bh->b_dev;
				bh->b_rsector = bh->b_blocknr;
				generic_make_request (r1_bh->cmd, bh);
			}
			break;
		}
	}
	md_spin_unlock_irqrestore(&retry_list_lock, flags);
}
#undef IO_ERROR
#undef REDIRECT_SECTOR

/*
 * Private kernel thread to reconstruct mirrors after an unclean
 * shutdown.
 */
static void raid1syncd (void *data)
{
	raid1_conf_t *conf = data;
	mddev_t *mddev = conf->mddev;

	if (!conf->resync_mirrors)
		return;
	if (conf->resync_mirrors == 2)
		return;
	down(&mddev->recovery_sem);
	if (!md_do_sync(mddev, NULL)) {
		/*
		 * Only if everything went Ok.
		 */
		conf->resync_mirrors = 0;
	}

	close_sync(conf);

	up(&mddev->recovery_sem);
	raid1_shrink_buffers(conf);
}

/*
 * perform a "sync" on one "block"
 *
 * We need to make sure that no normal I/O request - particularly write
 * requests - conflict with active sync requests.
 * This is achieved by conceptually dividing the device space into a
 * number of sections:
 *  DONE:    0 .. a-1   These blocks are in-sync
 *  ACTIVE:  a .. b-1   These blocks may have active sync requests, but
 *                      no normal IO requests
 *  READY:   b .. c-1   These blocks have no normal IO requests - sync
 *                      request may be happening
 *  PENDING: c .. d-1   These blocks may have IO requests, but no new
 *                      ones will be added
 *  FUTURE:  d .. end   These blocks are not to be considered yet. IO may
 *                      be happening, but not sync
 *
 * We keep a
 *  phase  which flips (0 or 1) each time d moves and
 * a count of:
 *  z = active io requests in FUTURE since d moved - marked with
 *      current phase
 *  y = active io requests in FUTURE before d moved, or PENDING -
 *      marked with previous phase
 *  x = active sync requests in READY
 *  w = active sync requests in ACTIVE
 *  v = active io requests in DONE
 *
 * Normally, a=b=c=d=0 and z= active io requests
 *   or a=b=c=d=END and v= active io requests
 * Allowed changes to a,b,c,d:
 * A:  c==d && y==0 -> d+=window, y=z, z=0, phase=!phase
 * B:  y==0 -> c=d
 * C:  b=c, w+=x, x=0
 * D:  w==0 -> a=b
 * E:  a==b==c==d==end -> a=b=c=d=0, z=v, v=0
 *
 * At start of sync we apply A.
 * When y reaches 0, we apply B then A then begin sync requests.
 * When the sync point reaches c-1, we wait for y==0 and w==0, and
 * then apply B then A then D then C.
 * Finally, we apply E.
 *
 * The sync request simply issues a "read" against a working drive.
 * This is marked so that on completion the raid1d thread is woken to
 * issue suitable write requests.
 */
static int raid1_sync_request (mddev_t *mddev, unsigned long sector_nr)
{
	raid1_conf_t *conf = mddev_to_conf(mddev);
	struct mirror_info *mirror;
	struct raid1_bh *r1_bh;
	struct buffer_head *bh;
	int bsize;
	int disk;
	int block_nr;

	spin_lock_irq(&conf->segment_lock);
	if (!sector_nr) {
		/* initialize ...*/
		int buffs;
		conf->start_active = 0;
		conf->start_ready = 0;
		conf->start_pending = 0;
		conf->start_future = 0;
		conf->phase = 0;
		/* we want enough buffers to hold twice the window of 128 */
		buffs = 128 * 2 / (PAGE_SIZE>>9);
		buffs = raid1_grow_buffers(conf, buffs);
		if (buffs < 2)
			goto nomem;
		conf->window = buffs*(PAGE_SIZE>>9)/2;
		conf->cnt_future += conf->cnt_done+conf->cnt_pending;
		conf->cnt_done = conf->cnt_pending = 0;
		if (conf->cnt_ready || conf->cnt_active)
			MD_BUG();
	}
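	/*
	 * In the terms of the segment comment above, each pass of the
	 * loop below waits for w==0 (no sync requests left in ACTIVE)
	 * and y==0 (no IO left in PENDING), then applies D (a=b),
	 * C (b=c), B (c=d) and A (d+=window, flip phase) in one go,
	 * shuffling the counts to match.  The loop repeats until
	 * sector_nr falls inside the READY window.
	 */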
	while (sector_nr >= conf->start_pending) {
		PRINTK("wait .. sect=%lu start_active=%d ready=%d pending=%d future=%d, cnt_done=%d active=%d ready=%d pending=%d future=%d\n",
			sector_nr, conf->start_active, conf->start_ready,
			conf->start_pending, conf->start_future,
			conf->cnt_done, conf->cnt_active, conf->cnt_ready,
			conf->cnt_pending, conf->cnt_future);
		wait_event_lock_irq(conf->wait_done,
					!conf->cnt_active,
					conf->segment_lock);
		wait_event_lock_irq(conf->wait_ready,
					!conf->cnt_pending,
					conf->segment_lock);
		conf->start_active = conf->start_ready;
		conf->start_ready = conf->start_pending;
		conf->start_pending = conf->start_future;
		conf->start_future = conf->start_future+conf->window;
		// Note: falling off the end is not a problem
		conf->phase = conf->phase ^1;
		conf->cnt_active = conf->cnt_ready;
		conf->cnt_ready = 0;
		conf->cnt_pending = conf->cnt_future;
		conf->cnt_future = 0;
		wake_up(&conf->wait_done);
	}
	conf->cnt_ready++;
	spin_unlock_irq(&conf->segment_lock);

	/* If reconstructing, and >1 working disc,
	 * could dedicate one to rebuild and others to
	 * service read requests ..
	 */
	disk = conf->last_used;
	/* make sure disk is operational */
	while (!conf->mirrors[disk].operational) {
		if (disk <= 0)
			disk = conf->raid_disks;
		disk--;
		if (disk == conf->last_used)
			break;
	}
	conf->last_used = disk;

	mirror = conf->mirrors+conf->last_used;

	r1_bh = raid1_alloc_buf (conf);
	r1_bh->master_bh = NULL;
	r1_bh->mddev = mddev;
	r1_bh->cmd = SPECIAL;
	bh = &r1_bh->bh_req;

	/* coalesce sectors into the largest naturally aligned
	 * power-of-two block that fits in a page and stays within
	 * the device (sb->size is in KB, hence *2 for sectors) */
	block_nr = sector_nr;
	bsize = 512;
	while (!(block_nr & 1) && bsize < PAGE_SIZE
			&& (block_nr+2)*(bsize>>9) < (mddev->sb->size *2)) {
		block_nr >>= 1;
		bsize <<= 1;
	}
	bh->b_size = bsize;
	bh->b_list = BUF_LOCKED;
	bh->b_dev = mirror->dev;
	bh->b_rdev = mirror->dev;
	bh->b_state = (1<<BH_Req) | (1<<BH_Mapped) | (1<<BH_Lock);
	if (!bh->b_page)
		BUG();
	if (!bh->b_data)
		BUG();
	if (bh->b_data != page_address(bh->b_page))
		BUG();
	bh->b_end_io = end_sync_read;
	bh->b_private = r1_bh;
	bh->b_blocknr = sector_nr;
	bh->b_rsector = sector_nr;
	init_waitqueue_head(&bh->b_wait);

	generic_make_request(READ, bh);
	md_sync_acct(bh->b_dev, bh->b_size/512);

	return (bsize >> 9);

nomem:
	raid1_shrink_buffers(conf);
	spin_unlock_irq(&conf->segment_lock);
	return -ENOMEM;
}
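/*
 * Completion path for the sync read issued above: end_sync_read()
 * only records success/failure and hands the raid1_bh back to the
 * raid1d thread, which issues the mirror writes; end_sync_write()
 * runs when the last of those writes finishes and credits the synced
 * sectors back via md_done_sync().
 */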
static void end_sync_read(struct buffer_head *bh, int uptodate)
{
	struct raid1_bh * r1_bh = (struct raid1_bh *)(bh->b_private);

	/* we have read a block, now it needs to be re-written,
	 * or re-read if the read failed.
	 * We don't do much here, just schedule handling by raid1d
	 */
	if (!uptodate)
		md_error (r1_bh->mddev, bh->b_dev);
	else
		set_bit(R1BH_Uptodate, &r1_bh->state);
	raid1_reschedule_retry(r1_bh);
}

static void end_sync_write(struct buffer_head *bh, int uptodate)
{
	struct raid1_bh * r1_bh = (struct raid1_bh *)(bh->b_private);

	if (!uptodate)
		md_error (r1_bh->mddev, bh->b_dev);
	if (atomic_dec_and_test(&r1_bh->remaining)) {
		mddev_t *mddev = r1_bh->mddev;
		unsigned long sect = bh->b_blocknr;
		int size = bh->b_size;
		raid1_free_buf(r1_bh);
		sync_request_done(sect, mddev_to_conf(mddev));
		md_done_sync(mddev, size>>9, uptodate);
	}
}

#define INVALID_LEVEL KERN_WARNING \
"raid1: md%d: raid level not set to mirroring (%d)\n"

#define NO_SB KERN_ERR \
"raid1: disabled mirror %s (couldn't access raid superblock)\n"

#define ERRORS KERN_ERR \
"raid1: disabled mirror %s (errors detected)\n"

#define NOT_IN_SYNC KERN_ERR \
"raid1: disabled mirror %s (not in sync)\n"

#define INCONSISTENT KERN_ERR \
"raid1: disabled mirror %s (inconsistent descriptor)\n"

#define ALREADY_RUNNING KERN_ERR \
"raid1: disabled mirror %s (mirror %d already operational)\n"

#define OPERATIONAL KERN_INFO \
"raid1: device %s operational as mirror %d\n"

#define MEM_ERROR KERN_ERR \
"raid1: couldn't allocate memory for md%d\n"

#define SPARE KERN_INFO \
"raid1: spare disk %s\n"

#define NONE_OPERATIONAL KERN_ERR \
"raid1: no operational mirrors for md%d\n"

#define ARRAY_IS_ACTIVE KERN_INFO \
"raid1: raid set md%d active with %d out of %d mirrors\n"

#define THREAD_ERROR KERN_ERR \
"raid1: couldn't allocate thread for md%d\n"

#define START_RESYNC KERN_WARNING \
"raid1: raid set md%d not clean; reconstructing mirrors\n"
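/*
 * raid1_run() is the personality "run" hook: it builds the private
 * raid1_conf_t for the array, classifying every descriptor in the
 * superblock as failed, active (which must also be marked in-sync),
 * or spare.  Only active, in-sync mirrors count toward working_disks.
 */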
static int raid1_run (mddev_t *mddev)
{
	raid1_conf_t *conf;
	int i, j, disk_idx;
	struct mirror_info *disk;
	mdp_super_t *sb = mddev->sb;
	mdp_disk_t *descriptor;
	mdk_rdev_t *rdev;
	struct md_list_head *tmp;
	int start_recovery = 0;

	MOD_INC_USE_COUNT;

	if (sb->level != 1) {
		printk(INVALID_LEVEL, mdidx(mddev), sb->level);
		goto out;
	}
	/*
	 * copy the already verified devices into our private RAID1
	 * bookkeeping area. [whatever we allocate in raid1_run(),
	 * should be freed in raid1_stop()]
	 */

	conf = kmalloc(sizeof(raid1_conf_t), GFP_KERNEL);
	mddev->private = conf;
	if (!conf) {
		printk(MEM_ERROR, mdidx(mddev));
		goto out;
	}
	memset(conf, 0, sizeof(*conf));

	ITERATE_RDEV(mddev,rdev,tmp) {
		if (rdev->faulty) {
			printk(ERRORS, partition_name(rdev->dev));
		} else {
			if (!rdev->sb) {
				MD_BUG();
				continue;
			}
		}
		if (rdev->desc_nr == -1) {
			MD_BUG();
			continue;
		}
		descriptor = &sb->disks[rdev->desc_nr];
		disk_idx = descriptor->raid_disk;
		disk = conf->mirrors + disk_idx;

		if (disk_faulty(descriptor)) {
			disk->number = descriptor->number;
			disk->raid_disk = disk_idx;
			disk->dev = rdev->dev;
			disk->sect_limit = MAX_WORK_PER_DISK;
			disk->operational = 0;
			disk->write_only = 0;
			disk->spare = 0;
			disk->used_slot = 1;
			disk->head_position = 0;
			continue;
		}
		if (disk_active(descriptor)) {
			if (!disk_sync(descriptor)) {
				printk(NOT_IN_SYNC,
					partition_name(rdev->dev));
				continue;
			}
			if ((descriptor->number > MD_SB_DISKS) ||
					(disk_idx > sb->raid_disks)) {
				printk(INCONSISTENT,
					partition_name(rdev->dev));
				continue;
			}
			if (disk->operational) {
				printk(ALREADY_RUNNING,
					partition_name(rdev->dev),
					disk_idx);
				continue;
			}
			printk(OPERATIONAL, partition_name(rdev->dev),
				disk_idx);
			disk->number = descriptor->number;
			disk->raid_disk = disk_idx;
			disk->dev = rdev->dev;
			disk->sect_limit = MAX_WORK_PER_DISK;
			disk->operational = 1;
			disk->write_only = 0;
			disk->spare = 0;
			disk->used_slot = 1;
			disk->head_position = 0;
			conf->working_disks++;
		} else {
			/*
			 * Must be a spare disk ..
			 */
			printk(SPARE, partition_name(rdev->dev));
			disk->number = descriptor->number;
			disk->raid_disk = disk_idx;
			disk->dev = rdev->dev;
			disk->sect_limit = MAX_WORK_PER_DISK;
			disk->operational = 0;
			disk->write_only = 0;
			disk->spare = 1;
			disk->used_slot = 1;
			disk->head_position = 0;
		}
	}
	conf->raid_disks = sb->raid_disks;
	conf->nr_disks = sb->nr_disks;
	conf->mddev = mddev;
	conf->device_lock = MD_SPIN_LOCK_UNLOCKED;

	conf->segment_lock = MD_SPIN_LOCK_UNLOCKED;
	init_waitqueue_head(&conf->wait_buffer);
	init_waitqueue_head(&conf->wait_done);
	init_waitqueue_head(&conf->wait_ready);

	if (!conf->working_disks) {
		printk(NONE_OPERATIONAL, mdidx(mddev));
		goto out_free_conf;
	}

	/* pre-allocate some buffer_head structures.
	 * As a minimum, 1 r1bh and raid_disks buffer_heads
	 * would probably get us by in tight memory situations,
	 * but a few more is probably a good idea.
	 * For now, try NR_RESERVED_BUFS r1bh and
	 * NR_RESERVED_BUFS*raid_disks bufferheads
	 * This will allow at least NR_RESERVED_BUFS concurrent
	 * reads or writes even if kmalloc starts failing
	 */
	if (raid1_grow_r1bh(conf, NR_RESERVED_BUFS) < NR_RESERVED_BUFS ||
	    raid1_grow_bh(conf, NR_RESERVED_BUFS*conf->raid_disks)
			      < NR_RESERVED_BUFS*conf->raid_disks) {
		printk(MEM_ERROR, mdidx(mddev));
		goto out_free_conf;
	}

	for (i = 0; i < MD_SB_DISKS; i++) {
		descriptor = sb->disks+i;
		disk_idx = descriptor->raid_disk;
		disk = conf->mirrors + disk_idx;

		if (disk_faulty(descriptor) && (disk_idx < conf->raid_disks) &&
				!disk->used_slot) {
			disk->number = descriptor->number;
			disk->raid_disk = disk_idx;
			disk->dev = MKDEV(0,0);
			disk->operational = 0;
			disk->write_only = 0;
			disk->spare = 0;
			disk->used_slot = 1;
			disk->head_position = 0;
		}
	}
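	/*
	 * From here on: pick the read-balancing starting point,
	 * register the raid1d worker thread and, if the array was
	 * shut down uncleanly with more than one working mirror,
	 * kick off the raid1syncd resync thread.
	 */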
	/*
	 * find the first working one and use it as a starting point
	 * for read balancing.
	 */
	for (j = 0; j < MD_SB_DISKS &&
			!conf->mirrors[j].operational; j++)
		/* nothing */;
	conf->last_used = j;

	if (conf->working_disks != sb->raid_disks) {
		printk(KERN_ALERT "raid1: md%d, not all disks are operational -- trying to recover array\n",
			mdidx(mddev));
		start_recovery = 1;
	}

	{
		const char * name = "raid1d";

		conf->thread = md_register_thread(raid1d, conf, name);
		if (!conf->thread) {
			printk(THREAD_ERROR, mdidx(mddev));
			goto out_free_conf;
		}
	}

	if (!start_recovery && !(sb->state & (1 << MD_SB_CLEAN)) &&
	    (conf->working_disks > 1)) {
		const char * name = "raid1syncd";

		conf->resync_thread = md_register_thread(raid1syncd, conf, name);
		if (!conf->resync_thread) {
			printk(THREAD_ERROR, mdidx(mddev));
			goto out_free_conf;
		}

		printk(START_RESYNC, mdidx(mddev));
		conf->resync_mirrors = 1;
		md_wakeup_thread(conf->resync_thread);
	}

	/*
	 * Regenerate the "device is in sync with the raid set" bit for
	 * each device.
	 */
	for (i = 0; i < MD_SB_DISKS; i++) {
		mark_disk_nonsync(sb->disks+i);
		for (j = 0; j < sb->raid_disks; j++) {
			if (!conf->mirrors[j].operational)
				continue;
			if (sb->disks[i].number == conf->mirrors[j].number)
				mark_disk_sync(sb->disks+i);
		}
	}
	sb->active_disks = conf->working_disks;

	if (start_recovery)
		md_recover_arrays();

	printk(ARRAY_IS_ACTIVE, mdidx(mddev), sb->active_disks, sb->raid_disks);
	/*
	 * Ok, everything is just fine now
	 */
	return 0;

out_free_conf:
	raid1_shrink_r1bh(conf);
	raid1_shrink_bh(conf);
	raid1_shrink_buffers(conf);
	kfree(conf);
	mddev->private = NULL;
out:
	MOD_DEC_USE_COUNT;
	return -EIO;
}

#undef INVALID_LEVEL
#undef NO_SB
#undef ERRORS
#undef NOT_IN_SYNC
#undef INCONSISTENT
#undef ALREADY_RUNNING
#undef OPERATIONAL
#undef SPARE
#undef NONE_OPERATIONAL
#undef ARRAY_IS_ACTIVE

static int raid1_stop_resync (mddev_t *mddev)
{
	raid1_conf_t *conf = mddev_to_conf(mddev);

	if (conf->resync_thread) {
		if (conf->resync_mirrors) {
			conf->resync_mirrors = 2;
			md_interrupt_thread(conf->resync_thread);
			printk(KERN_INFO "raid1: mirror resync was not fully finished, restarting next time.\n");
			return 1;
		}
		return 0;
	}
	return 0;
}

static int raid1_restart_resync (mddev_t *mddev)
{
	raid1_conf_t *conf = mddev_to_conf(mddev);

	if (conf->resync_mirrors) {
		if (!conf->resync_thread) {
			MD_BUG();
			return 0;
		}
		conf->resync_mirrors = 1;
		md_wakeup_thread(conf->resync_thread);
		return 1;
	}
	return 0;
}

static int raid1_stop (mddev_t *mddev)
{
	raid1_conf_t *conf = mddev_to_conf(mddev);

	md_unregister_thread(conf->thread);
	if (conf->resync_thread)
		md_unregister_thread(conf->resync_thread);
	raid1_shrink_r1bh(conf);
	raid1_shrink_bh(conf);
	raid1_shrink_buffers(conf);
	kfree(conf);
	mddev->private = NULL;
	MOD_DEC_USE_COUNT;
	return 0;
}

static mdk_personality_t raid1_personality=
{
	name:		"raid1",
	make_request:	raid1_make_request,
	run:		raid1_run,
	stop:		raid1_stop,
	status:		raid1_status,
	error_handler:	raid1_error,
	diskop:		raid1_diskop,
	stop_resync:	raid1_stop_resync,
	restart_resync:	raid1_restart_resync,
	sync_request:	raid1_sync_request
};

static int md__init raid1_init (void)
{
	return register_md_personality (RAID1, &raid1_personality);
}

static void raid1_exit (void)
{
	unregister_md_personality (RAID1);
}

module_init(raid1_init);
module_exit(raid1_exit);
MODULE_LICENSE("GPL");
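
/*
 * Illustration (not part of the driver): a minimal userspace sketch of
 * the DONE/ACTIVE/READY/PENDING/FUTURE window bookkeeping that
 * raid1_sync_request() performs under segment_lock.  The struct and
 * names below are made up for the example; only the boundary
 * arithmetic mirrors the real code.
 */
#include <stdio.h>

struct segments { unsigned long active, ready, pending, future; int phase; };

/* One pass of the sliding-window advance: a=b, b=c, c=d, d+=window. */
static void advance(struct segments *s, unsigned long window)
{
	s->active = s->ready;
	s->ready = s->pending;
	s->pending = s->future;
	s->future += window;
	s->phase ^= 1;
}

int main(void)
{
	struct segments s = {0, 0, 0, 0, 0};
	unsigned long sector = 200, window = 64;

	/* Keep sliding until the sync sector falls below start_pending,
	 * exactly as the while loop in raid1_sync_request() does. */
	while (sector >= s.pending) {
		advance(&s, window);
		printf("active=%lu ready=%lu pending=%lu future=%lu phase=%d\n",
		       s.active, s.ready, s.pending, s.future, s.phase);
	}
	return 0;
}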