multipath.c
	/*
	 * Activate (mark read-write) the (now sync) spare disk,
	 * which means we switch its 'raid position' (->raid_disk)
	 * with the failed disk. (only the first 'conf->nr_disks'
	 * slots are used for 'real' disks and we must preserve this
	 * property)
	 */
	case DISKOP_SPARE_ACTIVE:
		sdisk = conf->multipaths + spare_disk;
		fdisk = conf->multipaths + failed_disk;

		spare_desc = &sb->disks[sdisk->number];
		failed_desc = &sb->disks[fdisk->number];

		if (spare_desc != *d) {
			MD_BUG();
			err = 1;
			goto abort;
		}

		if (spare_desc->raid_disk != sdisk->raid_disk) {
			MD_BUG();
			err = 1;
			goto abort;
		}

		if (sdisk->raid_disk != spare_disk) {
			MD_BUG();
			err = 1;
			goto abort;
		}

		if (failed_desc->raid_disk != fdisk->raid_disk) {
			MD_BUG();
			err = 1;
			goto abort;
		}

		if (fdisk->raid_disk != failed_disk) {
			MD_BUG();
			err = 1;
			goto abort;
		}

		/*
		 * do the switch finally
		 */
		spare_rdev = find_rdev_nr(mddev, spare_desc->number);
		failed_rdev = find_rdev_nr(mddev, failed_desc->number);
		xchg_values(spare_rdev->desc_nr, failed_rdev->desc_nr);
		spare_rdev->alias_device = 0;
		failed_rdev->alias_device = 1;

		xchg_values(*spare_desc, *failed_desc);
		xchg_values(*fdisk, *sdisk);

		/*
		 * (careful, 'failed' and 'spare' are switched from now on)
		 *
		 * we want to preserve linear numbering and we want to
		 * give the proper raid_disk number to the now activated
		 * disk. (this means we switch back these values)
		 */
		xchg_values(spare_desc->raid_disk, failed_desc->raid_disk);
		xchg_values(sdisk->raid_disk, fdisk->raid_disk);
		xchg_values(spare_desc->number, failed_desc->number);
		xchg_values(sdisk->number, fdisk->number);

		*d = failed_desc;

		if (sdisk->dev == MKDEV(0,0))
			sdisk->used_slot = 0;

		/*
		 * this really activates the spare.
		 */
		fdisk->spare = 0;

		/*
		 * if we activate a spare, we definitely replace a
		 * non-operational disk slot in the 'low' area of
		 * the disk array.
		 */
		conf->working_disks++;

		break;

	case DISKOP_HOT_REMOVE_DISK:
		rdisk = conf->multipaths + removed_disk;

		if (rdisk->spare && (removed_disk < conf->raid_disks)) {
			MD_BUG();
			err = 1;
			goto abort;
		}

		rdisk->dev = MKDEV(0,0);
		rdisk->used_slot = 0;
		conf->nr_disks--;
		break;

	case DISKOP_HOT_ADD_DISK:
		adisk = conf->multipaths + added_disk;
		added_desc = *d;

		if (added_disk != added_desc->number) {
			MD_BUG();
			err = 1;
			goto abort;
		}

		adisk->number = added_desc->number;
		adisk->raid_disk = added_desc->raid_disk;
		adisk->dev = MKDEV(added_desc->major,added_desc->minor);

		adisk->operational = 0;
		adisk->spare = 1;
		adisk->used_slot = 1;
		conf->nr_disks++;

		break;

	default:
		MD_BUG();
		err = 1;
		goto abort;
	}
abort:
	md_spin_unlock_irq(&conf->device_lock);

	print_multipath_conf(conf);
	return err;
}
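/*
 * Note on the xchg_values() calls above: the macro is not defined in this
 * file.  In the 2.4 md core it is a plain swap helper, roughly along the
 * lines of the sketch below (illustrative only; the real definition lives
 * in the md headers):
 *
 *	#define xchg_values(x, y) do {		\
 *		__typeof__(x) __tmp = x;	\
 *		x = y;				\
 *		y = __tmp;			\
 *	} while (0)
 */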
#define IO_ERROR KERN_ALERT \
"multipath: %s: unrecoverable IO read error for block %lu\n"
#define REDIRECT_SECTOR KERN_ERR \
"multipath: %s: redirecting sector %lu to another IO path\n"

/*
 * This is a kernel thread which:
 *
 *	1. Retries failed read operations on working multipaths.
 *	2. Updates the raid superblock when problems are encountered.
 *	3. Performs writes following reads for array synchronising.
 */
static void multipathd (void *data)
{
	struct multipath_bh *mp_bh;
	struct buffer_head *bh;
	unsigned long flags;
	mddev_t *mddev;
	kdev_t dev;

	for (;;) {
		md_spin_lock_irqsave(&retry_list_lock, flags);
		mp_bh = multipath_retry_list;
		if (!mp_bh)
			/* list is empty: leave the loop with the lock held; it is released below */
			break;
		multipath_retry_list = mp_bh->next_mp;
		md_spin_unlock_irqrestore(&retry_list_lock, flags);

		mddev = mp_bh->mddev;
		if (mddev->sb_dirty)
			md_update_sb(mddev);
		bh = &mp_bh->bh_req;
		dev = bh->b_dev;

		multipath_map (mddev, &bh->b_dev);
		if (bh->b_dev == dev) {
			printk (IO_ERROR,
				partition_name(bh->b_dev), bh->b_blocknr);
			multipath_end_bh_io(mp_bh, 0);
		} else {
			printk (REDIRECT_SECTOR,
				partition_name(bh->b_dev), bh->b_blocknr);
			bh->b_rdev = bh->b_dev;
			bh->b_rsector = bh->b_blocknr;
			generic_make_request (mp_bh->cmd, bh);
		}
	}
	md_spin_unlock_irqrestore(&retry_list_lock, flags);
}
#undef IO_ERROR
#undef REDIRECT_SECTOR

/*
 * This will catch the scenario in which one of the multipaths was
 * mounted as a normal device rather than as a part of a raid set.
 *
 * check_consistency is very personality-dependent, e.g. RAID5 cannot
 * do this check, it uses another method.
 */
static int __check_consistency (mddev_t *mddev, int row)
{
	multipath_conf_t *conf = mddev_to_conf(mddev);
	int disks = MD_SB_DISKS;
	kdev_t dev;
	struct buffer_head *bh = NULL;
	int i, rc = 0;
	char *buffer = NULL;

	for (i = 0; i < disks; i++) {
		if (!conf->multipaths[i].operational)
			continue;
		printk("(checking disk %d)\n",i);
		dev = conf->multipaths[i].dev;
		set_blocksize(dev, 4096);
		if ((bh = bread(dev, row / 4, 4096)) == NULL)
			break;
		if (!buffer) {
			buffer = (char *) __get_free_page(GFP_KERNEL);
			if (!buffer)
				break;
			memcpy(buffer, bh->b_data, 4096);
		} else if (memcmp(buffer, bh->b_data, 4096)) {
			rc = 1;
			break;
		}
		bforget(bh);
		fsync_dev(dev);
		invalidate_buffers(dev);
		bh = NULL;
	}
	if (buffer)
		free_page((unsigned long) buffer);
	if (bh) {
		dev = bh->b_dev;
		bforget(bh);
		fsync_dev(dev);
		invalidate_buffers(dev);
	}
	return rc;
}
static int check_consistency (mddev_t *mddev)
{
	if (__check_consistency(mddev, 0))
/*
 * we do not do this currently, as it's perfectly possible to
 * have an inconsistent array when it's freshly created. Only
 * newly written data has to be consistent.
 */
		return 0;

	return 0;
}

#define INVALID_LEVEL KERN_WARNING \
"multipath: md%d: raid level not set to multipath IO (%d)\n"

#define NO_SB KERN_ERR \
"multipath: disabled IO path %s (couldn't access raid superblock)\n"

#define ERRORS KERN_ERR \
"multipath: disabled IO path %s (errors detected)\n"

#define NOT_IN_SYNC KERN_ERR \
"multipath: making IO path %s a spare path (not in sync)\n"

#define INCONSISTENT KERN_ERR \
"multipath: disabled IO path %s (inconsistent descriptor)\n"

#define ALREADY_RUNNING KERN_ERR \
"multipath: disabled IO path %s (multipath %d already operational)\n"

#define OPERATIONAL KERN_INFO \
"multipath: device %s operational as IO path %d\n"

#define MEM_ERROR KERN_ERR \
"multipath: couldn't allocate memory for md%d\n"

#define SPARE KERN_INFO \
"multipath: spare IO path %s\n"

#define NONE_OPERATIONAL KERN_ERR \
"multipath: no operational IO paths for md%d\n"

#define SB_DIFFERENCES KERN_ERR \
"multipath: detected IO path differences!\n"

#define ARRAY_IS_ACTIVE KERN_INFO \
"multipath: array md%d active with %d out of %d IO paths (%d spare IO paths)\n"

#define THREAD_ERROR KERN_ERR \
"multipath: couldn't allocate thread for md%d\n"

static int multipath_run (mddev_t *mddev)
{
	multipath_conf_t *conf;
	int i, j, disk_idx;
	struct multipath_info *disk, *disk2;
	mdp_super_t *sb = mddev->sb;
	mdp_disk_t *desc, *desc2;
	mdk_rdev_t *rdev, *def_rdev = NULL;
	struct md_list_head *tmp;
	int num_rdevs = 0;

	MOD_INC_USE_COUNT;

	if (sb->level != -4) {
		printk(INVALID_LEVEL, mdidx(mddev), sb->level);
		goto out;
	}

	/*
	 * copy the already verified devices into our private MULTIPATH
	 * bookkeeping area. [whatever we allocate in multipath_run(),
	 * should be freed in multipath_stop()]
	 */
	conf = kmalloc(sizeof(multipath_conf_t), GFP_KERNEL);
	mddev->private = conf;
	if (!conf) {
		printk(MEM_ERROR, mdidx(mddev));
		goto out;
	}
	memset(conf, 0, sizeof(*conf));

	ITERATE_RDEV(mddev,rdev,tmp) {
		if (rdev->faulty) {
			/* this is a "should never happen" case and if it */
			/* ever does happen, a continue; won't help */
			printk(ERRORS, partition_name(rdev->dev));
			continue;
		} else {
			/* this is a "should never happen" case and if it */
			/* ever does happen, a continue; won't help */
			if (!rdev->sb) {
				MD_BUG();
				continue;
			}
		}
		if (rdev->desc_nr == -1) {
			MD_BUG();
			continue;
		}

		desc = &sb->disks[rdev->desc_nr];
		disk_idx = desc->raid_disk;
		disk = conf->multipaths + disk_idx;

		if (!disk_sync(desc))
			printk(NOT_IN_SYNC, partition_name(rdev->dev));

		/*
		 * Mark all disks as spare to start with, then pick our
		 * active disk.  If we have a disk that is marked active
		 * in the sb, then use it, else use the first rdev.
		 */
		disk->number = desc->number;
		disk->raid_disk = desc->raid_disk;
		disk->dev = rdev->dev;

		disk->operational = 0;
		disk->spare = 1;
		disk->used_slot = 1;
		mark_disk_sync(desc);

		if (disk_active(desc)) {
			if(!conf->working_disks) {
				printk(OPERATIONAL, partition_name(rdev->dev),
					desc->raid_disk);
				disk->operational = 1;
				disk->spare = 0;
				conf->working_disks++;
				def_rdev = rdev;
			} else {
				mark_disk_spare(desc);
			}
		} else
			mark_disk_spare(desc);

		if(!num_rdevs++)
			def_rdev = rdev;
	}

	if(!conf->working_disks && num_rdevs) {
		desc = &sb->disks[def_rdev->desc_nr];
		disk = conf->multipaths + desc->raid_disk;
		printk(OPERATIONAL, partition_name(def_rdev->dev),
			disk->raid_disk);
		disk->operational = 1;
		disk->spare = 0;
		conf->working_disks++;
		mark_disk_active(desc);
	}

	/*
	 * Make sure our active path is in desc spot 0
	 */
	if(def_rdev->desc_nr != 0) {
		rdev = find_rdev_nr(mddev, 0);
		desc = &sb->disks[def_rdev->desc_nr];
		desc2 = sb->disks;

		disk = conf->multipaths + desc->raid_disk;
		disk2 = conf->multipaths + desc2->raid_disk;

		xchg_values(*desc2,*desc);
		xchg_values(*disk2,*disk);
		xchg_values(desc2->number, desc->number);
		xchg_values(disk2->number, disk->number);
		xchg_values(desc2->raid_disk, desc->raid_disk);
		xchg_values(disk2->raid_disk, disk->raid_disk);

		if(rdev) {
			xchg_values(def_rdev->desc_nr,rdev->desc_nr);
		} else {
			def_rdev->desc_nr = 0;
		}
	}

	conf->raid_disks = sb->raid_disks = sb->active_disks = 1;
	conf->nr_disks = sb->nr_disks = sb->working_disks = num_rdevs;
	sb->failed_disks = 0;
	sb->spare_disks = num_rdevs - 1;
	mddev->sb_dirty = 1;
	conf->mddev = mddev;
	conf->device_lock = MD_SPIN_LOCK_UNLOCKED;

	init_waitqueue_head(&conf->wait_buffer);

	if (!conf->working_disks) {
		printk(NONE_OPERATIONAL, mdidx(mddev));
		goto out_free_conf;
	}

	/* pre-allocate some buffer_head structures.
	 * As a minimum, 1 mpbh and raid_disks buffer_heads
	 * would probably get us by in tight memory situations,
	 * but a few more is probably a good idea.
	 * For now, try NR_RESERVED_BUFS mpbh and
	 * NR_RESERVED_BUFS*raid_disks bufferheads
	 * This will allow at least NR_RESERVED_BUFS concurrent
	 * reads or writes even if kmalloc starts failing
	 */
	if (multipath_grow_mpbh(conf, NR_RESERVED_BUFS) < NR_RESERVED_BUFS) {
		printk(MEM_ERROR, mdidx(mddev));
		goto out_free_conf;
	}

	if ((sb->state & (1 << MD_SB_CLEAN))) {
		/*
		 * we do sanity checks even if the device says
		 * it's clean ...
		 */
		if (check_consistency(mddev)) {
			printk(SB_DIFFERENCES);
			sb->state &= ~(1 << MD_SB_CLEAN);
		}
	}

	{
		const char * name = "multipathd";

		conf->thread = md_register_thread(multipathd, conf, name);
		if (!conf->thread) {
			printk(THREAD_ERROR, mdidx(mddev));
			goto out_free_conf;
		}
	}

	/*
	 * Regenerate the "device is in sync with the raid set" bit for
	 * each device.
	 */
	for (i = 0; i < MD_SB_DISKS; i++) {
		mark_disk_nonsync(sb->disks+i);
		for (j = 0; j < sb->raid_disks; j++) {
			if (sb->disks[i].number == conf->multipaths[j].number)
				mark_disk_sync(sb->disks+i);
		}
	}

	printk(ARRAY_IS_ACTIVE, mdidx(mddev), sb->active_disks,
			sb->raid_disks, sb->spare_disks);

	/*
	 * Ok, everything is just fine now
	 */
	return 0;

out_free_conf:
	multipath_shrink_mpbh(conf);
	kfree(conf);
	mddev->private = NULL;
out:
	MOD_DEC_USE_COUNT;
	return -EIO;
}

#undef INVALID_LEVEL
#undef NO_SB
#undef ERRORS
#undef NOT_IN_SYNC
#undef INCONSISTENT
#undef ALREADY_RUNNING
#undef OPERATIONAL
#undef SPARE
#undef NONE_OPERATIONAL
#undef SB_DIFFERENCES
#undef ARRAY_IS_ACTIVE

static int multipath_stop (mddev_t *mddev)
{
	multipath_conf_t *conf = mddev_to_conf(mddev);

	md_unregister_thread(conf->thread);
	multipath_shrink_mpbh(conf);
	kfree(conf);
	mddev->private = NULL;
	MOD_DEC_USE_COUNT;
	return 0;
}

static mdk_personality_t multipath_personality=
{
	name:		"multipath",
	make_request:	multipath_make_request,
	run:		multipath_run,
	stop:		multipath_stop,
	status:		multipath_status,
	error_handler:	multipath_error,
	diskop:		multipath_diskop,
};

static int md__init multipath_init (void)
{
	return register_md_personality (MULTIPATH, &multipath_personality);
}

static void multipath_exit (void)
{
	unregister_md_personality (MULTIPATH);
}

module_init(multipath_init);
module_exit(multipath_exit);
MODULE_LICENSE("GPL");
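/*
 * For orientation only: register_md_personality() above plugs
 * multipath_personality into the md core's personality table, so an array
 * started at the MULTIPATH level dispatches through the hooks listed in
 * the structure.  A rough sketch of the expected call pattern
 * (illustrative, not code from this file; the real dispatch lives in the
 * md core):
 *
 *	mddev->pers = &multipath_personality;
 *	mddev->pers->run(mddev);			// array start -> multipath_run()
 *	mddev->pers->make_request(mddev, rw, bh);	// per-I/O path selection
 *	mddev->pers->stop(mddev);			// array stop -> multipath_stop()
 */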