md.c
 */
		if (calc_sb_csum(rdev->sb) != rdev->sb->sb_csum) {
			if (rdev->sb->events_lo || rdev->sb->events_hi)
				if ((rdev->sb->events_lo--)==0)
					rdev->sb->events_hi--;
		}

		printk("%s's event counter: %08lx\n", partition_name(rdev->dev),
			(unsigned long)rdev->sb->events_lo);
		if (!freshest) {
			freshest = rdev;
			continue;
		}
		/*
		 * Find the newest superblock version
		 */
		ev1 = md_event(rdev->sb);
		ev2 = md_event(freshest->sb);
		if (ev1 != ev2) {
			out_of_date = 1;
			if (ev1 > ev2)
				freshest = rdev;
		}
	}
	if (out_of_date) {
		printk(OUT_OF_DATE);
		printk("freshest: %s\n", partition_name(freshest->dev));
	}
	memcpy (sb, freshest->sb, sizeof(*sb));

	/*
	 * at this point we have picked the 'best' superblock
	 * from all available superblocks.
	 * now we validate this superblock and kick out possibly
	 * failed disks.
	 */
	ITERATE_RDEV(mddev,rdev,tmp) {
		/*
		 * Kick all non-fresh devices faulty
		 */
		__u64 ev1, ev2;

		ev1 = md_event(rdev->sb);
		ev2 = md_event(sb);
		++ev1;
		if (ev1 < ev2) {
			printk("md: kicking non-fresh %s from array!\n",
						partition_name(rdev->dev));
			kick_rdev_from_array(rdev);
			continue;
		}
	}

	/*
	 * Fix up changed device names ... but only if this disk has a
	 * recent update time. Use faulty checksum ones too.
	 */
	ITERATE_RDEV(mddev,rdev,tmp) {
		__u64 ev1, ev2, ev3;

		if (rdev->faulty) { /* REMOVEME */
			MD_BUG();
			goto abort;
		}
		ev1 = md_event(rdev->sb);
		ev2 = md_event(sb);
		ev3 = ev2;
		--ev3;
		if ((rdev->dev != rdev->old_dev) &&
		    ((ev1 == ev2) || (ev1 == ev3))) {
			mdp_disk_t *desc;

			printk("md: device name has changed from %s to %s since last import!\n",
			       partition_name(rdev->old_dev),
			       partition_name(rdev->dev));
			if (rdev->desc_nr == -1) {
				MD_BUG();
				goto abort;
			}
			desc = &sb->disks[rdev->desc_nr];
			if (rdev->old_dev != MKDEV(desc->major, desc->minor)) {
				MD_BUG();
				goto abort;
			}
			desc->major = MAJOR(rdev->dev);
			desc->minor = MINOR(rdev->dev);
			desc = &rdev->sb->this_disk;
			desc->major = MAJOR(rdev->dev);
			desc->minor = MINOR(rdev->dev);
		}
	}

	/*
	 * Remove unavailable and faulty devices ...
	 *
	 * note that if an array becomes completely unrunnable due to
	 * missing devices, we do not write the superblock back, so the
	 * administrator has a chance to fix things up. The removal thus
	 * only happens if it's nonfatal to the contents of the array.
	 */
	for (i = 0; i < MD_SB_DISKS; i++) {
		int found;
		mdp_disk_t *desc;
		kdev_t dev;

		desc = sb->disks + i;
		dev = MKDEV(desc->major, desc->minor);

		/*
		 * We kick faulty devices/descriptors immediately.
		 */
		if (disk_faulty(desc)) {
			found = 0;
			ITERATE_RDEV(mddev,rdev,tmp) {
				if (rdev->desc_nr != desc->number)
					continue;
				printk("md%d: kicking faulty %s!\n",
					mdidx(mddev), partition_name(rdev->dev));
				kick_rdev_from_array(rdev);
				found = 1;
				break;
			}
			if (!found) {
				if (dev == MKDEV(0,0))
					continue;
				printk("md%d: removing former faulty %s!\n",
					mdidx(mddev), partition_name(dev));
			}
			remove_descriptor(desc, sb);
			continue;
		}

		if (dev == MKDEV(0,0))
			continue;

		/*
		 * Is this device present in the rdev ring?
		 */
		found = 0;
		ITERATE_RDEV(mddev,rdev,tmp) {
			if (rdev->desc_nr == desc->number) {
				found = 1;
				break;
			}
		}
		if (found)
			continue;

		printk("md%d: former device %s is unavailable, removing from array!\n",
			mdidx(mddev), partition_name(dev));
		remove_descriptor(desc, sb);
	}

	/*
	 * Double check whether all devices mentioned in the
	 * superblock are in the rdev ring.
	 */
	for (i = 0; i < MD_SB_DISKS; i++) {
		mdp_disk_t *desc;
		kdev_t dev;

		desc = sb->disks + i;
		dev = MKDEV(desc->major, desc->minor);

		if (dev == MKDEV(0,0))
			continue;

		if (disk_faulty(desc)) {
			MD_BUG();
			goto abort;
		}

		rdev = find_rdev(mddev, dev);
		if (!rdev) {
			MD_BUG();
			goto abort;
		}
	}

	/*
	 * Do a final reality check.
	 */
	ITERATE_RDEV(mddev,rdev,tmp) {
		if (rdev->desc_nr == -1) {
			MD_BUG();
			goto abort;
		}
		/*
		 * is the desc_nr unique?
		 */
		ITERATE_RDEV(mddev,rdev2,tmp2) {
			if ((rdev2 != rdev) &&
			    (rdev2->desc_nr == rdev->desc_nr)) {
				MD_BUG();
				goto abort;
			}
		}
		/*
		 * is the device unique?
		 */
		ITERATE_RDEV(mddev,rdev2,tmp2) {
			if ((rdev2 != rdev) &&
			    (rdev2->dev == rdev->dev)) {
				MD_BUG();
				goto abort;
			}
		}
	}

	/*
	 * Check if we can support this RAID array
	 */
	if (sb->major_version != MD_MAJOR_VERSION ||
	    sb->minor_version > MD_MINOR_VERSION) {
		printk(OLD_VERSION, mdidx(mddev), sb->major_version,
			sb->minor_version, sb->patch_version);
		goto abort;
	}

	if ((sb->state != (1 << MD_SB_CLEAN)) && ((sb->level == 1) ||
			(sb->level == 4) || (sb->level == 5)))
		printk(NOT_CLEAN_IGNORE, mdidx(mddev));

	return 0;
abort:
	return 1;
}

#undef INCONSISTENT
#undef OUT_OF_DATE
#undef OLD_VERSION
#undef OLD_LEVEL
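/*
 * Illustrative sketch, not part of the original md.c (names are made up):
 * the superblock event count handled above is a 64-bit value stored as two
 * 32-bit halves, events_hi and events_lo, which is why the checksum-failure
 * path decrements events_lo with a manual borrow into events_hi. A
 * standalone equivalent of reassembling and comparing two such counters,
 * in the spirit of md_event(), kept under #if 0 so it cannot affect the
 * build:
 */
#if 0
static unsigned long long example_md_event(unsigned int events_hi,
					   unsigned int events_lo)
{
	/* the high half contributes the upper 32 bits of the counter */
	return ((unsigned long long)events_hi << 32) | events_lo;
}

/* nonzero if counter a is strictly fresher than counter b */
static int example_is_fresher(unsigned int a_hi, unsigned int a_lo,
			      unsigned int b_hi, unsigned int b_lo)
{
	return example_md_event(a_hi, a_lo) > example_md_event(b_hi, b_lo);
}
#endif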
static int device_size_calculation (mddev_t * mddev)
{
	int data_disks = 0, persistent;
	unsigned int readahead;
	mdp_super_t *sb = mddev->sb;
	struct md_list_head *tmp;
	mdk_rdev_t *rdev;

	/*
	 * Do device size calculation. Bail out if too small.
	 * (we have to do this after having validated chunk_size,
	 * because device size has to be modulo chunk_size)
	 */
	persistent = !mddev->sb->not_persistent;
	ITERATE_RDEV(mddev,rdev,tmp) {
		if (rdev->faulty)
			continue;
		if (rdev->size) {
			MD_BUG();
			continue;
		}
		rdev->size = calc_dev_size(rdev->dev, mddev, persistent);
		if (rdev->size < sb->chunk_size / 1024) {
			printk(KERN_WARNING
				"Dev %s smaller than chunk_size: %ldk < %dk\n",
				partition_name(rdev->dev),
				rdev->size, sb->chunk_size / 1024);
			return -EINVAL;
		}
	}

	switch (sb->level) {
		case -3:
			data_disks = 1;
			break;
		case -2:
			data_disks = 1;
			break;
		case -1:
			zoned_raid_size(mddev);
			data_disks = 1;
			break;
		case 0:
			zoned_raid_size(mddev);
			data_disks = sb->raid_disks;
			break;
		case 1:
			data_disks = 1;
			break;
		case 4:
		case 5:
			data_disks = sb->raid_disks-1;
			break;
		default:
			printk(UNKNOWN_LEVEL, mdidx(mddev), sb->level);
			goto abort;
	}
	if (!md_size[mdidx(mddev)])
		md_size[mdidx(mddev)] = sb->size * data_disks;

	readahead = MD_READAHEAD;
	if ((sb->level == 0) || (sb->level == 4) || (sb->level == 5)) {
		readahead = (mddev->sb->chunk_size>>PAGE_SHIFT) * 4 * data_disks;
		if (readahead < data_disks * (MAX_SECTORS>>(PAGE_SHIFT-9))*2)
			readahead = data_disks * (MAX_SECTORS>>(PAGE_SHIFT-9))*2;
	} else {
		if (sb->level == -3)
			readahead = 0;
	}
	md_maxreadahead[mdidx(mddev)] = readahead;

	printk(KERN_INFO "md%d: max total readahead window set to %ldk\n",
		mdidx(mddev), readahead*(PAGE_SIZE/1024));

	printk(KERN_INFO
		"md%d: %d data-disks, max readahead per data-disk: %ldk\n",
		mdidx(mddev), data_disks, readahead/data_disks*(PAGE_SIZE/1024));
	return 0;

abort:
	return 1;
}
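/*
 * Worked example for the level switch above (sketch, the numbers are
 * hypothetical): a RAID5 array with sb->raid_disks == 4 yields
 * data_disks == 3, since one disk's worth of space goes to parity; with
 * sb->size == 1000000 (per-disk size in 1K blocks) the exported array
 * size becomes md_size == 3 * 1000000 = 3000000 1K blocks.
 */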
#define TOO_BIG_CHUNKSIZE KERN_ERR \
"too big chunk_size: %d > %d\n"

#define TOO_SMALL_CHUNKSIZE KERN_ERR \
"too small chunk_size: %d < %ld\n"

#define BAD_CHUNKSIZE KERN_ERR \
"no chunksize specified, see 'man raidtab'\n"

static int do_md_run (mddev_t * mddev)
{
	int pnum, err;
	int chunk_size;
	struct md_list_head *tmp;
	mdk_rdev_t *rdev;

	if (!mddev->nb_dev) {
		MD_BUG();
		return -EINVAL;
	}

	if (mddev->pers)
		return -EBUSY;

	/*
	 * Resize disks to align partition sizes on a given
	 * chunk size.
	 */
	md_size[mdidx(mddev)] = 0;

	/*
	 * Analyze all RAID superblock(s)
	 */
	if (analyze_sbs(mddev)) {
		MD_BUG();
		return -EINVAL;
	}

	chunk_size = mddev->sb->chunk_size;
	pnum = level_to_pers(mddev->sb->level);

	mddev->param.chunk_size = chunk_size;
	mddev->param.personality = pnum;

	if (chunk_size > MAX_CHUNK_SIZE) {
		printk(TOO_BIG_CHUNKSIZE, chunk_size, MAX_CHUNK_SIZE);
		return -EINVAL;
	}
	/*
	 * chunk-size has to be a power of 2 and a multiple of PAGE_SIZE
	 */
	if ((1 << ffz(~chunk_size)) != chunk_size) {
		MD_BUG();
		return -EINVAL;
	}
	if (chunk_size < PAGE_SIZE) {
		printk(TOO_SMALL_CHUNKSIZE, chunk_size, PAGE_SIZE);
		return -EINVAL;
	}

	if (pnum >= MAX_PERSONALITY) {
		MD_BUG();
		return -EINVAL;
	}

	if ((pnum != RAID1) && (pnum != LINEAR) && !chunk_size) {
		/*
		 * 'default chunksize' in the old md code used to
		 * be PAGE_SIZE, baaad.
		 * we abort here to be on the safe side. We don't
		 * want to continue the bad practice.
		 */
		printk(BAD_CHUNKSIZE);
		return -EINVAL;
	}

	if (!pers[pnum])
	{
#ifdef CONFIG_KMOD
		char module_name[80];
		sprintf (module_name, "md-personality-%d", pnum);
		request_module (module_name);
		if (!pers[pnum])
#endif
			return -EINVAL;
	}

	if (device_size_calculation(mddev))
		return -EINVAL;

	/*
	 * Drop all container device buffers, from now on
	 * the only valid external interface is through the md
	 * device.
	 * Also find largest hardsector size
	 */
	md_hardsect_sizes[mdidx(mddev)] = 512;
	ITERATE_RDEV(mddev,rdev,tmp) {
		if (rdev->faulty)
			continue;
		fsync_dev(rdev->dev);
		invalidate_buffers(rdev->dev);
		if (get_hardsect_size(rdev->dev)
			> md_hardsect_sizes[mdidx(mddev)])
			md_hardsect_sizes[mdidx(mddev)] =
				get_hardsect_size(rdev->dev);
	}
	md_blocksizes[mdidx(mddev)] = 1024;
	if (md_blocksizes[mdidx(mddev)] < md_hardsect_sizes[mdidx(mddev)])
		md_blocksizes[mdidx(mddev)] = md_hardsect_sizes[mdidx(mddev)];

	mddev->pers = pers[pnum];

	err = mddev->pers->run(mddev);
	if (err) {
		printk("pers->run() failed ...\n");
		mddev->pers = NULL;
		return -EINVAL;
	}

	mddev->sb->state &= ~(1 << MD_SB_CLEAN);
	md_update_sb(mddev);

	/*
	 * md_size has units of 1K blocks, which are
	 * twice as large as sectors.
	 */
	md_hd_struct[mdidx(mddev)].start_sect = 0;
	md_hd_struct[mdidx(mddev)].nr_sects = md_size[mdidx(mddev)] << 1;

	read_ahead[MD_MAJOR] = 1024;
	return (0);
}

#undef TOO_BIG_CHUNKSIZE
#undef BAD_CHUNKSIZE

#define OUT(x) do { err = (x); goto out; } while (0)

static int restart_array (mddev_t *mddev)
{
	int err = 0;

	/*
	 * Complain if it has no devices
	 */
	if (!mddev->nb_dev)
		OUT(-ENXIO);

	if (mddev->pers) {
		if (!mddev->ro)
			OUT(-EBUSY);

		mddev->ro = 0;
		set_device_ro(mddev_to_kdev(mddev), 0);

		printk(KERN_INFO
			"md%d switched to read-write mode.\n", mdidx(mddev));
		/*
		 * Kick recovery or resync if necessary
		 */
		md_recover_arrays();
		if (mddev->pers->restart_resync)
			mddev->pers->restart_resync(mddev);
	} else
		err = -EINVAL;

out:
	return err;
}
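/*
 * Illustrative sketch, not part of the original md.c: the chunk-size test
 * in do_md_run() uses ffz(~chunk_size), which gives the bit index of the
 * lowest set bit of chunk_size, so (1 << ffz(~x)) == x holds exactly when
 * x has a single bit set, i.e. is a power of 2. A kernel-independent
 * equivalent is the classic x & (x - 1) test; kept under #if 0 so it
 * cannot affect the build:
 */
#if 0
static int example_is_power_of_two(unsigned int x)
{
	/*
	 * a power of 2 has exactly one bit set, so clearing the lowest
	 * set bit must leave zero (and x == 0 is excluded explicitly)
	 */
	return x != 0 && (x & (x - 1)) == 0;
}
#endif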
#define STILL_MOUNTED KERN_WARNING \
"md: md%d still mounted.\n"
#define STILL_IN_USE \
"md: md%d still in use.\n"

static int do_md_stop (mddev_t * mddev, int ro)
{
	int err = 0, resync_interrupted = 0;
	kdev_t dev = mddev_to_kdev(mddev);

	if (atomic_read(&mddev->active) > 1) {
		printk(STILL_IN_USE, mdidx(mddev));
		OUT(-EBUSY);
	}

	/* this shouldn't be needed as above would have fired */
	if (!ro && get_super(dev)) {
		printk(STILL_MOUNTED, mdidx(mddev));
		OUT(-EBUSY);
	}

	if (mddev->pers) {
		/*
		 * It is safe to call stop here, it only frees private
		 * data. Also, it tells us if a device is unstoppable
		 * (eg. resyncing is in progress)
		 */
		if (mddev->pers->stop_resync)
			if (mddev->pers->stop_resync(mddev))
				resync_interrupted = 1;

		if (mddev->recovery_running)
			md_interrupt_thread(md_recovery_thread);

		/*
		 * This synchronizes with signal delivery to the
		 * resync or reconstruction thread. It also nicely
		 * hangs the process if some reconstruction has not
		 * finished.
		 */
		down(&mddev->recovery_sem);
		up(&mddev->recovery_sem);

		/*
		 * sync and invalidate buffers because we cannot kill the
		 * main thread with valid IO transfers still around.
		 * the kernel lock protects us from new requests being
		 * added after invalidate_buffers().
		 */
		fsync_dev(mddev_to_kdev(mddev));
		fsync_dev(dev);
		invalidate_buffers(dev);

		if (ro) {
			if (mddev->ro)
				OUT(-ENXIO);
			mddev->ro = 1;
		} else {
			if (mddev->ro)
				set_device_ro(dev, 0);
			if (mddev->pers->stop(mddev)) {
				if (mddev->ro)
					set_device_ro(dev, 1);
				OUT(-EBUSY);
			}
			if (mddev->ro)
				mddev->ro = 0;
		}
		if (mddev->sb) {
			/*
			 * mark it clean only if there was no resync
			 * interrupted.
			 */
			if (!mddev->recovery_running && !resync_interrupted) {
				printk("marking sb clean...\n");
				mddev->sb->state |= 1 << MD_SB_CLEAN;
			}
			md_update_sb(mddev);
		}
		if (ro)
			set_device_ro(dev, 1);
	}

	/*
	 * Free resources if final stop
	 */
	if (!ro) {
		printk(KERN_INFO "md%d stopped.\n", mdidx(mddev));
		free_mddev(mddev);
	} else
		printk(KERN_INFO
			"md%d switched to read-only mode.\n", mdidx(mddev));
out:
	return err;
}

#undef OUT

/*
 * We have to safely support old arrays too.
 */
int detect_old_array (mdp_super_t *sb)
{
	if (sb->major_version > 0)
		return 0;

	if (sb->minor_version >= 90)
		return 0;

	return -EINVAL;
}

static void autorun_array (mddev_t *mddev)
{
	mdk_rdev_t *rdev;
	struct md_list_head *tmp;
	int err;

	if (mddev->disks.prev == &mddev->disks) {
		MD_BUG();
		return;
	}

	printk("running: ");

	ITERATE_RDEV(mddev,rdev,tmp) {
		printk("<%s>", partition_name(rdev->dev));
	}
	printk("\nnow!\n");

	err = do_md_run (mddev);
	if (err) {