📄 md.c
字号:
continue; } if (!sb_equal(sb, rdev->sb)) { printk(INCONSISTENT, partition_name(rdev->dev)); kick_rdev_from_array(rdev); continue; } } /* * OK, we have all disks and the array is ready to run. Let's * find the freshest superblock, that one will be the superblock * that represents the whole array. */ if (!mddev->sb) if (alloc_array_sb(mddev)) goto abort; sb = mddev->sb; freshest = NULL; ITERATE_RDEV(mddev,rdev,tmp) { __u64 ev1, ev2; /* * if the checksum is invalid, use the superblock * only as a last resort. (decrease it's age by * one event) */ if (calc_sb_csum(rdev->sb) != rdev->sb->sb_csum) { if (rdev->sb->events_lo || rdev->sb->events_hi) if ((rdev->sb->events_lo--)==0) rdev->sb->events_hi--; } printk(KERN_INFO "md: %s's event counter: %08lx\n", partition_name(rdev->dev), (unsigned long)rdev->sb->events_lo); if (!freshest) { freshest = rdev; continue; } /* * Find the newest superblock version */ ev1 = md_event(rdev->sb); ev2 = md_event(freshest->sb); if (ev1 != ev2) { out_of_date = 1; if (ev1 > ev2) freshest = rdev; } } if (out_of_date) { printk(OUT_OF_DATE); printk(KERN_INFO "md: freshest: %s\n", partition_name(freshest->dev)); } memcpy (sb, freshest->sb, sizeof(*sb)); /* * at this point we have picked the 'best' superblock * from all available superblocks. * now we validate this superblock and kick out possibly * failed disks. */ ITERATE_RDEV(mddev,rdev,tmp) { /* * Kick all non-fresh devices */ __u64 ev1, ev2; ev1 = md_event(rdev->sb); ev2 = md_event(sb); ++ev1; if (ev1 < ev2) { printk(KERN_WARNING "md: kicking non-fresh %s from array!\n", partition_name(rdev->dev)); kick_rdev_from_array(rdev); continue; } } /* * Fix up changed device names ... but only if this disk has a * recent update time. Use faulty checksum ones too. */ if (mddev->sb->level != -4) ITERATE_RDEV(mddev,rdev,tmp) { __u64 ev1, ev2, ev3; if (rdev->faulty || rdev->alias_device) { MD_BUG(); goto abort; } ev1 = md_event(rdev->sb); ev2 = md_event(sb); ev3 = ev2; --ev3; if ((rdev->dev != rdev->old_dev) && ((ev1 == ev2) || (ev1 == ev3))) { mdp_disk_t *desc; printk(KERN_WARNING "md: device name has changed from %s to %s since last import!\n", partition_name(rdev->old_dev), partition_name(rdev->dev)); if (rdev->desc_nr == -1) { MD_BUG(); goto abort; } desc = &sb->disks[rdev->desc_nr]; if (rdev->old_dev != MKDEV(desc->major, desc->minor)) { MD_BUG(); goto abort; } desc->major = MAJOR(rdev->dev); desc->minor = MINOR(rdev->dev); desc = &rdev->sb->this_disk; desc->major = MAJOR(rdev->dev); desc->minor = MINOR(rdev->dev); } } /* * Remove unavailable and faulty devices ... * * note that if an array becomes completely unrunnable due to * missing devices, we do not write the superblock back, so the * administrator has a chance to fix things up. The removal thus * only happens if it's nonfatal to the contents of the array. */ for (i = 0; i < MD_SB_DISKS; i++) { int found; mdp_disk_t *desc; kdev_t dev; desc = sb->disks + i; dev = MKDEV(desc->major, desc->minor); /* * We kick faulty devices/descriptors immediately. * * Note: multipath devices are a special case. Since we * were able to read the superblock on the path, we don't * care if it was previously marked as faulty, it's up now * so enable it. */ if (disk_faulty(desc) && mddev->sb->level != -4) { found = 0; ITERATE_RDEV(mddev,rdev,tmp) { if (rdev->desc_nr != desc->number) continue; printk(KERN_WARNING "md%d: kicking faulty %s!\n", mdidx(mddev),partition_name(rdev->dev)); kick_rdev_from_array(rdev); found = 1; break; } if (!found) { if (dev == MKDEV(0,0)) continue; printk(KERN_WARNING "md%d: removing former faulty %s!\n", mdidx(mddev), partition_name(dev)); } remove_descriptor(desc, sb); continue; } else if (disk_faulty(desc)) { /* * multipath entry marked as faulty, unfaulty it */ rdev = find_rdev(mddev, dev); if(rdev) mark_disk_spare(desc); else remove_descriptor(desc, sb); } if (dev == MKDEV(0,0)) continue; /* * Is this device present in the rdev ring? */ found = 0; ITERATE_RDEV(mddev,rdev,tmp) { /* * Multi-path IO special-case: since we have no * this_disk descriptor at auto-detect time, * we cannot check rdev->number. * We can check the device though. */ if ((sb->level == -4) && (rdev->dev == MKDEV(desc->major,desc->minor))) { found = 1; break; } if (rdev->desc_nr == desc->number) { found = 1; break; } } if (found) continue; printk(KERN_WARNING "md%d: former device %s is unavailable, removing from array!\n", mdidx(mddev), partition_name(dev)); remove_descriptor(desc, sb); } /* * Double check wether all devices mentioned in the * superblock are in the rdev ring. */ first = 1; for (i = 0; i < MD_SB_DISKS; i++) { mdp_disk_t *desc; kdev_t dev; desc = sb->disks + i; dev = MKDEV(desc->major, desc->minor); if (dev == MKDEV(0,0)) continue; if (disk_faulty(desc)) { MD_BUG(); goto abort; } rdev = find_rdev(mddev, dev); if (!rdev) { MD_BUG(); goto abort; } /* * In the case of Multipath-IO, we have no * other information source to find out which * disk is which, only the position of the device * in the superblock: */ if (mddev->sb->level == -4) { if ((rdev->desc_nr != -1) && (rdev->desc_nr != i)) { MD_BUG(); goto abort; } rdev->desc_nr = i; if (!first) rdev->alias_device = 1; else first = 0; } } /* * Kick all rdevs that are not in the * descriptor array: */ ITERATE_RDEV(mddev,rdev,tmp) { if (rdev->desc_nr == -1) kick_rdev_from_array(rdev); } /* * Do a final reality check. */ if (mddev->sb->level != -4) { ITERATE_RDEV(mddev,rdev,tmp) { if (rdev->desc_nr == -1) { MD_BUG(); goto abort; } /* * is the desc_nr unique? */ ITERATE_RDEV(mddev,rdev2,tmp2) { if ((rdev2 != rdev) && (rdev2->desc_nr == rdev->desc_nr)) { MD_BUG(); goto abort; } } /* * is the device unique? */ ITERATE_RDEV(mddev,rdev2,tmp2) { if ((rdev2 != rdev) && (rdev2->dev == rdev->dev)) { MD_BUG(); goto abort; } } } } /* * Check if we can support this RAID array */ if (sb->major_version != MD_MAJOR_VERSION || sb->minor_version > MD_MINOR_VERSION) { printk(OLD_VERSION, mdidx(mddev), sb->major_version, sb->minor_version, sb->patch_version); goto abort; } if ((sb->state != (1 << MD_SB_CLEAN)) && ((sb->level == 1) || (sb->level == 4) || (sb->level == 5))) printk(NOT_CLEAN_IGNORE, mdidx(mddev)); return 0;abort: return 1;}#undef INCONSISTENT#undef OUT_OF_DATE#undef OLD_VERSION#undef OLD_LEVELstatic int device_size_calculation(mddev_t * mddev){ int data_disks = 0, persistent; unsigned int readahead; mdp_super_t *sb = mddev->sb; struct md_list_head *tmp; mdk_rdev_t *rdev; /* * Do device size calculation. Bail out if too small. * (we have to do this after having validated chunk_size, * because device size has to be modulo chunk_size) */ persistent = !mddev->sb->not_persistent; ITERATE_RDEV(mddev,rdev,tmp) { if (rdev->faulty) continue; if (rdev->size) { MD_BUG(); continue; } rdev->size = calc_dev_size(rdev->dev, mddev, persistent); if (rdev->size < sb->chunk_size / 1024) { printk(KERN_WARNING "md: Dev %s smaller than chunk_size: %ldk < %dk\n", partition_name(rdev->dev), rdev->size, sb->chunk_size / 1024); return -EINVAL; } } switch (sb->level) { case -4: data_disks = 1; break; case -3: data_disks = 1; break; case -2: data_disks = 1; break; case -1: zoned_raid_size(mddev); data_disks = 1; break; case 0: zoned_raid_size(mddev); data_disks = sb->raid_disks; break; case 1: data_disks = 1; break; case 4: case 5: data_disks = sb->raid_disks-1; break; default: printk(UNKNOWN_LEVEL, mdidx(mddev), sb->level); goto abort; } if (!md_size[mdidx(mddev)]) md_size[mdidx(mddev)] = sb->size * data_disks; readahead = MD_READAHEAD; if ((sb->level == 0) || (sb->level == 4) || (sb->level == 5)) { readahead = (mddev->sb->chunk_size>>PAGE_SHIFT) * 4 * data_disks; if (readahead < data_disks * (MAX_SECTORS>>(PAGE_SHIFT-9))*2) readahead = data_disks * (MAX_SECTORS>>(PAGE_SHIFT-9))*2; } else { // (no multipath branch - it uses the default setting) if (sb->level == -3) readahead = 0; } md_maxreadahead[mdidx(mddev)] = readahead; printk(KERN_INFO "md%d: max total readahead window set to %ldk\n", mdidx(mddev), readahead*(PAGE_SIZE/1024)); printk(KERN_INFO "md%d: %d data-disks, max readahead per data-disk: %ldk\n", mdidx(mddev), data_disks, readahead/data_disks*(PAGE_SIZE/1024)); return 0;abort: return 1;}#define TOO_BIG_CHUNKSIZE KERN_ERR \"too big chunk_size: %d > %d\n"#define TOO_SMALL_CHUNKSIZE KERN_ERR \"too small chunk_size: %d < %ld\n"#define BAD_CHUNKSIZE KERN_ERR \"no chunksize specified, see 'man raidtab'\n"static int do_md_run(mddev_t * mddev){ int pnum, err; int chunk_size; struct md_list_head *tmp; mdk_rdev_t *rdev; if (!mddev->nb_dev) { MD_BUG(); return -EINVAL; } if (mddev->pers) return -EBUSY; /* * Resize disks to align partitions size on a given * chunk size. */ md_size[mdidx(mddev)] = 0; /* * Analyze all RAID superblock(s) */ if (analyze_sbs(mddev)) { MD_BUG(); return -EINVAL; } chunk_size = mddev->sb->chunk_size; pnum = level_to_pers(mddev->sb->level); mddev->param.chunk_size = chunk_size; mddev->param.personality = pnum; if ((pnum != MULTIPATH) && (pnum != RAID1)) { if (!chunk_size) { /* * 'default chunksize' in the old md code used to * be PAGE_SIZE, baaad. * we abort here to be on the safe side. We dont * want to continue the bad practice. */ printk(BAD_CHUNKSIZE); return -EINVAL; } if (chunk_size > MAX_CHUNK_SIZE) { printk(TOO_BIG_CHUNKSIZE, chunk_size, MAX_CHUNK_SIZE); return -EINVAL; } /* * chunk-size has to be a power of 2 and multiples of PAGE_SIZE */ if ( (1 << ffz(~chunk_size)) != chunk_size) { MD_BUG(); return -EINVAL; } if (chunk_size < PAGE_SIZE) { printk(TOO_SMALL_CHUNKSIZE, chunk_size, PAGE_SIZE); return -EINVAL; } } else if (chunk_size) printk(KERN_INFO "md: RAID level %d does not need chunksize! Continuing anyway.\n", mddev->sb->level); if (pnum >= MAX_PERSONALITY) { MD_BUG(); return -EINVAL; } if (!pers[pnum]) {#ifdef CONFIG_KMOD char module_name[80]; sprintf (module_name, "md-personality-%d", pnum); request_module (module_name); if (!pers[pnum])#endif { printk(KERN_ERR "md: personality %d is not loaded!\n", pnum); return -EINVAL; } } if (device_size_calculation(mddev)) return -EINVAL; /* * Drop all container device buffers, from now on * the only valid external interface is through the md * device. * Also find largest hardsector size */ md_hardsect_sizes[mdidx(mddev)] = 512; ITERATE_RDEV(mddev,rdev,tmp) { if (rdev->faulty) continue; invalidate_device(rdev->dev, 1); if (get_hardsect_size(rdev->dev) > md_hardsect_sizes[mdidx(mddev)]) md_hardsect_sizes[mdidx(mddev)] = get_hardsect_size(rdev->dev); } md_blocksizes[mdidx(mddev)] = 1024; if (md_blocksizes[mdidx(mddev)] < md_hardsect_sizes[mdidx(mddev)]) md_blocksizes[mdidx(mddev)] = md_hardsect_sizes[mdidx(mddev)]; mddev->pers = pers[pnum]; err = mddev->pers->run(mddev); if (err) { printk(KERN_ERR "md: pers->run() failed ...\n"); mddev->pers = NULL; return -EINVAL; } mddev->sb->state &= ~(1 << MD_SB_CLEAN); mddev->sb_dirty = 1; md_update_sb(mddev); /* * md_size has units of 1K blocks, which are * twice as large as sectors. */ md_hd_struct[mdidx(mddev)].start_sect = 0; register_disk(&md_gendisk, MKDEV(MAJOR_NR,mdidx(mddev)), 1, &md_fops, md_size[mdidx(mddev)]<<1); read_ahead[MD_MAJOR] = 1024; return (0);}#undef TOO_BIG_CHUNKSIZE#undef BAD_CHUNKSIZE#define OUT(x) do { err = (x); goto out; } while (0)static int restart_array(mddev_t *mddev){ int err = 0; /* * Complain if it has no devices */ if (!mddev->nb_dev) OUT(-ENXIO); if (mddev->pers) { if (!mddev->ro) OUT(-EBUSY); mddev->ro = 0; set_device_ro(mddev_to_kdev(mddev), 0); printk(KERN_INFO "md: md%d switched to read-write mode.\n", mdidx(mddev)); /* * Kick recovery or resync if necessary */ md_recover_arrays(); if (mddev->pers->restart_resync) mddev->pers->restart_resync(mddev); } else { printk(KERN_ERR "md: md%d has no personality assigned.\n", mdidx(mddev)); err = -EINVAL; }out: return err;}#define STILL_MOUNTED KERN_WARNING \"md: md%d still mounted.\n"#define STILL_IN_USE \"md: md%d still in use.\n"static int do_md_stop(mddev_t * mddev, int ro){ int err = 0, resync_interrupted = 0; kdev_t dev = mddev_to_kdev(mddev); if (atomic_read(&mddev->active)>1) { printk(STILL_IN_USE, mdidx(mddev)); OUT(-EBUSY); } if (mddev->pers) { /* * It is safe to call stop here, it only frees private * data. Also, it tells us if a device is unstoppable * (eg. resyncing is in progress)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -