📄 md.c
字号:
mdk_rdev_t *same_pdev; if (rdev->mddev) { MD_BUG(); return; } same_pdev = match_dev_unit(mddev, rdev->dev); if (same_pdev) printk( KERN_WARNING"md%d: WARNING: %s appears to be on the same physical disk as %s. True\n"" protection against single-disk failure might be compromised.\n", mdidx(mddev), partition_name(rdev->dev), partition_name(same_pdev->dev)); md_list_add(&rdev->same_set, &mddev->disks); rdev->mddev = mddev; mddev->nb_dev++; printk("bind<%s,%d>\n", partition_name(rdev->dev), mddev->nb_dev);}static void unbind_rdev_from_array (mdk_rdev_t * rdev){ if (!rdev->mddev) { MD_BUG(); return; } md_list_del(&rdev->same_set); MD_INIT_LIST_HEAD(&rdev->same_set); rdev->mddev->nb_dev--; printk("unbind<%s,%d>\n", partition_name(rdev->dev), rdev->mddev->nb_dev); rdev->mddev = NULL;}/* * prevent the device from being mounted, repartitioned or * otherwise reused by a RAID array (or any other kernel * subsystem), by opening the device. [simply getting an * inode is not enough, the SCSI module usage code needs * an explicit open() on the device] */static int lock_rdev (mdk_rdev_t *rdev){ int err = 0; struct block_device *bdev; bdev = bdget(rdev->dev); if (bdev == NULL) return -ENOMEM; err = blkdev_get(bdev, FMODE_READ|FMODE_WRITE, 0, BDEV_FILE); if (!err) { rdev->bdev = bdev; } return err;}static void unlock_rdev (mdk_rdev_t *rdev){ if (!rdev->bdev) MD_BUG(); blkdev_put(rdev->bdev, BDEV_FILE); bdput(rdev->bdev); rdev->bdev = NULL;}static void export_rdev (mdk_rdev_t * rdev){ printk("export_rdev(%s)\n",partition_name(rdev->dev)); if (rdev->mddev) MD_BUG(); unlock_rdev(rdev); free_disk_sb(rdev); md_list_del(&rdev->all); MD_INIT_LIST_HEAD(&rdev->all); if (rdev->pending.next != &rdev->pending) { printk("(%s was pending)\n",partition_name(rdev->dev)); md_list_del(&rdev->pending); MD_INIT_LIST_HEAD(&rdev->pending); } rdev->dev = 0; rdev->faulty = 0; kfree(rdev);}static void kick_rdev_from_array (mdk_rdev_t * rdev){ unbind_rdev_from_array(rdev); export_rdev(rdev);}static void export_array (mddev_t *mddev){ struct md_list_head *tmp; mdk_rdev_t *rdev; mdp_super_t *sb = mddev->sb; if (mddev->sb) { mddev->sb = NULL; free_page((unsigned long) sb); } ITERATE_RDEV(mddev,rdev,tmp) { if (!rdev->mddev) { MD_BUG(); continue; } kick_rdev_from_array(rdev); } if (mddev->nb_dev) MD_BUG();}static void free_mddev (mddev_t *mddev){ if (!mddev) { MD_BUG(); return; } export_array(mddev); md_size[mdidx(mddev)] = 0; md_hd_struct[mdidx(mddev)].nr_sects = 0; /* * Make sure nobody else is using this mddev * (careful, we rely on the global kernel lock here) */ while (md_atomic_read(&mddev->resync_sem.count) != 1) schedule(); while (md_atomic_read(&mddev->recovery_sem.count) != 1) schedule(); del_mddev_mapping(mddev, MKDEV(MD_MAJOR, mdidx(mddev))); md_list_del(&mddev->all_mddevs); MD_INIT_LIST_HEAD(&mddev->all_mddevs); kfree(mddev); MOD_DEC_USE_COUNT;}#undef BAD_CSUM#undef BAD_MAGIC#undef OUT_OF_MEM#undef NO_SBstatic void print_desc(mdp_disk_t *desc){ printk(" DISK<N:%d,%s(%d,%d),R:%d,S:%d>\n", desc->number, partition_name(MKDEV(desc->major,desc->minor)), desc->major,desc->minor,desc->raid_disk,desc->state);}static void print_sb(mdp_super_t *sb){ int i; printk(" SB: (V:%d.%d.%d) ID:<%08x.%08x.%08x.%08x> CT:%08x\n", sb->major_version, sb->minor_version, sb->patch_version, sb->set_uuid0, sb->set_uuid1, sb->set_uuid2, sb->set_uuid3, sb->ctime); printk(" L%d S%08d ND:%d RD:%d md%d LO:%d CS:%d\n", sb->level, sb->size, sb->nr_disks, sb->raid_disks, sb->md_minor, sb->layout, sb->chunk_size); printk(" UT:%08x ST:%d AD:%d WD:%d FD:%d SD:%d CSUM:%08x E:%08lx\n", sb->utime, sb->state, sb->active_disks, sb->working_disks, sb->failed_disks, sb->spare_disks, sb->sb_csum, (unsigned long)sb->events_lo); for (i = 0; i < MD_SB_DISKS; i++) { mdp_disk_t *desc; desc = sb->disks + i; printk(" D %2d: ", i); print_desc(desc); } printk(" THIS: "); print_desc(&sb->this_disk);}static void print_rdev(mdk_rdev_t *rdev){ printk(" rdev %s: O:%s, SZ:%08ld F:%d DN:%d ", partition_name(rdev->dev), partition_name(rdev->old_dev), rdev->size, rdev->faulty, rdev->desc_nr); if (rdev->sb) { printk("rdev superblock:\n"); print_sb(rdev->sb); } else printk("no rdev superblock!\n");}void md_print_devices (void){ struct md_list_head *tmp, *tmp2; mdk_rdev_t *rdev; mddev_t *mddev; printk("\n"); printk(" **********************************\n"); printk(" * <COMPLETE RAID STATE PRINTOUT> *\n"); printk(" **********************************\n"); ITERATE_MDDEV(mddev,tmp) { printk("md%d: ", mdidx(mddev)); ITERATE_RDEV(mddev,rdev,tmp2) printk("<%s>", partition_name(rdev->dev)); if (mddev->sb) { printk(" array superblock:\n"); print_sb(mddev->sb); } else printk(" no array superblock.\n"); ITERATE_RDEV(mddev,rdev,tmp2) print_rdev(rdev); } printk(" **********************************\n"); printk("\n");}static int sb_equal ( mdp_super_t *sb1, mdp_super_t *sb2){ int ret; mdp_super_t *tmp1, *tmp2; tmp1 = kmalloc(sizeof(*tmp1),GFP_KERNEL); tmp2 = kmalloc(sizeof(*tmp2),GFP_KERNEL); if (!tmp1 || !tmp2) { ret = 0; goto abort; } *tmp1 = *sb1; *tmp2 = *sb2; /* * nr_disks is not constant */ tmp1->nr_disks = 0; tmp2->nr_disks = 0; if (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4)) ret = 0; else ret = 1;abort: if (tmp1) kfree(tmp1); if (tmp2) kfree(tmp2); return ret;}static int uuid_equal(mdk_rdev_t *rdev1, mdk_rdev_t *rdev2){ if ( (rdev1->sb->set_uuid0 == rdev2->sb->set_uuid0) && (rdev1->sb->set_uuid1 == rdev2->sb->set_uuid1) && (rdev1->sb->set_uuid2 == rdev2->sb->set_uuid2) && (rdev1->sb->set_uuid3 == rdev2->sb->set_uuid3)) return 1; return 0;}static mdk_rdev_t * find_rdev_all (kdev_t dev){ struct md_list_head *tmp; mdk_rdev_t *rdev; tmp = all_raid_disks.next; while (tmp != &all_raid_disks) { rdev = md_list_entry(tmp, mdk_rdev_t, all); if (rdev->dev == dev) return rdev; tmp = tmp->next; } return NULL;}#define GETBLK_FAILED KERN_ERR \"md: getblk failed for device %s\n"static int write_disk_sb(mdk_rdev_t * rdev){ struct buffer_head *bh; kdev_t dev; unsigned long sb_offset, size; mdp_super_t *sb; if (!rdev->sb) { MD_BUG(); return -1; } if (rdev->faulty) { MD_BUG(); return -1; } if (rdev->sb->md_magic != MD_SB_MAGIC) { MD_BUG(); return -1; } dev = rdev->dev; sb_offset = calc_dev_sboffset(dev, rdev->mddev, 1); if (rdev->sb_offset != sb_offset) { printk("%s's sb offset has changed from %ld to %ld, skipping\n", partition_name(dev), rdev->sb_offset, sb_offset); goto skip; } /* * If the disk went offline meanwhile and it's just a spare, then * it's size has changed to zero silently, and the MD code does * not yet know that it's faulty. */ size = calc_dev_size(dev, rdev->mddev, 1); if (size != rdev->size) { printk("%s's size has changed from %ld to %ld since import, skipping\n", partition_name(dev), rdev->size, size); goto skip; } printk("(write) %s's sb offset: %ld\n", partition_name(dev), sb_offset); fsync_dev(dev); set_blocksize(dev, MD_SB_BYTES); bh = getblk(dev, sb_offset / MD_SB_BLOCKS, MD_SB_BYTES); if (!bh) { printk(GETBLK_FAILED, partition_name(dev)); return 1; } memset(bh->b_data,0,bh->b_size); sb = (mdp_super_t *) bh->b_data; memcpy(sb, rdev->sb, MD_SB_BYTES); mark_buffer_uptodate(bh, 1); mark_buffer_dirty(bh); ll_rw_block(WRITE, 1, &bh); wait_on_buffer(bh); brelse(bh); fsync_dev(dev);skip: return 0;}#undef GETBLK_FAILED static void set_this_disk(mddev_t *mddev, mdk_rdev_t *rdev){ int i, ok = 0; mdp_disk_t *desc; for (i = 0; i < MD_SB_DISKS; i++) { desc = mddev->sb->disks + i;#if 0 if (disk_faulty(desc)) { if (MKDEV(desc->major,desc->minor) == rdev->dev) ok = 1; continue; }#endif if (MKDEV(desc->major,desc->minor) == rdev->dev) { rdev->sb->this_disk = *desc; rdev->desc_nr = desc->number; ok = 1; break; } } if (!ok) { MD_BUG(); }}static int sync_sbs(mddev_t * mddev){ mdk_rdev_t *rdev; mdp_super_t *sb; struct md_list_head *tmp; ITERATE_RDEV(mddev,rdev,tmp) { if (rdev->faulty) continue; sb = rdev->sb; *sb = *mddev->sb; set_this_disk(mddev, rdev); sb->sb_csum = calc_sb_csum(sb); } return 0;}int md_update_sb(mddev_t * mddev){ int first, err, count = 100; struct md_list_head *tmp; mdk_rdev_t *rdev;repeat: mddev->sb->utime = CURRENT_TIME; if ((++mddev->sb->events_lo)==0) ++mddev->sb->events_hi; if ((mddev->sb->events_lo|mddev->sb->events_hi)==0) { /* * oops, this 64-bit counter should never wrap. * Either we are in around ~1 trillion A.C., assuming * 1 reboot per second, or we have a bug: */ MD_BUG(); mddev->sb->events_lo = mddev->sb->events_hi = 0xffffffff; } sync_sbs(mddev); /* * do not write anything to disk if using * nonpersistent superblocks */ if (mddev->sb->not_persistent) return 0; printk(KERN_INFO "md: updating md%d RAID superblock on device\n", mdidx(mddev)); first = 1; err = 0; ITERATE_RDEV(mddev,rdev,tmp) { if (!first) { first = 0; printk(", "); } if (rdev->faulty) printk("(skipping faulty "); printk("%s ", partition_name(rdev->dev)); if (!rdev->faulty) { printk("[events: %08lx]", (unsigned long)rdev->sb->events_lo); err += write_disk_sb(rdev); } else printk(")\n"); } printk(".\n"); if (err) { printk("errors occured during superblock update, repeating\n"); if (--count) goto repeat; printk("excessive errors occured during superblock update, exiting\n"); } return 0;}/* * Import a device. If 'on_disk', then sanity check the superblock * * mark the device faulty if: * * - the device is nonexistent (zero size) * - the device has no valid superblock * * a faulty rdev _never_ has rdev->sb set. */static int md_import_device (kdev_t newdev, int on_disk){ int err; mdk_rdev_t *rdev; unsigned int size; if (find_rdev_all(newdev)) return -EEXIST; rdev = (mdk_rdev_t *) kmalloc(sizeof(*rdev), GFP_KERNEL); if (!rdev) { printk("could not alloc mem for %s!\n", partition_name(newdev)); return -ENOMEM; } memset(rdev, 0, sizeof(*rdev)); if (get_super(newdev)) { printk("md: can not import %s, has active inodes!\n", partition_name(newdev)); err = -EBUSY; goto abort_free; } if ((err = alloc_disk_sb(rdev))) goto abort_free; rdev->dev = newdev; if (lock_rdev(rdev)) { printk("md: could not lock %s, zero-size? Marking faulty.\n", partition_name(newdev)); err = -EINVAL; goto abort_free; } rdev->desc_nr = -1; rdev->faulty = 0; size = 0; if (blk_size[MAJOR(newdev)]) size = blk_size[MAJOR(newdev)][MINOR(newdev)]; if (!size) { printk("md: %s has zero size, marking faulty!\n", partition_name(newdev)); err = -EINVAL; goto abort_free; } if (on_disk) { if ((err = read_disk_sb(rdev))) { printk("md: could not read %s's sb, not importing!\n", partition_name(newdev)); goto abort_free; } if ((err = check_disk_sb(rdev))) { printk("md: %s has invalid sb, not importing!\n", partition_name(newdev)); goto abort_free; } rdev->old_dev = MKDEV(rdev->sb->this_disk.major, rdev->sb->this_disk.minor); rdev->desc_nr = rdev->sb->this_disk.number; } md_list_add(&rdev->all, &all_raid_disks); MD_INIT_LIST_HEAD(&rdev->pending); if (rdev->faulty && rdev->sb) free_disk_sb(rdev); return 0;abort_free: if (rdev->sb) { if (rdev->bdev) unlock_rdev(rdev); free_disk_sb(rdev); } kfree(rdev); return err;}/* * Check a full RAID array for plausibility */#define INCONSISTENT KERN_ERR \"md: fatal superblock inconsistency in %s -- removing from array\n"#define OUT_OF_DATE KERN_ERR \"md: superblock update time inconsistency -- using the most recent one\n"#define OLD_VERSION KERN_ALERT \"md: md%d: unsupported raid array version %d.%d.%d\n"#define NOT_CLEAN_IGNORE KERN_ERR \"md: md%d: raid array is not clean -- starting background reconstruction\n"#define UNKNOWN_LEVEL KERN_ERR \"md: md%d: unsupported raid level %d\n"static int analyze_sbs (mddev_t * mddev){ int out_of_date = 0, i; struct md_list_head *tmp, *tmp2; mdk_rdev_t *rdev, *rdev2, *freshest; mdp_super_t *sb; /* * Verify the RAID superblock on each real device */ ITERATE_RDEV(mddev,rdev,tmp) { if (rdev->faulty) { MD_BUG(); goto abort; } if (!rdev->sb) { MD_BUG(); goto abort; } if (check_disk_sb(rdev)) goto abort; } /* * The superblock constant part has to be the same * for all disks in the array. */ sb = NULL; ITERATE_RDEV(mddev,rdev,tmp) { if (!sb) { sb = rdev->sb; continue; } if (!sb_equal(sb, rdev->sb)) { printk (INCONSISTENT, partition_name(rdev->dev)); kick_rdev_from_array(rdev); continue; } } /* * OK, we have all disks and the array is ready to run. Let's * find the freshest superblock, that one will be the superblock * that represents the whole array. */ if (!mddev->sb) if (alloc_array_sb(mddev)) goto abort; sb = mddev->sb; freshest = NULL; ITERATE_RDEV(mddev,rdev,tmp) { __u64 ev1, ev2; /* * if the checksum is invalid, use the superblock * only as a last resort. (decrease it's age by * one event)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -