md.c
来自「linux 内核源代码」· C语言 代码 · 共 2,595 行 · 第 1/5 页
C
2,595 行
MD_BUG(); return -EINVAL; } if (rdev->sb_loaded) return 0; if (!sync_page_io(rdev->bdev, rdev->sb_offset<<1, size, rdev->sb_page, READ)) goto fail; rdev->sb_loaded = 1; return 0;fail: printk(KERN_WARNING "md: disabled device %s, could not read superblock.\n", bdevname(rdev->bdev,b)); return -EINVAL;}static int uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2){ if ( (sb1->set_uuid0 == sb2->set_uuid0) && (sb1->set_uuid1 == sb2->set_uuid1) && (sb1->set_uuid2 == sb2->set_uuid2) && (sb1->set_uuid3 == sb2->set_uuid3)) return 1; return 0;}static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2){ int ret; mdp_super_t *tmp1, *tmp2; tmp1 = kmalloc(sizeof(*tmp1),GFP_KERNEL); tmp2 = kmalloc(sizeof(*tmp2),GFP_KERNEL); if (!tmp1 || !tmp2) { ret = 0; printk(KERN_INFO "md.c: sb1 is not equal to sb2!\n"); goto abort; } *tmp1 = *sb1; *tmp2 = *sb2; /* * nr_disks is not constant */ tmp1->nr_disks = 0; tmp2->nr_disks = 0; if (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4)) ret = 0; else ret = 1;abort: kfree(tmp1); kfree(tmp2); return ret;}static u32 md_csum_fold(u32 csum){ csum = (csum & 0xffff) + (csum >> 16); return (csum & 0xffff) + (csum >> 16);}static unsigned int calc_sb_csum(mdp_super_t * sb){ u64 newcsum = 0; u32 *sb32 = (u32*)sb; int i; unsigned int disk_csum, csum; disk_csum = sb->sb_csum; sb->sb_csum = 0; for (i = 0; i < MD_SB_BYTES/4 ; i++) newcsum += sb32[i]; csum = (newcsum & 0xffffffff) + (newcsum>>32);#ifdef CONFIG_ALPHA /* This used to use csum_partial, which was wrong for several * reasons including that different results are returned on * different architectures. It isn't critical that we get exactly * the same return value as before (we always csum_fold before * testing, and that removes any differences). However as we * know that csum_partial always returned a 16bit value on * alphas, do a fold to maximise conformity to previous behaviour. */ sb->sb_csum = md_csum_fold(disk_csum);#else sb->sb_csum = disk_csum;#endif return csum;}/* * Handle superblock details. * We want to be able to handle multiple superblock formats * so we have a common interface to them all, and an array of * different handlers. * We rely on user-space to write the initial superblock, and support * reading and updating of superblocks. * Interface methods are: * int load_super(mdk_rdev_t *dev, mdk_rdev_t *refdev, int minor_version) * loads and validates a superblock on dev. * if refdev != NULL, compare superblocks on both devices * Return: * 0 - dev has a superblock that is compatible with refdev * 1 - dev has a superblock that is compatible and newer than refdev * so dev should be used as the refdev in future * -EINVAL superblock incompatible or invalid * -othererror e.g. -EIO * * int validate_super(mddev_t *mddev, mdk_rdev_t *dev) * Verify that dev is acceptable into mddev. * The first time, mddev->raid_disks will be 0, and data from * dev should be merged in. Subsequent calls check that dev * is new enough. Return 0 or -EINVAL * * void sync_super(mddev_t *mddev, mdk_rdev_t *dev) * Update the superblock for rdev with data in mddev * This does not write to disc. * */struct super_type { char *name; struct module *owner; int (*load_super)(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version); int (*validate_super)(mddev_t *mddev, mdk_rdev_t *rdev); void (*sync_super)(mddev_t *mddev, mdk_rdev_t *rdev);};/* * load_super for 0.90.0 */static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version){ char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE]; mdp_super_t *sb; int ret; sector_t sb_offset; /* * Calculate the position of the superblock, * it's at the end of the disk. * * It also happens to be a multiple of 4Kb. */ sb_offset = calc_dev_sboffset(rdev->bdev); rdev->sb_offset = sb_offset; ret = read_disk_sb(rdev, MD_SB_BYTES); if (ret) return ret; ret = -EINVAL; bdevname(rdev->bdev, b); sb = (mdp_super_t*)page_address(rdev->sb_page); if (sb->md_magic != MD_SB_MAGIC) { printk(KERN_ERR "md: invalid raid superblock magic on %s\n", b); goto abort; } if (sb->major_version != 0 || sb->minor_version < 90 || sb->minor_version > 91) { printk(KERN_WARNING "Bad version number %d.%d on %s\n", sb->major_version, sb->minor_version, b); goto abort; } if (sb->raid_disks <= 0) goto abort; if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) { printk(KERN_WARNING "md: invalid superblock checksum on %s\n", b); goto abort; } rdev->preferred_minor = sb->md_minor; rdev->data_offset = 0; rdev->sb_size = MD_SB_BYTES; if (sb->state & (1<<MD_SB_BITMAP_PRESENT)) { if (sb->level != 1 && sb->level != 4 && sb->level != 5 && sb->level != 6 && sb->level != 10) { /* FIXME use a better test */ printk(KERN_WARNING "md: bitmaps not supported for this level.\n"); goto abort; } } if (sb->level == LEVEL_MULTIPATH) rdev->desc_nr = -1; else rdev->desc_nr = sb->this_disk.number; if (refdev == 0) ret = 1; else { __u64 ev1, ev2; mdp_super_t *refsb = (mdp_super_t*)page_address(refdev->sb_page); if (!uuid_equal(refsb, sb)) { printk(KERN_WARNING "md: %s has different UUID to %s\n", b, bdevname(refdev->bdev,b2)); goto abort; } if (!sb_equal(refsb, sb)) { printk(KERN_WARNING "md: %s has same UUID" " but different superblock to %s\n", b, bdevname(refdev->bdev, b2)); goto abort; } ev1 = md_event(sb); ev2 = md_event(refsb); if (ev1 > ev2) ret = 1; else ret = 0; } rdev->size = calc_dev_size(rdev, sb->chunk_size); if (rdev->size < sb->size && sb->level > 1) /* "this cannot possibly happen" ... */ ret = -EINVAL; abort: return ret;}/* * validate_super for 0.90.0 */static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev){ mdp_disk_t *desc; mdp_super_t *sb = (mdp_super_t *)page_address(rdev->sb_page); __u64 ev1 = md_event(sb); rdev->raid_disk = -1; rdev->flags = 0; if (mddev->raid_disks == 0) { mddev->major_version = 0; mddev->minor_version = sb->minor_version; mddev->patch_version = sb->patch_version; mddev->persistent = ! sb->not_persistent; mddev->chunk_size = sb->chunk_size; mddev->ctime = sb->ctime; mddev->utime = sb->utime; mddev->level = sb->level; mddev->clevel[0] = 0; mddev->layout = sb->layout; mddev->raid_disks = sb->raid_disks; mddev->size = sb->size; mddev->events = ev1; mddev->bitmap_offset = 0; mddev->default_bitmap_offset = MD_SB_BYTES >> 9; if (mddev->minor_version >= 91) { mddev->reshape_position = sb->reshape_position; mddev->delta_disks = sb->delta_disks; mddev->new_level = sb->new_level; mddev->new_layout = sb->new_layout; mddev->new_chunk = sb->new_chunk; } else { mddev->reshape_position = MaxSector; mddev->delta_disks = 0; mddev->new_level = mddev->level; mddev->new_layout = mddev->layout; mddev->new_chunk = mddev->chunk_size; } if (sb->state & (1<<MD_SB_CLEAN)) mddev->recovery_cp = MaxSector; else { if (sb->events_hi == sb->cp_events_hi && sb->events_lo == sb->cp_events_lo) { mddev->recovery_cp = sb->recovery_cp; } else mddev->recovery_cp = 0; } memcpy(mddev->uuid+0, &sb->set_uuid0, 4); memcpy(mddev->uuid+4, &sb->set_uuid1, 4); memcpy(mddev->uuid+8, &sb->set_uuid2, 4); memcpy(mddev->uuid+12,&sb->set_uuid3, 4); mddev->max_disks = MD_SB_DISKS; if (sb->state & (1<<MD_SB_BITMAP_PRESENT) && mddev->bitmap_file == NULL) mddev->bitmap_offset = mddev->default_bitmap_offset; } else if (mddev->pers == NULL) { /* Insist on good event counter while assembling */ ++ev1; if (ev1 < mddev->events) return -EINVAL; } else if (mddev->bitmap) { /* if adding to array with a bitmap, then we can accept an * older device ... but not too old. */ if (ev1 < mddev->bitmap->events_cleared) return 0; } else { if (ev1 < mddev->events) /* just a hot-add of a new device, leave raid_disk at -1 */ return 0; } if (mddev->level != LEVEL_MULTIPATH) { desc = sb->disks + rdev->desc_nr; if (desc->state & (1<<MD_DISK_FAULTY)) set_bit(Faulty, &rdev->flags); else if (desc->state & (1<<MD_DISK_SYNC) /* && desc->raid_disk < mddev->raid_disks */) { set_bit(In_sync, &rdev->flags); rdev->raid_disk = desc->raid_disk; } if (desc->state & (1<<MD_DISK_WRITEMOSTLY)) set_bit(WriteMostly, &rdev->flags); } else /* MULTIPATH are always insync */ set_bit(In_sync, &rdev->flags); return 0;}/* * sync_super for 0.90.0 */static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev){ mdp_super_t *sb; struct list_head *tmp; mdk_rdev_t *rdev2; int next_spare = mddev->raid_disks; /* make rdev->sb match mddev data.. * * 1/ zero out disks * 2/ Add info for each disk, keeping track of highest desc_nr (next_spare); * 3/ any empty disks < next_spare become removed * * disks[0] gets initialised to REMOVED because * we cannot be sure from other fields if it has * been initialised or not. */ int i; int active=0, working=0,failed=0,spare=0,nr_disks=0; rdev->sb_size = MD_SB_BYTES; sb = (mdp_super_t*)page_address(rdev->sb_page); memset(sb, 0, sizeof(*sb)); sb->md_magic = MD_SB_MAGIC; sb->major_version = mddev->major_version; sb->patch_version = mddev->patch_version; sb->gvalid_words = 0; /* ignored */ memcpy(&sb->set_uuid0, mddev->uuid+0, 4); memcpy(&sb->set_uuid1, mddev->uuid+4, 4); memcpy(&sb->set_uuid2, mddev->uuid+8, 4); memcpy(&sb->set_uuid3, mddev->uuid+12,4); sb->ctime = mddev->ctime; sb->level = mddev->level; sb->size = mddev->size; sb->raid_disks = mddev->raid_disks; sb->md_minor = mddev->md_minor; sb->not_persistent = !mddev->persistent; sb->utime = mddev->utime; sb->state = 0; sb->events_hi = (mddev->events>>32); sb->events_lo = (u32)mddev->events; if (mddev->reshape_position == MaxSector) sb->minor_version = 90; else { sb->minor_version = 91; sb->reshape_position = mddev->reshape_position; sb->new_level = mddev->new_level; sb->delta_disks = mddev->delta_disks; sb->new_layout = mddev->new_layout; sb->new_chunk = mddev->new_chunk; } mddev->minor_version = sb->minor_version; if (mddev->in_sync) { sb->recovery_cp = mddev->recovery_cp; sb->cp_events_hi = (mddev->events>>32); sb->cp_events_lo = (u32)mddev->events; if (mddev->recovery_cp == MaxSector) sb->state = (1<< MD_SB_CLEAN); } else sb->recovery_cp = 0; sb->layout = mddev->layout; sb->chunk_size = mddev->chunk_size; if (mddev->bitmap && mddev->bitmap_file == NULL) sb->state |= (1<<MD_SB_BITMAP_PRESENT); sb->disks[0].state = (1<<MD_DISK_REMOVED); ITERATE_RDEV(mddev,rdev2,tmp) { mdp_disk_t *d; int desc_nr; if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags) && !test_bit(Faulty, &rdev2->flags)) desc_nr = rdev2->raid_disk; else desc_nr = next_spare++; rdev2->desc_nr = desc_nr; d = &sb->disks[rdev2->desc_nr]; nr_disks++; d->number = rdev2->desc_nr; d->major = MAJOR(rdev2->bdev->bd_dev); d->minor = MINOR(rdev2->bdev->bd_dev); if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags) && !test_bit(Faulty, &rdev2->flags)) d->raid_disk = rdev2->raid_disk; else d->raid_disk = rdev2->desc_nr; /* compatibility */ if (test_bit(Faulty, &rdev2->flags)) d->state = (1<<MD_DISK_FAULTY); else if (test_bit(In_sync, &rdev2->flags)) { d->state = (1<<MD_DISK_ACTIVE); d->state |= (1<<MD_DISK_SYNC); active++; working++; } else { d->state = 0; spare++; working++; } if (test_bit(WriteMostly, &rdev2->flags)) d->state |= (1<<MD_DISK_WRITEMOSTLY); } /* now set the "removed" and "faulty" bits on any missing devices */ for (i=0 ; i < mddev->raid_disks ; i++) { mdp_disk_t *d = &sb->disks[i]; if (d->state == 0 && d->number == 0) { d->number = i; d->raid_disk = i; d->state = (1<<MD_DISK_REMOVED); d->state |= (1<<MD_DISK_FAULTY); failed++; } } sb->nr_disks = nr_disks; sb->active_disks = active; sb->working_disks = working; sb->failed_disks = failed; sb->spare_disks = spare; sb->this_disk = sb->disks[rdev->desc_nr]; sb->sb_csum = calc_sb_csum(sb);}/* * version 1 superblock */static __le32 calc_sb_1_csum(struct mdp_superblock_1 * sb){ __le32 disk_csum; u32 csum; unsigned long long newcsum; int size = 256 + le32_to_cpu(sb->max_dev)*2; __le32 *isuper = (__le32*)sb; int i; disk_csum = sb->sb_csum; sb->sb_csum = 0; newcsum = 0; for (i=0; size>=4; size -= 4 ) newcsum += le32_to_cpu(*isuper++); if (size == 2) newcsum += le16_to_cpu(*(__le16*) isuper); csum = (newcsum & 0xffffffff) + (newcsum >> 32); sb->sb_csum = disk_csum; return cpu_to_le32(csum);}static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version){ struct mdp_superblock_1 *sb; int ret; sector_t sb_offset; char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE]; int bmask; /* * Calculate the position of the superblock. * It is always aligned to a 4K boundary and * depeding on minor_version, it can be: * 0: At least 8K, but less than 12K, from end of device * 1: At start of device * 2: 4K from start of device. */
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?