md.c

来自「linux 内核源代码」· C语言 代码 · 共 2,595 行 · 第 1/5 页

C
2,595
字号
		MD_BUG();		return -EINVAL;	}	if (rdev->sb_loaded)		return 0;	if (!sync_page_io(rdev->bdev, rdev->sb_offset<<1, size, rdev->sb_page, READ))		goto fail;	rdev->sb_loaded = 1;	return 0;fail:	printk(KERN_WARNING "md: disabled device %s, could not read superblock.\n",		bdevname(rdev->bdev,b));	return -EINVAL;}static int uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2){	if (	(sb1->set_uuid0 == sb2->set_uuid0) &&		(sb1->set_uuid1 == sb2->set_uuid1) &&		(sb1->set_uuid2 == sb2->set_uuid2) &&		(sb1->set_uuid3 == sb2->set_uuid3))		return 1;	return 0;}static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2){	int ret;	mdp_super_t *tmp1, *tmp2;	tmp1 = kmalloc(sizeof(*tmp1),GFP_KERNEL);	tmp2 = kmalloc(sizeof(*tmp2),GFP_KERNEL);	if (!tmp1 || !tmp2) {		ret = 0;		printk(KERN_INFO "md.c: sb1 is not equal to sb2!\n");		goto abort;	}	*tmp1 = *sb1;	*tmp2 = *sb2;	/*	 * nr_disks is not constant	 */	tmp1->nr_disks = 0;	tmp2->nr_disks = 0;	if (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4))		ret = 0;	else		ret = 1;abort:	kfree(tmp1);	kfree(tmp2);	return ret;}static u32 md_csum_fold(u32 csum){	csum = (csum & 0xffff) + (csum >> 16);	return (csum & 0xffff) + (csum >> 16);}static unsigned int calc_sb_csum(mdp_super_t * sb){	u64 newcsum = 0;	u32 *sb32 = (u32*)sb;	int i;	unsigned int disk_csum, csum;	disk_csum = sb->sb_csum;	sb->sb_csum = 0;	for (i = 0; i < MD_SB_BYTES/4 ; i++)		newcsum += sb32[i];	csum = (newcsum & 0xffffffff) + (newcsum>>32);#ifdef CONFIG_ALPHA	/* This used to use csum_partial, which was wrong for several	 * reasons including that different results are returned on	 * different architectures.  It isn't critical that we get exactly	 * the same return value as before (we always csum_fold before	 * testing, and that removes any differences).  However as we	 * know that csum_partial always returned a 16bit value on	 * alphas, do a fold to maximise conformity to previous behaviour.	 */	sb->sb_csum = md_csum_fold(disk_csum);#else	sb->sb_csum = disk_csum;#endif	return csum;}/* * Handle superblock details. * We want to be able to handle multiple superblock formats * so we have a common interface to them all, and an array of * different handlers. * We rely on user-space to write the initial superblock, and support * reading and updating of superblocks. * Interface methods are: *   int load_super(mdk_rdev_t *dev, mdk_rdev_t *refdev, int minor_version) *      loads and validates a superblock on dev. *      if refdev != NULL, compare superblocks on both devices *    Return: *      0 - dev has a superblock that is compatible with refdev *      1 - dev has a superblock that is compatible and newer than refdev *          so dev should be used as the refdev in future *     -EINVAL superblock incompatible or invalid *     -othererror e.g. -EIO * *   int validate_super(mddev_t *mddev, mdk_rdev_t *dev) *      Verify that dev is acceptable into mddev. *       The first time, mddev->raid_disks will be 0, and data from *       dev should be merged in.  Subsequent calls check that dev *       is new enough.  Return 0 or -EINVAL * *   void sync_super(mddev_t *mddev, mdk_rdev_t *dev) *     Update the superblock for rdev with data in mddev *     This does not write to disc. * */struct super_type  {	char 		*name;	struct module	*owner;	int		(*load_super)(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version);	int		(*validate_super)(mddev_t *mddev, mdk_rdev_t *rdev);	void		(*sync_super)(mddev_t *mddev, mdk_rdev_t *rdev);};/* * load_super for 0.90.0  */static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version){	char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];	mdp_super_t *sb;	int ret;	sector_t sb_offset;	/*	 * Calculate the position of the superblock,	 * it's at the end of the disk.	 *	 * It also happens to be a multiple of 4Kb.	 */	sb_offset = calc_dev_sboffset(rdev->bdev);	rdev->sb_offset = sb_offset;	ret = read_disk_sb(rdev, MD_SB_BYTES);	if (ret) return ret;	ret = -EINVAL;	bdevname(rdev->bdev, b);	sb = (mdp_super_t*)page_address(rdev->sb_page);	if (sb->md_magic != MD_SB_MAGIC) {		printk(KERN_ERR "md: invalid raid superblock magic on %s\n",		       b);		goto abort;	}	if (sb->major_version != 0 ||	    sb->minor_version < 90 ||	    sb->minor_version > 91) {		printk(KERN_WARNING "Bad version number %d.%d on %s\n",			sb->major_version, sb->minor_version,			b);		goto abort;	}	if (sb->raid_disks <= 0)		goto abort;	if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) {		printk(KERN_WARNING "md: invalid superblock checksum on %s\n",			b);		goto abort;	}	rdev->preferred_minor = sb->md_minor;	rdev->data_offset = 0;	rdev->sb_size = MD_SB_BYTES;	if (sb->state & (1<<MD_SB_BITMAP_PRESENT)) {		if (sb->level != 1 && sb->level != 4		    && sb->level != 5 && sb->level != 6		    && sb->level != 10) {			/* FIXME use a better test */			printk(KERN_WARNING			       "md: bitmaps not supported for this level.\n");			goto abort;		}	}	if (sb->level == LEVEL_MULTIPATH)		rdev->desc_nr = -1;	else		rdev->desc_nr = sb->this_disk.number;	if (refdev == 0)		ret = 1;	else {		__u64 ev1, ev2;		mdp_super_t *refsb = (mdp_super_t*)page_address(refdev->sb_page);		if (!uuid_equal(refsb, sb)) {			printk(KERN_WARNING "md: %s has different UUID to %s\n",				b, bdevname(refdev->bdev,b2));			goto abort;		}		if (!sb_equal(refsb, sb)) {			printk(KERN_WARNING "md: %s has same UUID"			       " but different superblock to %s\n",			       b, bdevname(refdev->bdev, b2));			goto abort;		}		ev1 = md_event(sb);		ev2 = md_event(refsb);		if (ev1 > ev2)			ret = 1;		else 			ret = 0;	}	rdev->size = calc_dev_size(rdev, sb->chunk_size);	if (rdev->size < sb->size && sb->level > 1)		/* "this cannot possibly happen" ... */		ret = -EINVAL; abort:	return ret;}/* * validate_super for 0.90.0 */static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev){	mdp_disk_t *desc;	mdp_super_t *sb = (mdp_super_t *)page_address(rdev->sb_page);	__u64 ev1 = md_event(sb);	rdev->raid_disk = -1;	rdev->flags = 0;	if (mddev->raid_disks == 0) {		mddev->major_version = 0;		mddev->minor_version = sb->minor_version;		mddev->patch_version = sb->patch_version;		mddev->persistent = ! sb->not_persistent;		mddev->chunk_size = sb->chunk_size;		mddev->ctime = sb->ctime;		mddev->utime = sb->utime;		mddev->level = sb->level;		mddev->clevel[0] = 0;		mddev->layout = sb->layout;		mddev->raid_disks = sb->raid_disks;		mddev->size = sb->size;		mddev->events = ev1;		mddev->bitmap_offset = 0;		mddev->default_bitmap_offset = MD_SB_BYTES >> 9;		if (mddev->minor_version >= 91) {			mddev->reshape_position = sb->reshape_position;			mddev->delta_disks = sb->delta_disks;			mddev->new_level = sb->new_level;			mddev->new_layout = sb->new_layout;			mddev->new_chunk = sb->new_chunk;		} else {			mddev->reshape_position = MaxSector;			mddev->delta_disks = 0;			mddev->new_level = mddev->level;			mddev->new_layout = mddev->layout;			mddev->new_chunk = mddev->chunk_size;		}		if (sb->state & (1<<MD_SB_CLEAN))			mddev->recovery_cp = MaxSector;		else {			if (sb->events_hi == sb->cp_events_hi && 				sb->events_lo == sb->cp_events_lo) {				mddev->recovery_cp = sb->recovery_cp;			} else				mddev->recovery_cp = 0;		}		memcpy(mddev->uuid+0, &sb->set_uuid0, 4);		memcpy(mddev->uuid+4, &sb->set_uuid1, 4);		memcpy(mddev->uuid+8, &sb->set_uuid2, 4);		memcpy(mddev->uuid+12,&sb->set_uuid3, 4);		mddev->max_disks = MD_SB_DISKS;		if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&		    mddev->bitmap_file == NULL)			mddev->bitmap_offset = mddev->default_bitmap_offset;	} else if (mddev->pers == NULL) {		/* Insist on good event counter while assembling */		++ev1;		if (ev1 < mddev->events) 			return -EINVAL;	} else if (mddev->bitmap) {		/* if adding to array with a bitmap, then we can accept an		 * older device ... but not too old.		 */		if (ev1 < mddev->bitmap->events_cleared)			return 0;	} else {		if (ev1 < mddev->events)			/* just a hot-add of a new device, leave raid_disk at -1 */			return 0;	}	if (mddev->level != LEVEL_MULTIPATH) {		desc = sb->disks + rdev->desc_nr;		if (desc->state & (1<<MD_DISK_FAULTY))			set_bit(Faulty, &rdev->flags);		else if (desc->state & (1<<MD_DISK_SYNC) /* &&			    desc->raid_disk < mddev->raid_disks */) {			set_bit(In_sync, &rdev->flags);			rdev->raid_disk = desc->raid_disk;		}		if (desc->state & (1<<MD_DISK_WRITEMOSTLY))			set_bit(WriteMostly, &rdev->flags);	} else /* MULTIPATH are always insync */		set_bit(In_sync, &rdev->flags);	return 0;}/* * sync_super for 0.90.0 */static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev){	mdp_super_t *sb;	struct list_head *tmp;	mdk_rdev_t *rdev2;	int next_spare = mddev->raid_disks;	/* make rdev->sb match mddev data..	 *	 * 1/ zero out disks	 * 2/ Add info for each disk, keeping track of highest desc_nr (next_spare);	 * 3/ any empty disks < next_spare become removed	 *	 * disks[0] gets initialised to REMOVED because	 * we cannot be sure from other fields if it has	 * been initialised or not.	 */	int i;	int active=0, working=0,failed=0,spare=0,nr_disks=0;	rdev->sb_size = MD_SB_BYTES;	sb = (mdp_super_t*)page_address(rdev->sb_page);	memset(sb, 0, sizeof(*sb));	sb->md_magic = MD_SB_MAGIC;	sb->major_version = mddev->major_version;	sb->patch_version = mddev->patch_version;	sb->gvalid_words  = 0; /* ignored */	memcpy(&sb->set_uuid0, mddev->uuid+0, 4);	memcpy(&sb->set_uuid1, mddev->uuid+4, 4);	memcpy(&sb->set_uuid2, mddev->uuid+8, 4);	memcpy(&sb->set_uuid3, mddev->uuid+12,4);	sb->ctime = mddev->ctime;	sb->level = mddev->level;	sb->size  = mddev->size;	sb->raid_disks = mddev->raid_disks;	sb->md_minor = mddev->md_minor;	sb->not_persistent = !mddev->persistent;	sb->utime = mddev->utime;	sb->state = 0;	sb->events_hi = (mddev->events>>32);	sb->events_lo = (u32)mddev->events;	if (mddev->reshape_position == MaxSector)		sb->minor_version = 90;	else {		sb->minor_version = 91;		sb->reshape_position = mddev->reshape_position;		sb->new_level = mddev->new_level;		sb->delta_disks = mddev->delta_disks;		sb->new_layout = mddev->new_layout;		sb->new_chunk = mddev->new_chunk;	}	mddev->minor_version = sb->minor_version;	if (mddev->in_sync)	{		sb->recovery_cp = mddev->recovery_cp;		sb->cp_events_hi = (mddev->events>>32);		sb->cp_events_lo = (u32)mddev->events;		if (mddev->recovery_cp == MaxSector)			sb->state = (1<< MD_SB_CLEAN);	} else		sb->recovery_cp = 0;	sb->layout = mddev->layout;	sb->chunk_size = mddev->chunk_size;	if (mddev->bitmap && mddev->bitmap_file == NULL)		sb->state |= (1<<MD_SB_BITMAP_PRESENT);	sb->disks[0].state = (1<<MD_DISK_REMOVED);	ITERATE_RDEV(mddev,rdev2,tmp) {		mdp_disk_t *d;		int desc_nr;		if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags)		    && !test_bit(Faulty, &rdev2->flags))			desc_nr = rdev2->raid_disk;		else			desc_nr = next_spare++;		rdev2->desc_nr = desc_nr;		d = &sb->disks[rdev2->desc_nr];		nr_disks++;		d->number = rdev2->desc_nr;		d->major = MAJOR(rdev2->bdev->bd_dev);		d->minor = MINOR(rdev2->bdev->bd_dev);		if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags)		    && !test_bit(Faulty, &rdev2->flags))			d->raid_disk = rdev2->raid_disk;		else			d->raid_disk = rdev2->desc_nr; /* compatibility */		if (test_bit(Faulty, &rdev2->flags))			d->state = (1<<MD_DISK_FAULTY);		else if (test_bit(In_sync, &rdev2->flags)) {			d->state = (1<<MD_DISK_ACTIVE);			d->state |= (1<<MD_DISK_SYNC);			active++;			working++;		} else {			d->state = 0;			spare++;			working++;		}		if (test_bit(WriteMostly, &rdev2->flags))			d->state |= (1<<MD_DISK_WRITEMOSTLY);	}	/* now set the "removed" and "faulty" bits on any missing devices */	for (i=0 ; i < mddev->raid_disks ; i++) {		mdp_disk_t *d = &sb->disks[i];		if (d->state == 0 && d->number == 0) {			d->number = i;			d->raid_disk = i;			d->state = (1<<MD_DISK_REMOVED);			d->state |= (1<<MD_DISK_FAULTY);			failed++;		}	}	sb->nr_disks = nr_disks;	sb->active_disks = active;	sb->working_disks = working;	sb->failed_disks = failed;	sb->spare_disks = spare;	sb->this_disk = sb->disks[rdev->desc_nr];	sb->sb_csum = calc_sb_csum(sb);}/* * version 1 superblock */static __le32 calc_sb_1_csum(struct mdp_superblock_1 * sb){	__le32 disk_csum;	u32 csum;	unsigned long long newcsum;	int size = 256 + le32_to_cpu(sb->max_dev)*2;	__le32 *isuper = (__le32*)sb;	int i;	disk_csum = sb->sb_csum;	sb->sb_csum = 0;	newcsum = 0;	for (i=0; size>=4; size -= 4 )		newcsum += le32_to_cpu(*isuper++);	if (size == 2)		newcsum += le16_to_cpu(*(__le16*) isuper);	csum = (newcsum & 0xffffffff) + (newcsum >> 32);	sb->sb_csum = disk_csum;	return cpu_to_le32(csum);}static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version){	struct mdp_superblock_1 *sb;	int ret;	sector_t sb_offset;	char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];	int bmask;	/*	 * Calculate the position of the superblock.	 * It is always aligned to a 4K boundary and	 * depeding on minor_version, it can be:	 * 0: At least 8K, but less than 12K, from end of device	 * 1: At start of device	 * 2: 4K from start of device.	 */

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?