/*
 * md.c — excerpt from the Linux kernel source tree (the MD "multiple
 * devices" software-RAID driver).  Scraped from a code-viewer page:
 * C source, 2,595 lines in total; this is page 1 of 5.
 */
	switch(minor_version) {	case 0:		sb_offset = rdev->bdev->bd_inode->i_size >> 9;		sb_offset -= 8*2;		sb_offset &= ~(sector_t)(4*2-1);		/* convert from sectors to K */		sb_offset /= 2;		break;	case 1:		sb_offset = 0;		break;	case 2:		sb_offset = 4;		break;	default:		return -EINVAL;	}	rdev->sb_offset = sb_offset;	/* superblock is rarely larger than 1K, but it can be larger,	 * and it is safe to read 4k, so we do that	 */	ret = read_disk_sb(rdev, 4096);	if (ret) return ret;	sb = (struct mdp_superblock_1*)page_address(rdev->sb_page);	if (sb->magic != cpu_to_le32(MD_SB_MAGIC) ||	    sb->major_version != cpu_to_le32(1) ||	    le32_to_cpu(sb->max_dev) > (4096-256)/2 ||	    le64_to_cpu(sb->super_offset) != (rdev->sb_offset<<1) ||	    (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0)		return -EINVAL;	if (calc_sb_1_csum(sb) != sb->sb_csum) {		printk("md: invalid superblock checksum on %s\n",			bdevname(rdev->bdev,b));		return -EINVAL;	}	if (le64_to_cpu(sb->data_size) < 10) {		printk("md: data_size too small on %s\n",		       bdevname(rdev->bdev,b));		return -EINVAL;	}	if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET)) {		if (sb->level != cpu_to_le32(1) &&		    sb->level != cpu_to_le32(4) &&		    sb->level != cpu_to_le32(5) &&		    sb->level != cpu_to_le32(6) &&		    sb->level != cpu_to_le32(10)) {			printk(KERN_WARNING			       "md: bitmaps not supported for this level.\n");			return -EINVAL;		}	}	rdev->preferred_minor = 0xffff;	rdev->data_offset = le64_to_cpu(sb->data_offset);	atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));	rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;	bmask = queue_hardsect_size(rdev->bdev->bd_disk->queue)-1;	if (rdev->sb_size & bmask)		rdev-> sb_size = (rdev->sb_size | bmask)+1;	if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))		rdev->desc_nr = -1;	else		rdev->desc_nr = le32_to_cpu(sb->dev_number);	if (refdev == 0)		ret = 1;	else {		__u64 ev1, ev2;		struct mdp_superblock_1 *refsb = 			(struct 
mdp_superblock_1*)page_address(refdev->sb_page);		if (memcmp(sb->set_uuid, refsb->set_uuid, 16) != 0 ||		    sb->level != refsb->level ||		    sb->layout != refsb->layout ||		    sb->chunksize != refsb->chunksize) {			printk(KERN_WARNING "md: %s has strangely different"				" superblock to %s\n",				bdevname(rdev->bdev,b),				bdevname(refdev->bdev,b2));			return -EINVAL;		}		ev1 = le64_to_cpu(sb->events);		ev2 = le64_to_cpu(refsb->events);		if (ev1 > ev2)			ret = 1;		else			ret = 0;	}	if (minor_version) 		rdev->size = ((rdev->bdev->bd_inode->i_size>>9) - le64_to_cpu(sb->data_offset)) / 2;	else		rdev->size = rdev->sb_offset;	if (rdev->size < le64_to_cpu(sb->data_size)/2)		return -EINVAL;	rdev->size = le64_to_cpu(sb->data_size)/2;	if (le32_to_cpu(sb->chunksize))		rdev->size &= ~((sector_t)le32_to_cpu(sb->chunksize)/2 - 1);	if (le64_to_cpu(sb->size) > rdev->size*2)		return -EINVAL;	return ret;}static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev){	struct mdp_superblock_1 *sb = (struct mdp_superblock_1*)page_address(rdev->sb_page);	__u64 ev1 = le64_to_cpu(sb->events);	rdev->raid_disk = -1;	rdev->flags = 0;	if (mddev->raid_disks == 0) {		mddev->major_version = 1;		mddev->patch_version = 0;		mddev->persistent = 1;		mddev->chunk_size = le32_to_cpu(sb->chunksize) << 9;		mddev->ctime = le64_to_cpu(sb->ctime) & ((1ULL << 32)-1);		mddev->utime = le64_to_cpu(sb->utime) & ((1ULL << 32)-1);		mddev->level = le32_to_cpu(sb->level);		mddev->clevel[0] = 0;		mddev->layout = le32_to_cpu(sb->layout);		mddev->raid_disks = le32_to_cpu(sb->raid_disks);		mddev->size = le64_to_cpu(sb->size)/2;		mddev->events = ev1;		mddev->bitmap_offset = 0;		mddev->default_bitmap_offset = 1024 >> 9;				mddev->recovery_cp = le64_to_cpu(sb->resync_offset);		memcpy(mddev->uuid, sb->set_uuid, 16);		mddev->max_disks =  (4096-256)/2;		if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&		    mddev->bitmap_file == NULL )			mddev->bitmap_offset = (__s32)le32_to_cpu(sb->bitmap_offset);		if 
((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {			mddev->reshape_position = le64_to_cpu(sb->reshape_position);			mddev->delta_disks = le32_to_cpu(sb->delta_disks);			mddev->new_level = le32_to_cpu(sb->new_level);			mddev->new_layout = le32_to_cpu(sb->new_layout);			mddev->new_chunk = le32_to_cpu(sb->new_chunk)<<9;		} else {			mddev->reshape_position = MaxSector;			mddev->delta_disks = 0;			mddev->new_level = mddev->level;			mddev->new_layout = mddev->layout;			mddev->new_chunk = mddev->chunk_size;		}	} else if (mddev->pers == NULL) {		/* Insist of good event counter while assembling */		++ev1;		if (ev1 < mddev->events)			return -EINVAL;	} else if (mddev->bitmap) {		/* If adding to array with a bitmap, then we can accept an		 * older device, but not too old.		 */		if (ev1 < mddev->bitmap->events_cleared)			return 0;	} else {		if (ev1 < mddev->events)			/* just a hot-add of a new device, leave raid_disk at -1 */			return 0;	}	if (mddev->level != LEVEL_MULTIPATH) {		int role;		role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);		switch(role) {		case 0xffff: /* spare */			break;		case 0xfffe: /* faulty */			set_bit(Faulty, &rdev->flags);			break;		default:			if ((le32_to_cpu(sb->feature_map) &			     MD_FEATURE_RECOVERY_OFFSET))				rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);			else				set_bit(In_sync, &rdev->flags);			rdev->raid_disk = role;			break;		}		if (sb->devflags & WriteMostly1)			set_bit(WriteMostly, &rdev->flags);	} else /* MULTIPATH are always insync */		set_bit(In_sync, &rdev->flags);	return 0;}static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev){	struct mdp_superblock_1 *sb;	struct list_head *tmp;	mdk_rdev_t *rdev2;	int max_dev, i;	/* make rdev->sb match mddev and rdev data. 
*/	sb = (struct mdp_superblock_1*)page_address(rdev->sb_page);	sb->feature_map = 0;	sb->pad0 = 0;	sb->recovery_offset = cpu_to_le64(0);	memset(sb->pad1, 0, sizeof(sb->pad1));	memset(sb->pad2, 0, sizeof(sb->pad2));	memset(sb->pad3, 0, sizeof(sb->pad3));	sb->utime = cpu_to_le64((__u64)mddev->utime);	sb->events = cpu_to_le64(mddev->events);	if (mddev->in_sync)		sb->resync_offset = cpu_to_le64(mddev->recovery_cp);	else		sb->resync_offset = cpu_to_le64(0);	sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));	sb->raid_disks = cpu_to_le32(mddev->raid_disks);	sb->size = cpu_to_le64(mddev->size<<1);	if (mddev->bitmap && mddev->bitmap_file == NULL) {		sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset);		sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);	}	if (rdev->raid_disk >= 0 &&	    !test_bit(In_sync, &rdev->flags) &&	    rdev->recovery_offset > 0) {		sb->feature_map |= cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);		sb->recovery_offset = cpu_to_le64(rdev->recovery_offset);	}	if (mddev->reshape_position != MaxSector) {		sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);		sb->reshape_position = cpu_to_le64(mddev->reshape_position);		sb->new_layout = cpu_to_le32(mddev->new_layout);		sb->delta_disks = cpu_to_le32(mddev->delta_disks);		sb->new_level = cpu_to_le32(mddev->new_level);		sb->new_chunk = cpu_to_le32(mddev->new_chunk>>9);	}	max_dev = 0;	ITERATE_RDEV(mddev,rdev2,tmp)		if (rdev2->desc_nr+1 > max_dev)			max_dev = rdev2->desc_nr+1;	if (max_dev > le32_to_cpu(sb->max_dev))		sb->max_dev = cpu_to_le32(max_dev);	for (i=0; i<max_dev;i++)		sb->dev_roles[i] = cpu_to_le16(0xfffe);		ITERATE_RDEV(mddev,rdev2,tmp) {		i = rdev2->desc_nr;		if (test_bit(Faulty, &rdev2->flags))			sb->dev_roles[i] = cpu_to_le16(0xfffe);		else if (test_bit(In_sync, &rdev2->flags))			sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);		else if (rdev2->raid_disk >= 0 && rdev2->recovery_offset > 0)			sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);		else			
sb->dev_roles[i] = cpu_to_le16(0xffff);	}	sb->sb_csum = calc_sb_1_csum(sb);}static struct super_type super_types[] = {	[0] = {		.name	= "0.90.0",		.owner	= THIS_MODULE,		.load_super	= super_90_load,		.validate_super	= super_90_validate,		.sync_super	= super_90_sync,	},	[1] = {		.name	= "md-1",		.owner	= THIS_MODULE,		.load_super	= super_1_load,		.validate_super	= super_1_validate,		.sync_super	= super_1_sync,	},};static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2){	struct list_head *tmp, *tmp2;	mdk_rdev_t *rdev, *rdev2;	ITERATE_RDEV(mddev1,rdev,tmp)		ITERATE_RDEV(mddev2, rdev2, tmp2)			if (rdev->bdev->bd_contains ==			    rdev2->bdev->bd_contains)				return 1;	return 0;}static LIST_HEAD(pending_raid_disks);static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev){	char b[BDEVNAME_SIZE];	struct kobject *ko;	char *s;	int err;	if (rdev->mddev) {		MD_BUG();		return -EINVAL;	}	/* make sure rdev->size exceeds mddev->size */	if (rdev->size && (mddev->size == 0 || rdev->size < mddev->size)) {		if (mddev->pers) {			/* Cannot change size, so fail			 * If mddev->level <= 0, then we don't care			 * about aligning sizes (e.g. linear)			 */			if (mddev->level > 0)				return -ENOSPC;		} else			mddev->size = rdev->size;	}	/* Verify rdev->desc_nr is unique.	 
* If it is -1, assign a free number, else	 * check number is not in use	 */	if (rdev->desc_nr < 0) {		int choice = 0;		if (mddev->pers) choice = mddev->raid_disks;		while (find_rdev_nr(mddev, choice))			choice++;		rdev->desc_nr = choice;	} else {		if (find_rdev_nr(mddev, rdev->desc_nr))			return -EBUSY;	}	bdevname(rdev->bdev,b);	if (kobject_set_name(&rdev->kobj, "dev-%s", b) < 0)		return -ENOMEM;	while ( (s=strchr(rdev->kobj.k_name, '/')) != NULL)		*s = '!';				rdev->mddev = mddev;	printk(KERN_INFO "md: bind<%s>\n", b);	rdev->kobj.parent = &mddev->kobj;	if ((err = kobject_add(&rdev->kobj)))		goto fail;	if (rdev->bdev->bd_part)		ko = &rdev->bdev->bd_part->kobj;	else		ko = &rdev->bdev->bd_disk->kobj;	if ((err = sysfs_create_link(&rdev->kobj, ko, "block"))) {		kobject_del(&rdev->kobj);		goto fail;	}	list_add(&rdev->same_set, &mddev->disks);	bd_claim_by_disk(rdev->bdev, rdev, mddev->gendisk);	return 0; fail:	printk(KERN_WARNING "md: failed to register dev-%s for %s\n",	       b, mdname(mddev));	return err;}static void delayed_delete(struct work_struct *ws){	mdk_rdev_t *rdev = container_of(ws, mdk_rdev_t, del_work);	kobject_del(&rdev->kobj);}static void unbind_rdev_from_array(mdk_rdev_t * rdev){	char b[BDEVNAME_SIZE];	if (!rdev->mddev) {		MD_BUG();		return;	}	bd_release_from_disk(rdev->bdev, rdev->mddev->gendisk);	list_del_init(&rdev->same_set);	printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b));	rdev->mddev = NULL;	sysfs_remove_link(&rdev->kobj, "block");	/* We need to delay this, otherwise we can deadlock when	 * writing to 'remove' to "dev/state"	 */	INIT_WORK(&rdev->del_work, delayed_delete);	schedule_work(&rdev->del_work);}/* * prevent the device from being mounted, repartitioned or * otherwise reused by a RAID array (or any other kernel * subsystem), by bd_claiming the device. 
*/static int lock_rdev(mdk_rdev_t *rdev, dev_t dev){	int err = 0;	struct block_device *bdev;	char b[BDEVNAME_SIZE];	bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE);	if (IS_ERR(bdev)) {		printk(KERN_ERR "md: could not open %s.\n",			__bdevname(dev, b));		return PTR_ERR(bdev);	}	err = bd_claim(bdev, rdev);	if (err) {		printk(KERN_ERR "md: could not bd_claim %s.\n",			bdevname(bdev, b));		blkdev_put(bdev);		return err;	}	rdev->bdev = bdev;	return err;}static void unlock_rdev(mdk_rdev_t *rdev){	struct block_device *bdev = rdev->bdev;	rdev->bdev = NULL;	if (!bdev)		MD_BUG();	bd_release(bdev);	blkdev_put(bdev);}void md_autodetect_dev(dev_t dev);static void export_rdev(mdk_rdev_t * rdev){	char b[BDEVNAME_SIZE];	printk(KERN_INFO "md: export_rdev(%s)\n",		bdevname(rdev->bdev,b));	if (rdev->mddev)		MD_BUG();	free_disk_sb(rdev);	list_del_init(&rdev->same_set);#ifndef MODULE	md_autodetect_dev(rdev->bdev->bd_dev);#endif	unlock_rdev(rdev);	kobject_put(&rdev->kobj);}static void kick_rdev_from_array(mdk_rdev_t * rdev){	unbind_rdev_from_array(rdev);	export_rdev(rdev);}static void export_array(mddev_t *mddev){	struct list_head *tmp;	mdk_rdev_t *rdev;	ITERATE_RDEV(mddev,rdev,tmp) {		if (!rdev->mddev) {			MD_BUG();			continue;		}		kick_rdev_from_array(rdev);	}	if (!list_empty(&mddev->disks))		MD_BUG();	mddev->raid_disks = 0;	mddev->major_version = 0;}static void print_desc(mdp_disk_t *desc){	printk(" DISK<N:%d,(%d,%d),R:%d,S:%d>\n", desc->number,		desc->major,desc->minor,desc->raid_disk,desc->state);}static void print_sb(mdp_super_t *sb){	int i;	printk(KERN_INFO 		"md:  SB: (V:%d.%d.%d) ID:<%08x.%08x.%08x.%08x> CT:%08x\n",		sb->major_version, sb->minor_version, sb->patch_version,		sb->set_uuid0, sb->set_uuid1, sb->set_uuid2, sb->set_uuid3,		sb->ctime);	printk(KERN_INFO "md:     L%d S%08d ND:%d RD:%d md%d LO:%d CS:%d\n",		sb->level, sb->size, sb->nr_disks, sb->raid_disks,		sb->md_minor, sb->layout, sb->chunk_size);	printk(KERN_INFO "md:     UT:%08x ST:%d AD:%d WD:%d"		" FD:%d SD:%d 
CSUM:%08x E:%08lx\n",		sb->utime, sb->state, sb->active_disks, sb->working_disks,		sb->failed_disks, sb->spare_disks,		sb->sb_csum, (unsigned long)sb->events_lo);	printk(KERN_INFO);	for (i = 0; i < MD_SB_DISKS; i++) {		mdp_disk_t *desc;		desc = sb->disks + i;		if (desc->number || desc->major || desc->minor ||		    desc->raid_disk || (desc->state && (desc->state != 4))) {			printk("     D %2d: ", i);			print_desc(desc);		}	}	printk(KERN_INFO "md:     THIS: ");

/*
 * (Viewer chrome from the scraped page — keyboard shortcut help:
 *  copy code Ctrl+C, search code Ctrl+F, fullscreen F11,
 *  increase font Ctrl+=, decrease font Ctrl+-, show shortcuts "?".)
 */