md.c

From "Linux Kernel 2.6.9 for OMAP1710" · C source · 2,796 lines total · page 1/5

C
2,796 lines
Font size
/*   md.c : Multiple Devices driver for Linux	  Copyright (C) 1998, 1999, 2000 Ingo Molnar     completely rewritten, based on the MD driver code from Marc Zyngier   Changes:   - RAID-1/RAID-5 extensions by Miguel de Icaza, Gadi Oxman, Ingo Molnar   - RAID-6 extensions by H. Peter Anvin <hpa@zytor.com>   - boot support for linear and striped mode by Harald Hoyer <HarryH@Royal.Net>   - kerneld support by Boris Tobotras <boris@xtalk.msk.su>   - kmod support by: Cyrus Durgin   - RAID0 bugfixes: Mark Anthony Lisher <markal@iname.com>   - Devfs support by Richard Gooch <rgooch@atnf.csiro.au>   - lots of fixes and improvements to the RAID1/RAID5 and generic     RAID code (such as request based resynchronization):     Neil Brown <neilb@cse.unsw.edu.au>.   This program is free software; you can redistribute it and/or modify   it under the terms of the GNU General Public License as published by   the Free Software Foundation; either version 2, or (at your option)   any later version.   You should have received a copy of the GNU General Public License   (for example /usr/src/linux/COPYING); if not, write to the Free   Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.*/#include <linux/module.h>#include <linux/config.h>#include <linux/linkage.h>#include <linux/raid/md.h>#include <linux/sysctl.h>#include <linux/devfs_fs_kernel.h>#include <linux/buffer_head.h> /* for invalidate_bdev */#include <linux/suspend.h>#include <linux/init.h>#ifdef CONFIG_KMOD#include <linux/kmod.h>#endif#include <asm/unaligned.h>#define MAJOR_NR MD_MAJOR#define MD_DRIVER/* 63 partitions with the alternate major number (mdp) */#define MdpMinorShift 6#define DEBUG 0#define dprintk(x...) 
((void)(DEBUG && printk(x)))#ifndef MODULEstatic void autostart_arrays (int part);#endifstatic mdk_personality_t *pers[MAX_PERSONALITY];static spinlock_t pers_lock = SPIN_LOCK_UNLOCKED;/* * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit' * is 1000 KB/sec, so the extra system load does not show up that much. * Increase it if you want to have more _guaranteed_ speed. Note that * the RAID driver will use the maximum available bandwith if the IO * subsystem is idle. There is also an 'absolute maximum' reconstruction * speed limit - in case reconstruction slows down your system despite * idle IO detection. * * you can change it via /proc/sys/dev/raid/speed_limit_min and _max. */static int sysctl_speed_limit_min = 1000;static int sysctl_speed_limit_max = 200000;static struct ctl_table_header *raid_table_header;static ctl_table raid_table[] = {	{		.ctl_name	= DEV_RAID_SPEED_LIMIT_MIN,		.procname	= "speed_limit_min",		.data		= &sysctl_speed_limit_min,		.maxlen		= sizeof(int),		.mode		= 0644,		.proc_handler	= &proc_dointvec,	},	{		.ctl_name	= DEV_RAID_SPEED_LIMIT_MAX,		.procname	= "speed_limit_max",		.data		= &sysctl_speed_limit_max,		.maxlen		= sizeof(int),		.mode		= 0644,		.proc_handler	= &proc_dointvec,	},	{ .ctl_name = 0 }};static ctl_table raid_dir_table[] = {	{		.ctl_name	= DEV_RAID,		.procname	= "raid",		.maxlen		= 0,		.mode		= 0555,		.child		= raid_table,	},	{ .ctl_name = 0 }};static ctl_table raid_root_table[] = {	{		.ctl_name	= CTL_DEV,		.procname	= "dev",		.maxlen		= 0,		.mode		= 0555,		.child		= raid_dir_table,	},	{ .ctl_name = 0 }};static struct block_device_operations md_fops;/* * Enables to iterate over all existing md arrays * all_mddevs_lock protects this list. */static LIST_HEAD(all_mddevs);static spinlock_t all_mddevs_lock = SPIN_LOCK_UNLOCKED;/* * iterates through all used mddevs in the system. * We take care to grab the all_mddevs_lock whenever navigating * the list, and to always hold a refcount when unlocked. 
* Any code which breaks out of this loop while own * a reference to the current mddev and must mddev_put it. */#define ITERATE_MDDEV(mddev,tmp)					\									\	for (({ spin_lock(&all_mddevs_lock); 				\		tmp = all_mddevs.next;					\		mddev = NULL;});					\	     ({ if (tmp != &all_mddevs)					\			mddev_get(list_entry(tmp, mddev_t, all_mddevs));\		spin_unlock(&all_mddevs_lock);				\		if (mddev) mddev_put(mddev);				\		mddev = list_entry(tmp, mddev_t, all_mddevs);		\		tmp != &all_mddevs;});					\	     ({ spin_lock(&all_mddevs_lock);				\		tmp = tmp->next;})					\		)int md_flush_mddev(mddev_t *mddev, sector_t *error_sector){	struct list_head *tmp;	mdk_rdev_t *rdev;	int ret = 0;	/*	 * this list iteration is done without any locking in md?!	 */	ITERATE_RDEV(mddev, rdev, tmp) {		request_queue_t *r_queue = bdev_get_queue(rdev->bdev);		int err;		if (!r_queue->issue_flush_fn)			err = -EOPNOTSUPP;		else			err = r_queue->issue_flush_fn(r_queue, rdev->bdev->bd_disk, error_sector);		if (!ret)			ret = err;	}	return ret;}static int md_flush_all(request_queue_t *q, struct gendisk *disk,			 sector_t *error_sector){	mddev_t *mddev = q->queuedata;	return md_flush_mddev(mddev, error_sector);}static int md_fail_request (request_queue_t *q, struct bio *bio){	bio_io_error(bio, bio->bi_size);	return 0;}static inline mddev_t *mddev_get(mddev_t *mddev){	atomic_inc(&mddev->active);	return mddev;}static void mddev_put(mddev_t *mddev){	if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))		return;	if (!mddev->raid_disks && list_empty(&mddev->disks)) {		list_del(&mddev->all_mddevs);		blk_put_queue(mddev->queue);		kfree(mddev);	}	spin_unlock(&all_mddevs_lock);}static mddev_t * mddev_find(dev_t unit){	mddev_t *mddev, *new = NULL; retry:	spin_lock(&all_mddevs_lock);	list_for_each_entry(mddev, &all_mddevs, all_mddevs)		if (mddev->unit == unit) {			mddev_get(mddev);			spin_unlock(&all_mddevs_lock);			if (new)				kfree(new);			return mddev;		}	if (new) {		list_add(&new->all_mddevs, &all_mddevs);	
	spin_unlock(&all_mddevs_lock);		return new;	}	spin_unlock(&all_mddevs_lock);	new = (mddev_t *) kmalloc(sizeof(*new), GFP_KERNEL);	if (!new)		return NULL;	memset(new, 0, sizeof(*new));	new->unit = unit;	if (MAJOR(unit) == MD_MAJOR)		new->md_minor = MINOR(unit);	else		new->md_minor = MINOR(unit) >> MdpMinorShift;	init_MUTEX(&new->reconfig_sem);	INIT_LIST_HEAD(&new->disks);	INIT_LIST_HEAD(&new->all_mddevs);	init_timer(&new->safemode_timer);	atomic_set(&new->active, 1);	new->queue = blk_alloc_queue(GFP_KERNEL);	if (!new->queue) {		kfree(new);		return NULL;	}	blk_queue_make_request(new->queue, md_fail_request);	goto retry;}static inline int mddev_lock(mddev_t * mddev){	return down_interruptible(&mddev->reconfig_sem);}static inline void mddev_lock_uninterruptible(mddev_t * mddev){	down(&mddev->reconfig_sem);}static inline int mddev_trylock(mddev_t * mddev){	return down_trylock(&mddev->reconfig_sem);}static inline void mddev_unlock(mddev_t * mddev){	up(&mddev->reconfig_sem);	if (mddev->thread)		md_wakeup_thread(mddev->thread);}mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr){	mdk_rdev_t * rdev;	struct list_head *tmp;	ITERATE_RDEV(mddev,rdev,tmp) {		if (rdev->desc_nr == nr)			return rdev;	}	return NULL;}static mdk_rdev_t * find_rdev(mddev_t * mddev, dev_t dev){	struct list_head *tmp;	mdk_rdev_t *rdev;	ITERATE_RDEV(mddev,rdev,tmp) {		if (rdev->bdev->bd_dev == dev)			return rdev;	}	return NULL;}inline static sector_t calc_dev_sboffset(struct block_device *bdev){	sector_t size = bdev->bd_inode->i_size >> BLOCK_SIZE_BITS;	return MD_NEW_SIZE_BLOCKS(size);}static sector_t calc_dev_size(mdk_rdev_t *rdev, unsigned chunk_size){	sector_t size;	size = rdev->sb_offset;	if (chunk_size)		size &= ~((sector_t)chunk_size/1024 - 1);	return size;}static int alloc_disk_sb(mdk_rdev_t * rdev){	if (rdev->sb_page)		MD_BUG();	rdev->sb_page = alloc_page(GFP_KERNEL);	if (!rdev->sb_page) {		printk(KERN_ALERT "md: out of memory.\n");		return -EINVAL;	}	return 0;}static void free_disk_sb(mdk_rdev_t * 
rdev){	if (rdev->sb_page) {		page_cache_release(rdev->sb_page);		rdev->sb_loaded = 0;		rdev->sb_page = NULL;		rdev->sb_offset = 0;		rdev->size = 0;	}}static int bi_complete(struct bio *bio, unsigned int bytes_done, int error){	if (bio->bi_size)		return 1;	complete((struct completion*)bio->bi_private);	return 0;}static int sync_page_io(struct block_device *bdev, sector_t sector, int size,		   struct page *page, int rw){	struct bio bio;	struct bio_vec vec;	struct completion event;	rw |= (1 << BIO_RW_SYNC);	bio_init(&bio);	bio.bi_io_vec = &vec;	vec.bv_page = page;	vec.bv_len = size;	vec.bv_offset = 0;	bio.bi_vcnt = 1;	bio.bi_idx = 0;	bio.bi_size = size;	bio.bi_bdev = bdev;	bio.bi_sector = sector;	init_completion(&event);	bio.bi_private = &event;	bio.bi_end_io = bi_complete;	submit_bio(rw, &bio);	wait_for_completion(&event);	return test_bit(BIO_UPTODATE, &bio.bi_flags);}static int read_disk_sb(mdk_rdev_t * rdev){	char b[BDEVNAME_SIZE];	if (!rdev->sb_page) {		MD_BUG();		return -EINVAL;	}	if (rdev->sb_loaded)		return 0;	if (!sync_page_io(rdev->bdev, rdev->sb_offset<<1, MD_SB_BYTES, rdev->sb_page, READ))		goto fail;	rdev->sb_loaded = 1;	return 0;fail:	printk(KERN_WARNING "md: disabled device %s, could not read superblock.\n",		bdevname(rdev->bdev,b));	return -EINVAL;}static int uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2){	if (	(sb1->set_uuid0 == sb2->set_uuid0) &&		(sb1->set_uuid1 == sb2->set_uuid1) &&		(sb1->set_uuid2 == sb2->set_uuid2) &&		(sb1->set_uuid3 == sb2->set_uuid3))		return 1;	return 0;}static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2){	int ret;	mdp_super_t *tmp1, *tmp2;	tmp1 = kmalloc(sizeof(*tmp1),GFP_KERNEL);	tmp2 = kmalloc(sizeof(*tmp2),GFP_KERNEL);	if (!tmp1 || !tmp2) {		ret = 0;		printk(KERN_INFO "md.c: sb1 is not equal to sb2!\n");		goto abort;	}	*tmp1 = *sb1;	*tmp2 = *sb2;	/*	 * nr_disks is not constant	 */	tmp1->nr_disks = 0;	tmp2->nr_disks = 0;	if (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4))		ret = 0;	else		ret = 1;abort:	if (tmp1)		
kfree(tmp1);	if (tmp2)		kfree(tmp2);	return ret;}static unsigned int calc_sb_csum(mdp_super_t * sb){	unsigned int disk_csum, csum;	disk_csum = sb->sb_csum;	sb->sb_csum = 0;	csum = csum_partial((void *)sb, MD_SB_BYTES, 0);	sb->sb_csum = disk_csum;	return csum;}/* csum_partial is not consistent between different architectures. * Some (i386) do a 32bit csum.  Some (alpha) do 16 bit. * This makes it hard for user-space to know what to do. * So we use calc_sb_csum to set the checksum to allow working * with older kernels, but allow calc_sb_csum_common to * be used when checking if a checksum is correct, to * make life easier for user-space tools that might write * a superblock. */static unsigned int calc_sb_csum_common(mdp_super_t *super){	unsigned int  disk_csum = super->sb_csum;	unsigned long long newcsum = 0;	unsigned int csum;	int i;	unsigned int *superc = (int*) super;	super->sb_csum = 0;	for (i=0; i<MD_SB_BYTES/4; i++)		newcsum+= superc[i];	csum = (newcsum& 0xffffffff) + (newcsum>>32);	super->sb_csum = disk_csum;	return csum;}/* * Handle superblock details. * We want to be able to handle multiple superblock formats * so we have a common interface to them all, and an array of * different handlers. * We rely on user-space to write the initial superblock, and support * reading and updating of superblocks. * Interface methods are: *   int load_super(mdk_rdev_t *dev, mdk_rdev_t *refdev, int minor_version) *      loads and validates a superblock on dev. *      if refdev != NULL, compare superblocks on both devices *    Return: *      0 - dev has a superblock that is compatible with refdev *      1 - dev has a superblock that is compatible and newer than refdev *          so dev should be used as the refdev in future *     -EINVAL superblock incompatible or invalid *     -othererror e.g. -EIO * *   int validate_super(mddev_t *mddev, mdk_rdev_t *dev) *      Verify that dev is acceptable into mddev. 
*       The first time, mddev->raid_disks will be 0, and data from *       dev should be merged in.  Subsequent calls check that dev *       is new enough.  Return 0 or -EINVAL * *   void sync_super(mddev_t *mddev, mdk_rdev_t *dev) *     Update the superblock for rdev with data in mddev *     This does not write to disc. * */struct super_type  {	char 		*name;	struct module	*owner;	int		(*load_super)(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version);	int		(*validate_super)(mddev_t *mddev, mdk_rdev_t *rdev);	void		(*sync_super)(mddev_t *mddev, mdk_rdev_t *rdev);};/* * load_super for 0.90.0  */static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version){	char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];	mdp_super_t *sb;	int ret;	sector_t sb_offset;	/*	 * Calculate the position of the superblock,	 * it's at the end of the disk.	 *	 * It also happens to be a multiple of 4Kb.	 */	sb_offset = calc_dev_sboffset(rdev->bdev);	rdev->sb_offset = sb_offset;	ret = read_disk_sb(rdev);	if (ret) return ret;	ret = -EINVAL;

⌨️ Keyboard shortcuts

Copy code: Ctrl + C
Search code: Ctrl + F
Fullscreen mode: F11
Increase font size: Ctrl + =
Decrease font size: Ctrl + -
Show shortcuts: ?