md.c
From "Linux Kernel 2.6.9 for OMAP1710" · C source code · 2,796 lines total · page 1 of 5
C
2,796 行
/* md.c : Multiple Devices driver for Linux Copyright (C) 1998, 1999, 2000 Ingo Molnar completely rewritten, based on the MD driver code from Marc Zyngier Changes: - RAID-1/RAID-5 extensions by Miguel de Icaza, Gadi Oxman, Ingo Molnar - RAID-6 extensions by H. Peter Anvin <hpa@zytor.com> - boot support for linear and striped mode by Harald Hoyer <HarryH@Royal.Net> - kerneld support by Boris Tobotras <boris@xtalk.msk.su> - kmod support by: Cyrus Durgin - RAID0 bugfixes: Mark Anthony Lisher <markal@iname.com> - Devfs support by Richard Gooch <rgooch@atnf.csiro.au> - lots of fixes and improvements to the RAID1/RAID5 and generic RAID code (such as request based resynchronization): Neil Brown <neilb@cse.unsw.edu.au>. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. You should have received a copy of the GNU General Public License (for example /usr/src/linux/COPYING); if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.*/#include <linux/module.h>#include <linux/config.h>#include <linux/linkage.h>#include <linux/raid/md.h>#include <linux/sysctl.h>#include <linux/devfs_fs_kernel.h>#include <linux/buffer_head.h> /* for invalidate_bdev */#include <linux/suspend.h>#include <linux/init.h>#ifdef CONFIG_KMOD#include <linux/kmod.h>#endif#include <asm/unaligned.h>#define MAJOR_NR MD_MAJOR#define MD_DRIVER/* 63 partitions with the alternate major number (mdp) */#define MdpMinorShift 6#define DEBUG 0#define dprintk(x...) ((void)(DEBUG && printk(x)))#ifndef MODULEstatic void autostart_arrays (int part);#endifstatic mdk_personality_t *pers[MAX_PERSONALITY];static spinlock_t pers_lock = SPIN_LOCK_UNLOCKED;/* * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit' * is 1000 KB/sec, so the extra system load does not show up that much. 
* Increase it if you want to have more _guaranteed_ speed. Note that * the RAID driver will use the maximum available bandwith if the IO * subsystem is idle. There is also an 'absolute maximum' reconstruction * speed limit - in case reconstruction slows down your system despite * idle IO detection. * * you can change it via /proc/sys/dev/raid/speed_limit_min and _max. */static int sysctl_speed_limit_min = 1000;static int sysctl_speed_limit_max = 200000;static struct ctl_table_header *raid_table_header;static ctl_table raid_table[] = { { .ctl_name = DEV_RAID_SPEED_LIMIT_MIN, .procname = "speed_limit_min", .data = &sysctl_speed_limit_min, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec, }, { .ctl_name = DEV_RAID_SPEED_LIMIT_MAX, .procname = "speed_limit_max", .data = &sysctl_speed_limit_max, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec, }, { .ctl_name = 0 }};static ctl_table raid_dir_table[] = { { .ctl_name = DEV_RAID, .procname = "raid", .maxlen = 0, .mode = 0555, .child = raid_table, }, { .ctl_name = 0 }};static ctl_table raid_root_table[] = { { .ctl_name = CTL_DEV, .procname = "dev", .maxlen = 0, .mode = 0555, .child = raid_dir_table, }, { .ctl_name = 0 }};static struct block_device_operations md_fops;/* * Enables to iterate over all existing md arrays * all_mddevs_lock protects this list. */static LIST_HEAD(all_mddevs);static spinlock_t all_mddevs_lock = SPIN_LOCK_UNLOCKED;/* * iterates through all used mddevs in the system. * We take care to grab the all_mddevs_lock whenever navigating * the list, and to always hold a refcount when unlocked. * Any code which breaks out of this loop while own * a reference to the current mddev and must mddev_put it. 
*/#define ITERATE_MDDEV(mddev,tmp) \ \ for (({ spin_lock(&all_mddevs_lock); \ tmp = all_mddevs.next; \ mddev = NULL;}); \ ({ if (tmp != &all_mddevs) \ mddev_get(list_entry(tmp, mddev_t, all_mddevs));\ spin_unlock(&all_mddevs_lock); \ if (mddev) mddev_put(mddev); \ mddev = list_entry(tmp, mddev_t, all_mddevs); \ tmp != &all_mddevs;}); \ ({ spin_lock(&all_mddevs_lock); \ tmp = tmp->next;}) \ )int md_flush_mddev(mddev_t *mddev, sector_t *error_sector){ struct list_head *tmp; mdk_rdev_t *rdev; int ret = 0; /* * this list iteration is done without any locking in md?! */ ITERATE_RDEV(mddev, rdev, tmp) { request_queue_t *r_queue = bdev_get_queue(rdev->bdev); int err; if (!r_queue->issue_flush_fn) err = -EOPNOTSUPP; else err = r_queue->issue_flush_fn(r_queue, rdev->bdev->bd_disk, error_sector); if (!ret) ret = err; } return ret;}static int md_flush_all(request_queue_t *q, struct gendisk *disk, sector_t *error_sector){ mddev_t *mddev = q->queuedata; return md_flush_mddev(mddev, error_sector);}static int md_fail_request (request_queue_t *q, struct bio *bio){ bio_io_error(bio, bio->bi_size); return 0;}static inline mddev_t *mddev_get(mddev_t *mddev){ atomic_inc(&mddev->active); return mddev;}static void mddev_put(mddev_t *mddev){ if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock)) return; if (!mddev->raid_disks && list_empty(&mddev->disks)) { list_del(&mddev->all_mddevs); blk_put_queue(mddev->queue); kfree(mddev); } spin_unlock(&all_mddevs_lock);}static mddev_t * mddev_find(dev_t unit){ mddev_t *mddev, *new = NULL; retry: spin_lock(&all_mddevs_lock); list_for_each_entry(mddev, &all_mddevs, all_mddevs) if (mddev->unit == unit) { mddev_get(mddev); spin_unlock(&all_mddevs_lock); if (new) kfree(new); return mddev; } if (new) { list_add(&new->all_mddevs, &all_mddevs); spin_unlock(&all_mddevs_lock); return new; } spin_unlock(&all_mddevs_lock); new = (mddev_t *) kmalloc(sizeof(*new), GFP_KERNEL); if (!new) return NULL; memset(new, 0, sizeof(*new)); new->unit = unit; if 
(MAJOR(unit) == MD_MAJOR) new->md_minor = MINOR(unit); else new->md_minor = MINOR(unit) >> MdpMinorShift; init_MUTEX(&new->reconfig_sem); INIT_LIST_HEAD(&new->disks); INIT_LIST_HEAD(&new->all_mddevs); init_timer(&new->safemode_timer); atomic_set(&new->active, 1); new->queue = blk_alloc_queue(GFP_KERNEL); if (!new->queue) { kfree(new); return NULL; } blk_queue_make_request(new->queue, md_fail_request); goto retry;}static inline int mddev_lock(mddev_t * mddev){ return down_interruptible(&mddev->reconfig_sem);}static inline void mddev_lock_uninterruptible(mddev_t * mddev){ down(&mddev->reconfig_sem);}static inline int mddev_trylock(mddev_t * mddev){ return down_trylock(&mddev->reconfig_sem);}static inline void mddev_unlock(mddev_t * mddev){ up(&mddev->reconfig_sem); if (mddev->thread) md_wakeup_thread(mddev->thread);}mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr){ mdk_rdev_t * rdev; struct list_head *tmp; ITERATE_RDEV(mddev,rdev,tmp) { if (rdev->desc_nr == nr) return rdev; } return NULL;}static mdk_rdev_t * find_rdev(mddev_t * mddev, dev_t dev){ struct list_head *tmp; mdk_rdev_t *rdev; ITERATE_RDEV(mddev,rdev,tmp) { if (rdev->bdev->bd_dev == dev) return rdev; } return NULL;}inline static sector_t calc_dev_sboffset(struct block_device *bdev){ sector_t size = bdev->bd_inode->i_size >> BLOCK_SIZE_BITS; return MD_NEW_SIZE_BLOCKS(size);}static sector_t calc_dev_size(mdk_rdev_t *rdev, unsigned chunk_size){ sector_t size; size = rdev->sb_offset; if (chunk_size) size &= ~((sector_t)chunk_size/1024 - 1); return size;}static int alloc_disk_sb(mdk_rdev_t * rdev){ if (rdev->sb_page) MD_BUG(); rdev->sb_page = alloc_page(GFP_KERNEL); if (!rdev->sb_page) { printk(KERN_ALERT "md: out of memory.\n"); return -EINVAL; } return 0;}static void free_disk_sb(mdk_rdev_t * rdev){ if (rdev->sb_page) { page_cache_release(rdev->sb_page); rdev->sb_loaded = 0; rdev->sb_page = NULL; rdev->sb_offset = 0; rdev->size = 0; }}static int bi_complete(struct bio *bio, unsigned int bytes_done, int 
error){ if (bio->bi_size) return 1; complete((struct completion*)bio->bi_private); return 0;}static int sync_page_io(struct block_device *bdev, sector_t sector, int size, struct page *page, int rw){ struct bio bio; struct bio_vec vec; struct completion event; rw |= (1 << BIO_RW_SYNC); bio_init(&bio); bio.bi_io_vec = &vec; vec.bv_page = page; vec.bv_len = size; vec.bv_offset = 0; bio.bi_vcnt = 1; bio.bi_idx = 0; bio.bi_size = size; bio.bi_bdev = bdev; bio.bi_sector = sector; init_completion(&event); bio.bi_private = &event; bio.bi_end_io = bi_complete; submit_bio(rw, &bio); wait_for_completion(&event); return test_bit(BIO_UPTODATE, &bio.bi_flags);}static int read_disk_sb(mdk_rdev_t * rdev){ char b[BDEVNAME_SIZE]; if (!rdev->sb_page) { MD_BUG(); return -EINVAL; } if (rdev->sb_loaded) return 0; if (!sync_page_io(rdev->bdev, rdev->sb_offset<<1, MD_SB_BYTES, rdev->sb_page, READ)) goto fail; rdev->sb_loaded = 1; return 0;fail: printk(KERN_WARNING "md: disabled device %s, could not read superblock.\n", bdevname(rdev->bdev,b)); return -EINVAL;}static int uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2){ if ( (sb1->set_uuid0 == sb2->set_uuid0) && (sb1->set_uuid1 == sb2->set_uuid1) && (sb1->set_uuid2 == sb2->set_uuid2) && (sb1->set_uuid3 == sb2->set_uuid3)) return 1; return 0;}static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2){ int ret; mdp_super_t *tmp1, *tmp2; tmp1 = kmalloc(sizeof(*tmp1),GFP_KERNEL); tmp2 = kmalloc(sizeof(*tmp2),GFP_KERNEL); if (!tmp1 || !tmp2) { ret = 0; printk(KERN_INFO "md.c: sb1 is not equal to sb2!\n"); goto abort; } *tmp1 = *sb1; *tmp2 = *sb2; /* * nr_disks is not constant */ tmp1->nr_disks = 0; tmp2->nr_disks = 0; if (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4)) ret = 0; else ret = 1;abort: if (tmp1) kfree(tmp1); if (tmp2) kfree(tmp2); return ret;}static unsigned int calc_sb_csum(mdp_super_t * sb){ unsigned int disk_csum, csum; disk_csum = sb->sb_csum; sb->sb_csum = 0; csum = csum_partial((void *)sb, MD_SB_BYTES, 0); sb->sb_csum = 
disk_csum; return csum;}/* csum_partial is not consistent between different architectures. * Some (i386) do a 32bit csum. Some (alpha) do 16 bit. * This makes it hard for user-space to know what to do. * So we use calc_sb_csum to set the checksum to allow working * with older kernels, but allow calc_sb_csum_common to * be used when checking if a checksum is correct, to * make life easier for user-space tools that might write * a superblock. */static unsigned int calc_sb_csum_common(mdp_super_t *super){ unsigned int disk_csum = super->sb_csum; unsigned long long newcsum = 0; unsigned int csum; int i; unsigned int *superc = (int*) super; super->sb_csum = 0; for (i=0; i<MD_SB_BYTES/4; i++) newcsum+= superc[i]; csum = (newcsum& 0xffffffff) + (newcsum>>32); super->sb_csum = disk_csum; return csum;}/* * Handle superblock details. * We want to be able to handle multiple superblock formats * so we have a common interface to them all, and an array of * different handlers. * We rely on user-space to write the initial superblock, and support * reading and updating of superblocks. * Interface methods are: * int load_super(mdk_rdev_t *dev, mdk_rdev_t *refdev, int minor_version) * loads and validates a superblock on dev. * if refdev != NULL, compare superblocks on both devices * Return: * 0 - dev has a superblock that is compatible with refdev * 1 - dev has a superblock that is compatible and newer than refdev * so dev should be used as the refdev in future * -EINVAL superblock incompatible or invalid * -othererror e.g. -EIO * * int validate_super(mddev_t *mddev, mdk_rdev_t *dev) * Verify that dev is acceptable into mddev. * The first time, mddev->raid_disks will be 0, and data from * dev should be merged in. Subsequent calls check that dev * is new enough. Return 0 or -EINVAL * * void sync_super(mddev_t *mddev, mdk_rdev_t *dev) * Update the superblock for rdev with data in mddev * This does not write to disc. 
* */struct super_type { char *name; struct module *owner; int (*load_super)(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version); int (*validate_super)(mddev_t *mddev, mdk_rdev_t *rdev); void (*sync_super)(mddev_t *mddev, mdk_rdev_t *rdev);};/* * load_super for 0.90.0 */static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version){ char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE]; mdp_super_t *sb; int ret; sector_t sb_offset; /* * Calculate the position of the superblock, * it's at the end of the disk. * * It also happens to be a multiple of 4Kb. */ sb_offset = calc_dev_sboffset(rdev->bdev); rdev->sb_offset = sb_offset; ret = read_disk_sb(rdev); if (ret) return ret; ret = -EINVAL;
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?