📄 md.c
字号:
/* md.c : Multiple Devices driver for Linux Copyright (C) 1998, 1999, 2000 Ingo Molnar completely rewritten, based on the MD driver code from Marc Zyngier Changes: - RAID-1/RAID-5 extensions by Miguel de Icaza, Gadi Oxman, Ingo Molnar - boot support for linear and striped mode by Harald Hoyer <HarryH@Royal.Net> - kerneld support by Boris Tobotras <boris@xtalk.msk.su> - kmod support by: Cyrus Durgin - RAID0 bugfixes: Mark Anthony Lisher <markal@iname.com> - Devfs support by Richard Gooch <rgooch@atnf.csiro.au> - lots of fixes and improvements to the RAID1/RAID5 and generic RAID code (such as request based resynchronization): Neil Brown <neilb@cse.unsw.edu.au>. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. You should have received a copy of the GNU General Public License (for example /usr/src/linux/COPYING); if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.*/#include <linux/module.h>#include <linux/config.h>#include <linux/raid/md.h>#include <linux/sysctl.h>#include <linux/raid/xor.h>#include <linux/devfs_fs_kernel.h>#include <linux/init.h>#ifdef CONFIG_KMOD#include <linux/kmod.h>#endif#define __KERNEL_SYSCALLS__#include <linux/unistd.h>#include <asm/unaligned.h>#define MAJOR_NR MD_MAJOR#define MD_DRIVER#include <linux/blk.h>#define DEBUG 0#if DEBUG# define dprintk(x...) printk(x)#else# define dprintk(x...) do { } while(0)#endif#ifndef MODULEstatic void autostart_arrays (void);#endifstatic mdk_personality_t *pers[MAX_PERSONALITY];/* * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit' * is 100 KB/sec, so the extra system load does not show up that much. * Increase it if you want to have more _guaranteed_ speed. Note that * the RAID driver will use the maximum available bandwith if the IO * subsystem is idle. There is also an 'absolute maximum' reconstruction * speed limit - in case reconstruction slows down your system despite * idle IO detection. * * you can change it via /proc/sys/dev/raid/speed_limit_min and _max. */static int sysctl_speed_limit_min = 100;static int sysctl_speed_limit_max = 100000;static struct ctl_table_header *raid_table_header;static ctl_table raid_table[] = { {DEV_RAID_SPEED_LIMIT_MIN, "speed_limit_min", &sysctl_speed_limit_min, sizeof(int), 0644, NULL, &proc_dointvec}, {DEV_RAID_SPEED_LIMIT_MAX, "speed_limit_max", &sysctl_speed_limit_max, sizeof(int), 0644, NULL, &proc_dointvec}, {0}};static ctl_table raid_dir_table[] = { {DEV_RAID, "raid", NULL, 0, 0555, raid_table}, {0}};static ctl_table raid_root_table[] = { {CTL_DEV, "dev", NULL, 0, 0555, raid_dir_table}, {0}};/* * these have to be allocated separately because external * subsystems want to have a pre-defined structure */struct hd_struct md_hd_struct[MAX_MD_DEVS];static int md_blocksizes[MAX_MD_DEVS];static int md_hardsect_sizes[MAX_MD_DEVS];static int md_maxreadahead[MAX_MD_DEVS];static mdk_thread_t *md_recovery_thread;int md_size[MAX_MD_DEVS];static struct block_device_operations md_fops;static devfs_handle_t devfs_handle;static struct gendisk md_gendisk={ major: MD_MAJOR, major_name: "md", minor_shift: 0, max_p: 1, part: md_hd_struct, sizes: md_size, nr_real: MAX_MD_DEVS, real_devices: NULL, next: NULL, fops: &md_fops,};/* * Enables to iterate over all existing md arrays */static MD_LIST_HEAD(all_mddevs);/* * The mapping between kdev and mddev is not necessary a simple * one! Eg. HSM uses several sub-devices to implement Logical * Volumes. All these sub-devices map to the same mddev. */dev_mapping_t mddev_map[MAX_MD_DEVS];void add_mddev_mapping(mddev_t * mddev, kdev_t dev, void *data){ unsigned int minor = MINOR(dev); if (MAJOR(dev) != MD_MAJOR) { MD_BUG(); return; } if (mddev_map[minor].mddev) { MD_BUG(); return; } mddev_map[minor].mddev = mddev; mddev_map[minor].data = data;}void del_mddev_mapping(mddev_t * mddev, kdev_t dev){ unsigned int minor = MINOR(dev); if (MAJOR(dev) != MD_MAJOR) { MD_BUG(); return; } if (mddev_map[minor].mddev != mddev) { MD_BUG(); return; } mddev_map[minor].mddev = NULL; mddev_map[minor].data = NULL;}static int md_make_request(request_queue_t *q, int rw, struct buffer_head * bh){ mddev_t *mddev = kdev_to_mddev(bh->b_rdev); if (mddev && mddev->pers) return mddev->pers->make_request(mddev, rw, bh); else { buffer_IO_error(bh); return 0; }}static mddev_t * alloc_mddev(kdev_t dev){ mddev_t *mddev; if (MAJOR(dev) != MD_MAJOR) { MD_BUG(); return 0; } mddev = (mddev_t *) kmalloc(sizeof(*mddev), GFP_KERNEL); if (!mddev) return NULL; memset(mddev, 0, sizeof(*mddev)); mddev->__minor = MINOR(dev); init_MUTEX(&mddev->reconfig_sem); init_MUTEX(&mddev->recovery_sem); init_MUTEX(&mddev->resync_sem); MD_INIT_LIST_HEAD(&mddev->disks); MD_INIT_LIST_HEAD(&mddev->all_mddevs); atomic_set(&mddev->active, 0); /* * The 'base' mddev is the one with data NULL. * personalities can create additional mddevs * if necessary. */ add_mddev_mapping(mddev, dev, 0); md_list_add(&mddev->all_mddevs, &all_mddevs); MOD_INC_USE_COUNT; return mddev;}mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr){ mdk_rdev_t * rdev; struct md_list_head *tmp; ITERATE_RDEV(mddev,rdev,tmp) { if (rdev->desc_nr == nr) return rdev; } return NULL;}mdk_rdev_t * find_rdev(mddev_t * mddev, kdev_t dev){ struct md_list_head *tmp; mdk_rdev_t *rdev; ITERATE_RDEV(mddev,rdev,tmp) { if (rdev->dev == dev) return rdev; } return NULL;}static MD_LIST_HEAD(device_names);char * partition_name(kdev_t dev){ struct gendisk *hd; static char nomem [] = "<nomem>"; dev_name_t *dname; struct md_list_head *tmp = device_names.next; while (tmp != &device_names) { dname = md_list_entry(tmp, dev_name_t, list); if (dname->dev == dev) return dname->name; tmp = tmp->next; } dname = (dev_name_t *) kmalloc(sizeof(*dname), GFP_KERNEL); if (!dname) return nomem; /* * ok, add this new device name to the list */ hd = get_gendisk (dev); dname->name = NULL; if (hd) dname->name = disk_name (hd, MINOR(dev), dname->namebuf); if (!dname->name) { sprintf (dname->namebuf, "[dev %s]", kdevname(dev)); dname->name = dname->namebuf; } dname->dev = dev; MD_INIT_LIST_HEAD(&dname->list); md_list_add(&dname->list, &device_names); return dname->name;}static unsigned int calc_dev_sboffset(kdev_t dev, mddev_t *mddev, int persistent){ unsigned int size = 0; if (blk_size[MAJOR(dev)]) size = blk_size[MAJOR(dev)][MINOR(dev)]; if (persistent) size = MD_NEW_SIZE_BLOCKS(size); return size;}static unsigned int calc_dev_size(kdev_t dev, mddev_t *mddev, int persistent){ unsigned int size; size = calc_dev_sboffset(dev, mddev, persistent); if (!mddev->sb) { MD_BUG(); return size; } if (mddev->sb->chunk_size) size &= ~(mddev->sb->chunk_size/1024 - 1); return size;}static unsigned int zoned_raid_size(mddev_t *mddev){ unsigned int mask; mdk_rdev_t * rdev; struct md_list_head *tmp; if (!mddev->sb) { MD_BUG(); return -EINVAL; } /* * do size and offset calculations. */ mask = ~(mddev->sb->chunk_size/1024 - 1); ITERATE_RDEV(mddev,rdev,tmp) { rdev->size &= mask; md_size[mdidx(mddev)] += rdev->size; } return 0;}/* * We check wether all devices are numbered from 0 to nb_dev-1. The * order is guaranteed even after device name changes. * * Some personalities (raid0, linear) use this. Personalities that * provide data have to be able to deal with loss of individual * disks, so they do their checking themselves. */int md_check_ordering(mddev_t *mddev){ int i, c; mdk_rdev_t *rdev; struct md_list_head *tmp; /* * First, all devices must be fully functional */ ITERATE_RDEV(mddev,rdev,tmp) { if (rdev->faulty) { printk(KERN_ERR "md: md%d's device %s faulty, aborting.\n", mdidx(mddev), partition_name(rdev->dev)); goto abort; } } c = 0; ITERATE_RDEV(mddev,rdev,tmp) { c++; } if (c != mddev->nb_dev) { MD_BUG(); goto abort; } if (mddev->nb_dev != mddev->sb->raid_disks) { printk(KERN_ERR "md: md%d, array needs %d disks, has %d, aborting.\n", mdidx(mddev), mddev->sb->raid_disks, mddev->nb_dev); goto abort; } /* * Now the numbering check */ for (i = 0; i < mddev->nb_dev; i++) { c = 0; ITERATE_RDEV(mddev,rdev,tmp) { if (rdev->desc_nr == i) c++; } if (!c) { printk(KERN_ERR "md: md%d, missing disk #%d, aborting.\n", mdidx(mddev), i); goto abort; } if (c > 1) { printk(KERN_ERR "md: md%d, too many disks #%d, aborting.\n", mdidx(mddev), i); goto abort; } } return 0;abort: return 1;}static void remove_descriptor(mdp_disk_t *disk, mdp_super_t *sb){ if (disk_active(disk)) { sb->working_disks--; } else { if (disk_spare(disk)) { sb->spare_disks--; sb->working_disks--; } else { sb->failed_disks--; } } sb->nr_disks--; disk->major = 0; disk->minor = 0; mark_disk_removed(disk);}#define BAD_MAGIC KERN_ERR \"md: invalid raid superblock magic on %s\n"#define BAD_MINOR KERN_ERR \"md: %s: invalid raid minor (%x)\n"#define OUT_OF_MEM KERN_ALERT \"md: out of memory.\n"#define NO_SB KERN_ERR \"md: disabled device %s, could not read superblock.\n"#define BAD_CSUM KERN_WARNING \"md: invalid superblock checksum on %s\n"static int alloc_array_sb(mddev_t * mddev){ if (mddev->sb) { MD_BUG(); return 0; } mddev->sb = (mdp_super_t *) __get_free_page (GFP_KERNEL); if (!mddev->sb) return -ENOMEM; md_clear_page(mddev->sb); return 0;}static int alloc_disk_sb(mdk_rdev_t * rdev){ if (rdev->sb) MD_BUG(); rdev->sb = (mdp_super_t *) __get_free_page(GFP_KERNEL); if (!rdev->sb) { printk(OUT_OF_MEM); return -EINVAL; } md_clear_page(rdev->sb); return 0;}static void free_disk_sb(mdk_rdev_t * rdev){ if (rdev->sb) { free_page((unsigned long) rdev->sb); rdev->sb = NULL; rdev->sb_offset = 0; rdev->size = 0; } else { if (!rdev->faulty) MD_BUG(); }}static int read_disk_sb(mdk_rdev_t * rdev){ int ret = -EINVAL; struct buffer_head *bh = NULL; kdev_t dev = rdev->dev; mdp_super_t *sb; unsigned long sb_offset; if (!rdev->sb) { MD_BUG(); goto abort; } /* * Calculate the position of the superblock, * it's at the end of the disk */ sb_offset = calc_dev_sboffset(rdev->dev, rdev->mddev, 1); rdev->sb_offset = sb_offset; fsync_dev(dev); set_blocksize (dev, MD_SB_BYTES); bh = bread (dev, sb_offset / MD_SB_BLOCKS, MD_SB_BYTES); if (bh) { sb = (mdp_super_t *) bh->b_data; memcpy (rdev->sb, sb, MD_SB_BYTES); } else { printk(NO_SB,partition_name(rdev->dev)); goto abort; } printk(KERN_INFO " [events: %08lx]\n", (unsigned long)rdev->sb->events_lo); ret = 0;abort: if (bh) brelse (bh); return ret;}static unsigned int calc_sb_csum(mdp_super_t * sb){ unsigned int disk_csum, csum; disk_csum = sb->sb_csum; sb->sb_csum = 0; csum = csum_partial((void *)sb, MD_SB_BYTES, 0); sb->sb_csum = disk_csum; return csum;}/* * Check one RAID superblock for generic plausibility */static int check_disk_sb(mdk_rdev_t * rdev){ mdp_super_t *sb; int ret = -EINVAL; sb = rdev->sb; if (!sb) { MD_BUG(); goto abort; } if (sb->md_magic != MD_SB_MAGIC) { printk(BAD_MAGIC, partition_name(rdev->dev)); goto abort; } if (sb->md_minor >= MAX_MD_DEVS) { printk(BAD_MINOR, partition_name(rdev->dev), sb->md_minor); goto abort; } if (calc_sb_csum(sb) != sb->sb_csum) { printk(BAD_CSUM, partition_name(rdev->dev)); goto abort; } ret = 0;abort: return ret;}static kdev_t dev_unit(kdev_t dev){ unsigned int mask; struct gendisk *hd = get_gendisk(dev); if (!hd) return 0; mask = ~((1 << hd->minor_shift) - 1); return MKDEV(MAJOR(dev), MINOR(dev) & mask);}static mdk_rdev_t * match_dev_unit(mddev_t *mddev, kdev_t dev){ struct md_list_head *tmp; mdk_rdev_t *rdev; ITERATE_RDEV(mddev,rdev,tmp) if (dev_unit(rdev->dev) == dev_unit(dev)) return rdev; return NULL;}static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2){ struct md_list_head *tmp; mdk_rdev_t *rdev; ITERATE_RDEV(mddev1,rdev,tmp) if (match_dev_unit(mddev2, rdev->dev)) return 1; return 0;}static MD_LIST_HEAD(all_raid_disks);static MD_LIST_HEAD(pending_raid_disks);static void bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev){ mdk_rdev_t *same_pdev; if (rdev->mddev) { MD_BUG(); return; } same_pdev = match_dev_unit(mddev, rdev->dev); if (same_pdev)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -