📄 multipath.c
字号:
/* * multipath.c : Multiple Devices driver for Linux * * Copyright (C) 1999, 2000, 2001 Ingo Molnar, Red Hat * * Copyright (C) 1996, 1997, 1998 Ingo Molnar, Miguel de Icaza, Gadi Oxman * * MULTIPATH management functions. * * derived from raid1.c. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2, or (at your option) * any later version. * * You should have received a copy of the GNU General Public License * (for example /usr/src/linux/COPYING); if not, write to the Free * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */#include <linux/module.h>#include <linux/slab.h>#include <linux/raid/multipath.h>#include <asm/atomic.h>#define MAJOR_NR MD_MAJOR#define MD_DRIVER#define MD_PERSONALITY#define MAX_WORK_PER_DISK 128#define NR_RESERVED_BUFS 32/* * The following can be used to debug the driver */#define MULTIPATH_DEBUG 0#if MULTIPATH_DEBUG#define PRINTK(x...) printk(x)#define inline#define __inline__#else#define PRINTK(x...) do { } while (0)#endifstatic mdk_personality_t multipath_personality;static md_spinlock_t retry_list_lock = MD_SPIN_LOCK_UNLOCKED;struct multipath_bh *multipath_retry_list = NULL, **multipath_retry_tail;static int multipath_diskop(mddev_t *mddev, mdp_disk_t **d, int state);static struct multipath_bh *multipath_alloc_mpbh(multipath_conf_t *conf){ struct multipath_bh *mp_bh = NULL; do { md_spin_lock_irq(&conf->device_lock); if (!conf->freer1_blocked && conf->freer1) { mp_bh = conf->freer1; conf->freer1 = mp_bh->next_mp; conf->freer1_cnt--; mp_bh->next_mp = NULL; mp_bh->state = (1 << MPBH_PreAlloc); mp_bh->bh_req.b_state = 0; } md_spin_unlock_irq(&conf->device_lock); if (mp_bh) return mp_bh; mp_bh = (struct multipath_bh *) kmalloc(sizeof(struct multipath_bh), GFP_NOIO); if (mp_bh) { memset(mp_bh, 0, sizeof(*mp_bh)); return mp_bh; } conf->freer1_blocked = 1; wait_disk_event(conf->wait_buffer, !conf->freer1_blocked || conf->freer1_cnt > NR_RESERVED_BUFS/2 ); conf->freer1_blocked = 0; } while (1);}static inline void multipath_free_mpbh(struct multipath_bh *mp_bh){ multipath_conf_t *conf = mddev_to_conf(mp_bh->mddev); if (test_bit(MPBH_PreAlloc, &mp_bh->state)) { unsigned long flags; spin_lock_irqsave(&conf->device_lock, flags); mp_bh->next_mp = conf->freer1; conf->freer1 = mp_bh; conf->freer1_cnt++; spin_unlock_irqrestore(&conf->device_lock, flags); wake_up(&conf->wait_buffer); } else { kfree(mp_bh); }}static int multipath_grow_mpbh (multipath_conf_t *conf, int cnt){ int i = 0; while (i < cnt) { struct multipath_bh *mp_bh; mp_bh = (struct multipath_bh*)kmalloc(sizeof(*mp_bh), GFP_KERNEL); if (!mp_bh) break; memset(mp_bh, 0, sizeof(*mp_bh)); set_bit(MPBH_PreAlloc, &mp_bh->state); mp_bh->mddev = conf->mddev; multipath_free_mpbh(mp_bh); i++; } return i;}static void multipath_shrink_mpbh(multipath_conf_t *conf){ md_spin_lock_irq(&conf->device_lock); while (conf->freer1) { struct multipath_bh *mp_bh = conf->freer1; conf->freer1 = mp_bh->next_mp; conf->freer1_cnt--; kfree(mp_bh); } md_spin_unlock_irq(&conf->device_lock);}static int multipath_map (mddev_t *mddev, kdev_t *rdev){ multipath_conf_t *conf = mddev_to_conf(mddev); int i, disks = MD_SB_DISKS; /* * Later we do read balancing on the read side * now we use the first available disk. */ for (i = 0; i < disks; i++) { if (conf->multipaths[i].operational) { *rdev = conf->multipaths[i].dev; return (0); } } printk (KERN_ERR "multipath_map(): no more operational IO paths?\n"); return (-1);}static void multipath_reschedule_retry (struct multipath_bh *mp_bh){ unsigned long flags; mddev_t *mddev = mp_bh->mddev; multipath_conf_t *conf = mddev_to_conf(mddev); md_spin_lock_irqsave(&retry_list_lock, flags); if (multipath_retry_list == NULL) multipath_retry_tail = &multipath_retry_list; *multipath_retry_tail = mp_bh; multipath_retry_tail = &mp_bh->next_mp; mp_bh->next_mp = NULL; md_spin_unlock_irqrestore(&retry_list_lock, flags); md_wakeup_thread(conf->thread);}/* * multipath_end_bh_io() is called when we have finished servicing a multipathed * operation and are ready to return a success/failure code to the buffer * cache layer. */static void multipath_end_bh_io (struct multipath_bh *mp_bh, int uptodate){ struct buffer_head *bh = mp_bh->master_bh; bh->b_end_io(bh, uptodate); multipath_free_mpbh(mp_bh);}void multipath_end_request (struct buffer_head *bh, int uptodate){ struct multipath_bh * mp_bh = (struct multipath_bh *)(bh->b_private); /* * this branch is our 'one multipath IO has finished' event handler: */ if (!uptodate) md_error (mp_bh->mddev, bh->b_dev); else /* * Set MPBH_Uptodate in our master buffer_head, so that * we will return a good error code for to the higher * levels even if IO on some other multipathed buffer fails. * * The 'master' represents the complex operation to * user-side. So if something waits for IO, then it will * wait for the 'master' buffer_head. */ set_bit (MPBH_Uptodate, &mp_bh->state); if (uptodate) { multipath_end_bh_io(mp_bh, uptodate); return; } /* * oops, IO error: */ printk(KERN_ERR "multipath: %s: rescheduling block %lu\n", partition_name(bh->b_dev), bh->b_blocknr); multipath_reschedule_retry(mp_bh); return;}/* * This routine returns the disk from which the requested read should * be done. */static int multipath_read_balance (multipath_conf_t *conf){ int disk; for (disk = 0; disk < conf->raid_disks; disk++) if (conf->multipaths[disk].operational) return disk; BUG(); return 0;}static int multipath_make_request (mddev_t *mddev, int rw, struct buffer_head * bh){ multipath_conf_t *conf = mddev_to_conf(mddev); struct buffer_head *bh_req; struct multipath_bh * mp_bh; struct multipath_info *multipath; if (!buffer_locked(bh)) BUG(); /* * make_request() can abort the operation when READA is being * used and no empty request is available. * * Currently, just replace the command with READ/WRITE. */ if (rw == READA) rw = READ; mp_bh = multipath_alloc_mpbh (conf); mp_bh->master_bh = bh; mp_bh->mddev = mddev; mp_bh->cmd = rw; /* * read balancing logic: */ multipath = conf->multipaths + multipath_read_balance(conf); bh_req = &mp_bh->bh_req; memcpy(bh_req, bh, sizeof(*bh)); bh_req->b_blocknr = bh->b_rsector; bh_req->b_dev = multipath->dev; bh_req->b_rdev = multipath->dev;/* bh_req->b_rsector = bh->n_rsector; */ bh_req->b_end_io = multipath_end_request; bh_req->b_private = mp_bh; generic_make_request (rw, bh_req); return 0;}static int multipath_status (char *page, mddev_t *mddev){ multipath_conf_t *conf = mddev_to_conf(mddev); int sz = 0, i; sz += sprintf (page+sz, " [%d/%d] [", conf->raid_disks, conf->working_disks); for (i = 0; i < conf->raid_disks; i++) sz += sprintf (page+sz, "%s", conf->multipaths[i].operational ? "U" : "_"); sz += sprintf (page+sz, "]"); return sz;}#define LAST_DISK KERN_ALERT \"multipath: only one IO path left and IO error.\n"#define NO_SPARE_DISK KERN_ALERT \"multipath: no spare IO path left!\n"#define DISK_FAILED KERN_ALERT \"multipath: IO failure on %s, disabling IO path. \n" \" Operation continuing on %d IO paths.\n"static void mark_disk_bad (mddev_t *mddev, int failed){ multipath_conf_t *conf = mddev_to_conf(mddev); struct multipath_info *multipath = conf->multipaths+failed; mdp_super_t *sb = mddev->sb; multipath->operational = 0; mark_disk_faulty(sb->disks+multipath->number); mark_disk_nonsync(sb->disks+multipath->number); mark_disk_inactive(sb->disks+multipath->number); sb->active_disks--; sb->working_disks--; sb->failed_disks++; mddev->sb_dirty = 1; md_wakeup_thread(conf->thread); conf->working_disks--; printk (DISK_FAILED, partition_name (multipath->dev), conf->working_disks);}/* * Careful, this can execute in IRQ contexts as well! */static int multipath_error (mddev_t *mddev, kdev_t dev){ multipath_conf_t *conf = mddev_to_conf(mddev); struct multipath_info * multipaths = conf->multipaths; int disks = MD_SB_DISKS; int other_paths = 1; int i; if (conf->working_disks == 1) { other_paths = 0; for (i = 0; i < disks; i++) { if (multipaths[i].spare) { other_paths = 1; break; } } } if (!other_paths) { /* * Uh oh, we can do nothing if this is our last path, but * first check if this is a queued request for a device * which has just failed. */ for (i = 0; i < disks; i++) { if (multipaths[i].dev==dev && !multipaths[i].operational) return 0; } printk (LAST_DISK); } else { /* * Mark disk as unusable */ for (i = 0; i < disks; i++) { if (multipaths[i].dev==dev && multipaths[i].operational) { mark_disk_bad(mddev, i); break; } } if (!conf->working_disks) { int err = 1; mdp_disk_t *spare; mdp_super_t *sb = mddev->sb; spare = get_spare(mddev); if (spare) { err = multipath_diskop(mddev, &spare, DISKOP_SPARE_WRITE); printk("got DISKOP_SPARE_WRITE err: %d. (spare_faulty(): %d)\n", err, disk_faulty(spare)); } if (!err && !disk_faulty(spare)) { multipath_diskop(mddev, &spare, DISKOP_SPARE_ACTIVE); mark_disk_sync(spare); mark_disk_active(spare); sb->active_disks++; sb->spare_disks--; } } } return 0;}#undef LAST_DISK#undef NO_SPARE_DISK#undef DISK_FAILEDstatic void print_multipath_conf (multipath_conf_t *conf){ int i; struct multipath_info *tmp; printk("MULTIPATH conf printout:\n"); if (!conf) { printk("(conf==NULL)\n"); return; } printk(" --- wd:%d rd:%d nd:%d\n", conf->working_disks, conf->raid_disks, conf->nr_disks); for (i = 0; i < MD_SB_DISKS; i++) { tmp = conf->multipaths + i; if (tmp->spare || tmp->operational || tmp->number || tmp->raid_disk || tmp->used_slot) printk(" disk%d, s:%d, o:%d, n:%d rd:%d us:%d dev:%s\n", i, tmp->spare,tmp->operational, tmp->number,tmp->raid_disk,tmp->used_slot, partition_name(tmp->dev)); }}static int multipath_diskop(mddev_t *mddev, mdp_disk_t **d, int state){ int err = 0; int i, failed_disk=-1, spare_disk=-1, removed_disk=-1, added_disk=-1; multipath_conf_t *conf = mddev->private; struct multipath_info *tmp, *sdisk, *fdisk, *rdisk, *adisk; mdp_super_t *sb = mddev->sb; mdp_disk_t *failed_desc, *spare_desc, *added_desc; mdk_rdev_t *spare_rdev, *failed_rdev; print_multipath_conf(conf); md_spin_lock_irq(&conf->device_lock); /* * find the disk ... */ switch (state) { case DISKOP_SPARE_ACTIVE: /* * Find the failed disk within the MULTIPATH configuration ... * (this can only be in the first conf->working_disks part) */ for (i = 0; i < conf->raid_disks; i++) { tmp = conf->multipaths + i; if ((!tmp->operational && !tmp->spare) || !tmp->used_slot) { failed_disk = i; break; } } /* * When we activate a spare disk we _must_ have a disk in * the lower (active) part of the array to replace. */ if ((failed_disk == -1) || (failed_disk >= conf->raid_disks)) { MD_BUG(); err = 1; goto abort; } /* fall through */ case DISKOP_SPARE_WRITE: case DISKOP_SPARE_INACTIVE: /* * Find the spare disk ... (can only be in the 'high' * area of the array) */ for (i = conf->raid_disks; i < MD_SB_DISKS; i++) { tmp = conf->multipaths + i; if (tmp->spare && tmp->number == (*d)->number) { spare_disk = i; break; } } if (spare_disk == -1) { MD_BUG(); err = 1; goto abort; } break; case DISKOP_HOT_REMOVE_DISK: for (i = 0; i < MD_SB_DISKS; i++) { tmp = conf->multipaths + i; if (tmp->used_slot && (tmp->number == (*d)->number)) { if (tmp->operational) { printk(KERN_ERR "hot-remove-disk, slot %d is identified to be the requested disk (number %d), but is still operational!\n", i, (*d)->number); err = -EBUSY; goto abort; } removed_disk = i; break; } } if (removed_disk == -1) { MD_BUG(); err = 1; goto abort; } break; case DISKOP_HOT_ADD_DISK: for (i = conf->raid_disks; i < MD_SB_DISKS; i++) { tmp = conf->multipaths + i; if (!tmp->used_slot) { added_disk = i; break; } } if (added_disk == -1) { MD_BUG(); err = 1; goto abort; } break; } switch (state) { /* * Switch the spare disk to write-only mode: */ case DISKOP_SPARE_WRITE: sdisk = conf->multipaths + spare_disk; sdisk->operational = 1; break; /* * Deactivate a spare disk: */ case DISKOP_SPARE_INACTIVE: sdisk = conf->multipaths + spare_disk; sdisk->operational = 0; break;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -