/*
 * raid5.c — excerpt from the Linux 2.6.9 kernel (OMAP1710 tree).
 * Viewer export: page 1 of 4; the complete file is 1,920 lines.
 */
/*
 * raid5.c : Multiple Devices driver for Linux
 *	   Copyright (C) 1996, 1997 Ingo Molnar, Miguel de Icaza, Gadi Oxman
 *	   Copyright (C) 1999, 2000 Ingo Molnar
 *
 * RAID-5 management functions.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * You should have received a copy of the GNU General Public License
 * (for example /usr/src/linux/COPYING); if not, write to the Free
 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include <linux/config.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/raid/raid5.h>
#include <linux/highmem.h>
#include <asm/bitops.h>
#include <asm/atomic.h>

/*
 * Stripe cache
 */

#define NR_STRIPES		256			/* stripe_heads kept in the cache */
#define STRIPE_SIZE		PAGE_SIZE		/* one page of data per device per stripe */
#define STRIPE_SHIFT		(PAGE_SHIFT - 9)
#define STRIPE_SECTORS		(STRIPE_SIZE>>9)	/* stripe size in 512-byte sectors */
#define	IO_THRESHOLD		1			/* wake raid5d when preread count falls below this */
#define HASH_PAGES		1
#define HASH_PAGES_ORDER	0
#define NR_HASH			(HASH_PAGES * PAGE_SIZE / sizeof(struct stripe_head *))
#define HASH_MASK		(NR_HASH - 1)

/* map a sector to its bucket in conf->stripe_hashtbl */
#define stripe_hash(conf, sect)	((conf)->stripe_hashtbl[((sect) >> STRIPE_SHIFT) & HASH_MASK])

/* bio's attached to a stripe+device for I/O are linked together in bi_sector
 * order without overlap. There may be several bio's per stripe+device, and
 * a bio could span several devices.
 * When walking this list for a particular stripe+device, we must never proceed
 * beyond a bio that extends past this device, as the next bio might no longer
 * be valid.
 * This macro is used to determine the 'next' bio in the list, given the sector
 * of the current stripe+device
 */
/* NOTE(review): the line-continuation backslash was lost in extraction;
 * restored here (line splicing only — no token change) so the macro body
 * joins with its continuation on the next source line. */
#define r5_next_bio(bio, sect) ( ( bio->bi_sector + (bio->bi_size>>9) < sect + STRIPE_SECTORS) ? \
bio->bi_next : NULL)	/* continuation of r5_next_bio: next bio, or NULL at device edge */

/*
 * The following can be used to debug the driver
 */
#define RAID5_DEBUG	0
#define RAID5_PARANOIA	1
#if RAID5_PARANOIA && defined(CONFIG_SMP)
# define CHECK_DEVLOCK() if (!spin_is_locked(&conf->device_lock)) BUG()
#else
# define CHECK_DEVLOCK()
#endif

#define PRINTK(x...) ((void)(RAID5_DEBUG && printk(x)))

#if RAID5_DEBUG
#define inline
#define __inline__
#endif

static void print_raid5_conf (raid5_conf_t *conf);

/*
 * Drop one reference on a stripe.  When the count reaches zero the
 * stripe is requeued: onto delayed_list/handle_list (waking raid5d) if
 * STRIPE_HANDLE is set, otherwise back onto inactive_list — in that
 * case the preread-active accounting is updated and sleepers on
 * wait_for_stripe may be woken.
 * Caller must hold conf->device_lock (see release_stripe below).
 */
static inline void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh)
{
	if (atomic_dec_and_test(&sh->count)) {
		/* a zero-count stripe must be off every list and accounted active */
		if (!list_empty(&sh->lru))
			BUG();
		if (atomic_read(&conf->active_stripes)==0)
			BUG();
		if (test_bit(STRIPE_HANDLE, &sh->state)) {
			if (test_bit(STRIPE_DELAYED, &sh->state))
				list_add_tail(&sh->lru, &conf->delayed_list);
			else
				list_add_tail(&sh->lru, &conf->handle_list);
			md_wakeup_thread(conf->mddev->thread);
		} else {
			if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
				atomic_dec(&conf->preread_active_stripes);
				if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD)
					md_wakeup_thread(conf->mddev->thread);
			}
			list_add_tail(&sh->lru, &conf->inactive_list);
			atomic_dec(&conf->active_stripes);
			if (!conf->inactive_blocked ||
			    atomic_read(&conf->active_stripes) < (NR_STRIPES*3/4))
				wake_up(&conf->wait_for_stripe);
		}
	}
}

/* Lock-taking wrapper around __release_stripe(). */
static void release_stripe(struct stripe_head *sh)
{
	raid5_conf_t *conf = sh->raid_conf;
	unsigned long flags;

	spin_lock_irqsave(&conf->device_lock, flags);
	__release_stripe(conf, sh);
	spin_unlock_irqrestore(&conf->device_lock, flags);
}

/*
 * Unlink a stripe from its hash chain; no-op if it is not hashed.
 * Chains carry a hash_pprev back-pointer, so removal needs no bucket scan.
 */
static void remove_hash(struct stripe_head *sh)
{
	PRINTK("remove_hash(), stripe %llu\n", (unsigned long long)sh->sector);

	if (sh->hash_pprev) {
		if (sh->hash_next)
			sh->hash_next->hash_pprev = sh->hash_pprev;
		*sh->hash_pprev = sh->hash_next;
		sh->hash_pprev = NULL;
	}
}

/* Insert a stripe at the head of its hash bucket.  Needs device_lock. */
static __inline__ void insert_hash(raid5_conf_t *conf, struct stripe_head *sh)
{
	struct stripe_head **shp = &stripe_hash(conf, sh->sector);

	PRINTK("insert_hash(), stripe %llu\n", (unsigned long long)sh->sector);

	CHECK_DEVLOCK();
	if
((sh->hash_next = *shp) != NULL)	/* continuation: link at bucket head */
		(*shp)->hash_pprev = &sh->hash_next;
	*shp = sh;
	sh->hash_pprev = shp;
}

/* find an idle stripe, make sure it is unhashed, and return it. */
static struct stripe_head *get_free_stripe(raid5_conf_t *conf)
{
	struct stripe_head *sh = NULL;
	struct list_head *first;

	CHECK_DEVLOCK();
	if (list_empty(&conf->inactive_list))
		goto out;
	first = conf->inactive_list.next;
	sh = list_entry(first, struct stripe_head, lru);
	list_del_init(first);
	remove_hash(sh);
	atomic_inc(&conf->active_stripes);
out:
	return sh;
}

/* Release the first `num` per-device pages of a stripe. */
static void shrink_buffers(struct stripe_head *sh, int num)
{
	struct page *p;
	int i;

	for (i=0; i<num ; i++) {
		p = sh->dev[i].page;
		if (!p)
			continue;
		sh->dev[i].page = NULL;
		page_cache_release(p);
	}
}

/*
 * Allocate one page per device slot.  Returns 1 on allocation failure
 * (pages allocated so far are left for the caller to shrink), 0 on success.
 */
static int grow_buffers(struct stripe_head *sh, int num)
{
	int i;

	for (i=0; i<num; i++) {
		struct page *page;

		if (!(page = alloc_page(GFP_KERNEL))) {
			return 1;
		}
		sh->dev[i].page = page;
	}
	return 0;
}

static void raid5_build_block (struct stripe_head *sh, int i);

/*
 * Re-initialise a cached (idle, unhashed) stripe for a new sector and
 * parity index, rebuild each device slot's embedded bio, and hash it.
 * BUGs if the stripe is still referenced, flagged for handling, or has
 * pending I/O on any slot.  Needs device_lock.
 */
static inline void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx)
{
	raid5_conf_t *conf = sh->raid_conf;
	int disks = conf->raid_disks, i;

	if (atomic_read(&sh->count) != 0)
		BUG();
	if (test_bit(STRIPE_HANDLE, &sh->state))
		BUG();

	CHECK_DEVLOCK();
	PRINTK("init_stripe called, stripe %llu\n", (unsigned long long)sh->sector);

	remove_hash(sh);

	sh->sector = sector;
	sh->pd_idx = pd_idx;
	sh->state = 0;

	for (i=disks; i--; ) {
		struct r5dev *dev = &sh->dev[i];

		if (dev->toread || dev->towrite || dev->written ||
		    test_bit(R5_LOCKED, &dev->flags)) {
			printk("sector=%llx i=%d %p %p %p %d\n",
			       (unsigned long long)sh->sector, i, dev->toread,
			       dev->towrite, dev->written,
			       test_bit(R5_LOCKED, &dev->flags));
			BUG();
		}
		dev->flags = 0;
		raid5_build_block(sh, i);
	}
	insert_hash(conf, sh);
}

/* Look a sector up in the stripe hash table.  Needs device_lock. */
static struct stripe_head *__find_stripe(raid5_conf_t *conf, sector_t sector)
{
	struct stripe_head *sh;

	CHECK_DEVLOCK();
	PRINTK("__find_stripe, sector %llu\n", (unsigned long long)sector);
	for (sh = stripe_hash(conf, sector); sh; sh =
sh->hash_next)	/* continuation: walk the hash chain */
		if (sh->sector == sector)
			return sh;
	PRINTK("__stripe %llu not in cache\n", (unsigned long long)sector);
	return NULL;
}

static void unplug_slaves(mddev_t *mddev);

/*
 * Find the stripe for `sector` in the cache, or claim a free one and
 * (re)initialise it.  Unless `noblock`, sleeps on wait_for_stripe
 * (dropping device_lock and unplugging the member devices while
 * waiting) until a stripe is available; with `noblock` it may return
 * NULL.  A returned stripe has its reference count incremented.
 */
static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector,
					     int pd_idx, int noblock)
{
	struct stripe_head *sh;

	PRINTK("get_stripe, sector %llu\n", (unsigned long long)sector);

	spin_lock_irq(&conf->device_lock);

	do {
		sh = __find_stripe(conf, sector);
		if (!sh) {
			if (!conf->inactive_blocked)
				sh = get_free_stripe(conf);
			if (noblock && sh == NULL)
				break;
			if (!sh) {
				/* cache exhausted: block until the active count
				 * drops well below the limit, then retry */
				conf->inactive_blocked = 1;
				wait_event_lock_irq(conf->wait_for_stripe,
						    !list_empty(&conf->inactive_list) &&
						    (atomic_read(&conf->active_stripes) < (NR_STRIPES *3/4)
						     || !conf->inactive_blocked),
						    conf->device_lock,
						    unplug_slaves(conf->mddev);
					);
				conf->inactive_blocked = 0;
			} else
				init_stripe(sh, sector, pd_idx);
		} else {
			if (atomic_read(&sh->count)) {
				if (!list_empty(&sh->lru))
					BUG();
			} else {
				/* cached but idle: reclaim it from the lru list */
				if (!test_bit(STRIPE_HANDLE, &sh->state))
					atomic_inc(&conf->active_stripes);
				if (list_empty(&sh->lru))
					BUG();
				list_del_init(&sh->lru);
			}
		}
	} while (sh == NULL);

	if (sh)
		atomic_inc(&sh->count);

	spin_unlock_irq(&conf->device_lock);
	return sh;
}

/*
 * Create the per-array stripe_head slab cache and populate it with `num`
 * stripes, each carrying one page per member device.  Every new stripe is
 * born active with count 1 and immediately released onto the inactive
 * list.  Returns 0 on success, 1 on any allocation failure.
 */
static int grow_stripes(raid5_conf_t *conf, int num)
{
	struct stripe_head *sh;
	kmem_cache_t *sc;
	int devs = conf->raid_disks;

	sprintf(conf->cache_name, "raid5/%s", mdname(conf->mddev));

	/* stripe_head embeds one r5dev; allocate devs-1 extras inline */
	sc = kmem_cache_create(conf->cache_name,
			       sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev),
			       0, 0, NULL, NULL);
	if (!sc)
		return 1;
	conf->slab_cache = sc;
	while (num--) {
		sh = kmem_cache_alloc(sc, GFP_KERNEL);
		if (!sh)
			return 1;
		memset(sh, 0, sizeof(*sh) + (devs-1)*sizeof(struct r5dev));
		sh->raid_conf = conf;
		sh->lock = SPIN_LOCK_UNLOCKED;

		if (grow_buffers(sh, conf->raid_disks)) {
			shrink_buffers(sh, conf->raid_disks);
			kmem_cache_free(sc, sh);
			return 1;
		}
		/* we just created an active stripe so...
 */
		atomic_set(&sh->count, 1);
		atomic_inc(&conf->active_stripes);
		INIT_LIST_HEAD(&sh->lru);
		release_stripe(sh);
	}
	return 0;
}

/*
 * Tear the stripe cache down: free every inactive stripe (its pages and
 * slab object) and destroy the slab cache.  BUGs if a supposedly-free
 * stripe is still referenced.
 */
static void shrink_stripes(raid5_conf_t *conf)
{
	struct stripe_head *sh;

	while (1) {
		spin_lock_irq(&conf->device_lock);
		sh = get_free_stripe(conf);
		spin_unlock_irq(&conf->device_lock);
		if (!sh)
			break;
		if (atomic_read(&sh->count))
			BUG();
		shrink_buffers(sh, conf->raid_disks);
		kmem_cache_free(conf->slab_cache, sh);
		atomic_dec(&conf->active_stripes);
	}
	kmem_cache_destroy(conf->slab_cache);
	conf->slab_cache = NULL;
}

/*
 * bi_end_io for a per-device stripe read.  Identifies which r5dev owns
 * the bio; on success marks the slot R5_UPTODATE, on failure reports the
 * device via md_error.  Then drops the rdev reference, unlocks the slot,
 * flags the stripe for handling and releases it.  Returns 1 (not done)
 * while the bio still has bytes outstanding.
 */
static int raid5_end_read_request (struct bio * bi, unsigned int bytes_done,
				   int error)
{
 	struct stripe_head *sh = bi->bi_private;
	raid5_conf_t *conf = sh->raid_conf;
	int disks = conf->raid_disks, i;
	int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);

	if (bi->bi_size)
		return 1;

	/* find the device slot whose embedded request bio this is */
	for (i=0 ; i<disks; i++)
		if (bi == &sh->dev[i].req)
			break;

	PRINTK("end_read_request %llu/%d, count: %d, uptodate %d.\n",
		(unsigned long long)sh->sector, i, atomic_read(&sh->count),
		uptodate);
	if (i == disks) {
		BUG();
		return 0;
	}

	if (uptodate) {
#if 0
		/* dead buffer_head-era bypass path, kept disabled */
		struct bio *bio;
		unsigned long flags;
		spin_lock_irqsave(&conf->device_lock, flags);
		/* we can return a buffer if we bypassed the cache or
		 * if the top buffer is not in highmem.
If there are
		 * multiple buffers, leave the extra work to
		 * handle_stripe
		 */
		buffer = sh->bh_read[i];
		if (buffer &&
		    (!PageHighMem(buffer->b_page) ||
		     buffer->b_page == bh->b_page ) ) {
			sh->bh_read[i] = buffer->b_reqnext;
			buffer->b_reqnext = NULL;
		} else
			buffer = NULL;
		spin_unlock_irqrestore(&conf->device_lock, flags);
		if (sh->bh_page[i]==bh->b_page)
			set_buffer_uptodate(bh);
		if (buffer) {
			if (buffer->b_page != bh->b_page)
				memcpy(buffer->b_data, bh->b_data, bh->b_size);
			buffer->b_end_io(buffer, 1);
		}
#else
		set_bit(R5_UPTODATE, &sh->dev[i].flags);
#endif
	} else {
		/* read failed: report the member device and drop uptodate */
		md_error(conf->mddev, conf->disks[i].rdev);
		clear_bit(R5_UPTODATE, &sh->dev[i].flags);
	}
	rdev_dec_pending(conf->disks[i].rdev, conf->mddev);
#if 0
	/* must restore b_page before unlocking buffer... */
	if (sh->bh_page[i] != bh->b_page) {
		bh->b_page = sh->bh_page[i];
		bh->b_data = page_address(bh->b_page);
		clear_buffer_uptodate(bh);
	}
#endif
	clear_bit(R5_LOCKED, &sh->dev[i].flags);
	set_bit(STRIPE_HANDLE, &sh->state);
	release_stripe(sh);
	return 0;
}

/*
 * bi_end_io for a per-device stripe write.  Mirrors the read completion:
 * finds the owning r5dev, reports failures via md_error, drops the rdev
 * reference, unlocks the slot and flags the stripe for handling — all
 * under device_lock, so the stripe is released with __release_stripe.
 * Returns 1 while the bio still has bytes outstanding.
 */
static int raid5_end_write_request (struct bio *bi, unsigned int bytes_done,
				    int error)
{
 	struct stripe_head *sh = bi->bi_private;
	raid5_conf_t *conf = sh->raid_conf;
	int disks = conf->raid_disks, i;
	unsigned long flags;
	int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);

	if (bi->bi_size)
		return 1;

	/* find the device slot whose embedded request bio this is */
	for (i=0 ; i<disks; i++)
		if (bi == &sh->dev[i].req)
			break;

	PRINTK("end_write_request %llu/%d, count %d, uptodate: %d.\n",
		(unsigned long long)sh->sector, i, atomic_read(&sh->count),
		uptodate);
	if (i == disks) {
		BUG();
		return 0;
	}

	spin_lock_irqsave(&conf->device_lock, flags);
	if (!uptodate)
		md_error(conf->mddev, conf->disks[i].rdev);

	rdev_dec_pending(conf->disks[i].rdev, conf->mddev);

	clear_bit(R5_LOCKED, &sh->dev[i].flags);
	set_bit(STRIPE_HANDLE, &sh->state);
	__release_stripe(conf, sh);
	spin_unlock_irqrestore(&conf->device_lock, flags);
	return 0;
}

static sector_t compute_blocknr(struct stripe_head *sh, int i);

/*
 * Initialise device slot i of a stripe: point the embedded single-segment
 * bio at the slot's page, and for data (non-parity) slots compute the
 * per-device sector via compute_blocknr().
 */
static void raid5_build_block (struct stripe_head *sh, int i)
{
	struct
r5dev *dev = &sh->dev[i];	/* continuation of raid5_build_block() */

	bio_init(&dev->req);
	dev->req.bi_io_vec = &dev->vec;
	dev->req.bi_vcnt++;		/* exactly one segment: the stripe page */
	dev->vec.bv_page = dev->page;
	dev->vec.bv_len = STRIPE_SIZE;
	dev->vec.bv_offset = 0;

	dev->req.bi_sector = sh->sector;
	dev->req.bi_private = sh;

	dev->flags = 0;
	if (i != sh->pd_idx)
		dev->sector = compute_blocknr(sh, i);
}

/*
 * md error handler: mark a member device faulty.
 * NOTE(review): this function is truncated here — the export ends
 * mid-body; the remainder is on a page of the source not included.
 */
static void error(mddev_t *mddev, mdk_rdev_t *rdev)
{
	char b[BDEVNAME_SIZE];
	raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
	PRINTK("raid5: error called\n");

	if (!rdev->faulty) {
		mddev->sb_dirty = 1;
		if (rdev->in_sync) {
/*
 * [Extraction artifact] The code viewer's keyboard-shortcut help panel
 * (copy: Ctrl+C, search: Ctrl+F, full screen: F11, zoom in/out:
 * Ctrl+= / Ctrl+-, show shortcuts: ?) was captured here.  The source
 * itself is truncated midway through error(); pages 2-4 of the
 * original 1,920-line file are not present in this export.
 */