📄 raid5.c
字号:
/*
 * raid5.c : Multiple Devices driver for Linux
 *	Copyright (C) 1996, 1997 Ingo Molnar, Miguel de Icaza, Gadi Oxman
 *	Copyright (C) 1999, 2000 Ingo Molnar
 *
 * RAID-5 management functions.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * You should have received a copy of the GNU General Public License
 * (for example /usr/src/linux/COPYING); if not, write to the Free
 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include <linux/config.h>
#include <linux/module.h>
#include <linux/locks.h>
#include <linux/slab.h>
#include <linux/raid/raid5.h>
#include <asm/bitops.h>
#include <asm/atomic.h>

static mdk_personality_t raid5_personality;

/*
 * Stripe cache
 */

#define NR_STRIPES		256	/* total stripe_heads in the cache */
#define IO_THRESHOLD		1	/* wake the raid5d thread when preread count drops below this */
#define HASH_PAGES		1	/* pages used for the stripe hash table */
#define HASH_PAGES_ORDER	0
#define NR_HASH			(HASH_PAGES * PAGE_SIZE / sizeof(struct stripe_head *))
#define HASH_MASK		(NR_HASH - 1)

/* Hash bucket for a sector: sectors are first scaled down to stripe
 * granularity (buffer_size is in bytes, >>9 converts to sectors). */
#define stripe_hash(conf, sect)	((conf)->stripe_hashtbl[((sect) / ((conf)->buffer_size >> 9)) & HASH_MASK])

/*
 * The following can be used to debug the driver
 */
#define RAID5_DEBUG	0
#define RAID5_PARANOIA	1
#if RAID5_PARANOIA && CONFIG_SMP
/* Assert that the caller holds conf->device_lock (SMP builds only —
 * spin_is_locked is meaningless on UP). Relies on a local `conf`. */
# define CHECK_DEVLOCK() if (!spin_is_locked(&conf->device_lock)) BUG()
#else
# define CHECK_DEVLOCK()
#endif

#if RAID5_DEBUG
#define PRINTK(x...) printk(x)
/* In debug builds, defeat inlining so every function gets a real
 * stack frame (helps oops backtraces). */
#define inline
#define __inline__
#else
#define PRINTK(x...) do { } while (0)
#endif

static void print_raid5_conf (raid5_conf_t *conf);

/*
 * Drop one reference to a stripe_head.  On the last put, re-queue the
 * stripe: onto handle_list/delayed_list if it still needs service (and
 * wake raid5d), otherwise back onto inactive_list (waking anyone stalled
 * in get_active_stripe).  Caller must hold conf->device_lock.
 */
static inline void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh)
{
	if (atomic_dec_and_test(&sh->count)) {
		if (!list_empty(&sh->lru))
			BUG();	/* a referenced stripe must not sit on any list */
		if (atomic_read(&conf->active_stripes)==0)
			BUG();
		if (test_bit(STRIPE_HANDLE, &sh->state)) {
			if (test_bit(STRIPE_DELAYED, &sh->state))
				list_add_tail(&sh->lru, &conf->delayed_list);
			else
				list_add_tail(&sh->lru, &conf->handle_list);
			md_wakeup_thread(conf->thread);
		} else {
			if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
				atomic_dec(&conf->preread_active_stripes);
				/* few enough prereads in flight: let raid5d start delayed ones */
				if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD)
					md_wakeup_thread(conf->thread);
			}
			list_add_tail(&sh->lru, &conf->inactive_list);
			atomic_dec(&conf->active_stripes);
			/* wake waiters unless we are deliberately draining the
			 * cache (inactive_blocked) and still above the 3/4 mark */
			if (!conf->inactive_blocked ||
			    atomic_read(&conf->active_stripes) < (NR_STRIPES*3/4))
				wake_up(&conf->wait_for_stripe);
		}
	}
}

/* Locked wrapper around __release_stripe() for callers that do not
 * already hold conf->device_lock. */
static void release_stripe(struct stripe_head *sh)
{
	raid5_conf_t *conf = sh->raid_conf;
	unsigned long flags;

	spin_lock_irqsave(&conf->device_lock, flags);
	__release_stripe(conf, sh);
	spin_unlock_irqrestore(&conf->device_lock, flags);
}

/* Unlink sh from its hash chain, if hashed.  hash_pprev == NULL marks
 * an unhashed stripe; safe to call on one. */
static void remove_hash(struct stripe_head *sh)
{
	PRINTK("remove_hash(), stripe %lu\n", sh->sector);

	if (sh->hash_pprev) {
		if (sh->hash_next)
			sh->hash_next->hash_pprev = sh->hash_pprev;
		*sh->hash_pprev = sh->hash_next;
		sh->hash_pprev = NULL;
	}
}

/* Push sh onto the front of its hash bucket.  Caller must hold
 * conf->device_lock (checked via CHECK_DEVLOCK). */
static __inline__ void insert_hash(raid5_conf_t *conf, struct stripe_head *sh)
{
	struct stripe_head **shp = &stripe_hash(conf, sh->sector);

	PRINTK("insert_hash(), stripe %lu\n",sh->sector);
	CHECK_DEVLOCK();

	if ((sh->hash_next = *shp) != NULL)
		(*shp)->hash_pprev = &sh->hash_next;
	*shp = sh;
	sh->hash_pprev = shp;
}

/* find an idle stripe, make sure it is unhashed, and return it.
 * Returns NULL if inactive_list is empty.  Caller must hold
 * conf->device_lock; the stripe is counted as active on success. */
static struct stripe_head *get_free_stripe(raid5_conf_t *conf)
{
	struct stripe_head *sh = NULL;
	struct list_head *first;

	CHECK_DEVLOCK();
	if (list_empty(&conf->inactive_list))
		goto out;
	first = conf->inactive_list.next;
	sh = list_entry(first, struct stripe_head, lru);
	list_del_init(first);
	remove_hash(sh);
	atomic_inc(&conf->active_stripes);
out:
	return sh;
}

/* Free the first `num` cached buffer_heads of sh (and their data pages).
 * Stops early at the first NULL slot, so it is safe after a partial
 * grow_buffers(). */
static void shrink_buffers(struct stripe_head *sh, int num)
{
	struct buffer_head *bh;
	int i;

	for (i=0; i<num ; i++) {
		bh = sh->bh_cache[i];
		if (!bh)
			return;
		sh->bh_cache[i] = NULL;
		free_page((unsigned long) bh->b_data);
		kfree(bh);
	}
}

/* Allocate `num` buffer_heads, each backed by one fresh page, into
 * sh->bh_cache.  Returns 0 on success, 1 on allocation failure (caller
 * cleans up with shrink_buffers).
 * NOTE(review): the b_size parameter is unused here — sizes are set
 * later in raid5_build_block(); confirm before relying on it. */
static int grow_buffers(struct stripe_head *sh, int num, int b_size, int priority)
{
	struct buffer_head *bh;
	int i;

	for (i=0; i<num; i++) {
		struct page *page;

		bh = kmalloc(sizeof(struct buffer_head), priority);
		if (!bh)
			return 1;
		memset(bh, 0, sizeof (struct buffer_head));
		init_waitqueue_head(&bh->b_wait);
		if ((page = alloc_page(priority)))
			bh->b_data = page_address(page);
		else {
			kfree(bh);
			return 1;
		}
		atomic_set(&bh->b_count, 0);
		bh->b_page = page;
		sh->bh_cache[i] = bh;
	}
	return 0;
}

static struct buffer_head *raid5_build_block (struct stripe_head *sh, int i);

/* Recycle a free stripe_head for `sector`: reset its state, rebuild the
 * per-disk cache buffer_heads, and hash it in.  The stripe must be idle
 * (count==0, no pending reads/writes) — anything else is a BUG.  Caller
 * must hold conf->device_lock. */
static inline void init_stripe(struct stripe_head *sh, unsigned long sector)
{
	raid5_conf_t *conf = sh->raid_conf;
	int disks = conf->raid_disks, i;

	if (atomic_read(&sh->count) != 0)
		BUG();
	if (test_bit(STRIPE_HANDLE, &sh->state))
		BUG();

	CHECK_DEVLOCK();
	PRINTK("init_stripe called, stripe %lu\n", sh->sector);

	remove_hash(sh);

	sh->sector = sector;
	sh->size = conf->buffer_size;
	sh->state = 0;

	for (i=disks; i--; ) {
		if (sh->bh_read[i] || sh->bh_write[i] || sh->bh_written[i] ||
		    buffer_locked(sh->bh_cache[i])) {
			printk("sector=%lx i=%d %p %p %p %d\n",
			       sh->sector, i, sh->bh_read[i], sh->bh_write[i],
			       sh->bh_written[i], buffer_locked(sh->bh_cache[i]));
			BUG();
		}
		clear_bit(BH_Uptodate, &sh->bh_cache[i]->b_state);
		raid5_build_block(sh, i);
	}
	insert_hash(conf, sh);
}

/* the buffer size has changed, so unhash all stripes
 * as active stripes complete, they will go onto inactive list
 * (requires device_lock held and zero active stripes) */
static void shrink_stripe_cache(raid5_conf_t *conf)
{
	int i;

	CHECK_DEVLOCK();
	if (atomic_read(&conf->active_stripes))
		BUG();
	for (i=0; i < NR_HASH; i++) {
		struct stripe_head *sh;
		while ((sh = conf->stripe_hashtbl[i]))
			remove_hash(sh);
	}
}

/* Look up a cached stripe by sector.  Returns NULL on miss.  Caller
 * must hold conf->device_lock. */
static struct stripe_head *__find_stripe(raid5_conf_t *conf, unsigned long sector)
{
	struct stripe_head *sh;

	CHECK_DEVLOCK();
	PRINTK("__find_stripe, sector %lu\n", sector);
	for (sh = stripe_hash(conf, sector); sh; sh = sh->hash_next)
		if (sh->sector == sector)
			return sh;
	PRINTK("__stripe %lu not in cache\n", sector);
	return NULL;
}

/* Get a referenced stripe_head for `sector`, from the cache or a free
 * stripe (re-initialised).  `size` is the required buffer size in bytes
 * (0 = accept the current size, aligning sector down to stripe
 * granularity); a size change drains the cache first.  If `noblock`,
 * returns NULL instead of sleeping when no stripe is available.
 * Takes and releases conf->device_lock internally; may sleep on
 * conf->wait_for_stripe. */
static struct stripe_head *get_active_stripe(raid5_conf_t *conf, unsigned long sector, int size, int noblock)
{
	struct stripe_head *sh;

	PRINTK("get_stripe, sector %lu\n", sector);

	md_spin_lock_irq(&conf->device_lock);
	do {
		if (conf->buffer_size == 0 ||
		    (size && size != conf->buffer_size)) {
			/* either the size is being changed (buffer_size==0) or
			 * we need to change it.
			 * If size==0, we can proceed as soon as buffer_size gets set.
			 * If size>0, we can proceed when active_stripes reaches 0, or
			 * when someone else sets the buffer_size to size.
			 * If someone sets the buffer size to something else, we will need to
			 * assert that we want to change it again
			 */
			int oldsize = conf->buffer_size;
			PRINTK("get_stripe %ld/%d buffer_size is %d, %d active\n",
			       sector, size, conf->buffer_size,
			       atomic_read(&conf->active_stripes));
			if (size==0)
				wait_event_lock_irq(conf->wait_for_stripe,
						    conf->buffer_size,
						    conf->device_lock);
			else {
				while (conf->buffer_size != size &&
				       atomic_read(&conf->active_stripes)) {
					/* buffer_size==0 advertises "size change in
					 * progress" to other waiters */
					conf->buffer_size = 0;
					wait_event_lock_irq(conf->wait_for_stripe,
							    atomic_read(&conf->active_stripes)==0 ||
							    conf->buffer_size,
							    conf->device_lock);
					PRINTK("waited and now %ld/%d buffer_size is %d - %d active\n",
					       sector, size, conf->buffer_size,
					       atomic_read(&conf->active_stripes));
				}
				if (conf->buffer_size != size) {
					printk("raid5: switching cache buffer size, %d --> %d\n",
					       oldsize, size);
					/* cache entries are sized for the old
					 * buffer_size — throw them all away */
					shrink_stripe_cache(conf);
					if (size==0) BUG();
					conf->buffer_size = size;
					PRINTK("size now %d\n", conf->buffer_size);
				}
			}
		}
		if (size == 0)
			/* align down to the start of the stripe */
			sector -= sector & ((conf->buffer_size>>9)-1);
		sh = __find_stripe(conf, sector);
		if (!sh) {
			if (!conf->inactive_blocked)
				sh = get_free_stripe(conf);
			if (noblock && sh == NULL)
				break;
			if (!sh) {
				/* cache exhausted: block new claimants until it
				 * drains below 3/4 full, then retry the loop */
				conf->inactive_blocked = 1;
				wait_event_lock_irq(conf->wait_for_stripe,
						    !list_empty(&conf->inactive_list) &&
						    (atomic_read(&conf->active_stripes) < (NR_STRIPES *3/4)
						     || !conf->inactive_blocked),
						    conf->device_lock);
				conf->inactive_blocked = 0;
			} else
				init_stripe(sh, sector);
		} else {
			/* cache hit: take it off whatever list it is on */
			if (atomic_read(&sh->count)) {
				if (!list_empty(&sh->lru))
					BUG();
			} else {
				if (!test_bit(STRIPE_HANDLE, &sh->state))
					atomic_inc(&conf->active_stripes);
				if (list_empty(&sh->lru))
					BUG();
				list_del_init(&sh->lru);
			}
		}
	} while (sh == NULL);

	if (sh)
		atomic_inc(&sh->count);

	md_spin_unlock_irq(&conf->device_lock);
	return sh;
}

/* Allocate `num` stripe_heads (each with one buffer per raid disk) and
 * release them into the inactive pool.  Returns 0 on success, 1 if any
 * allocation fails (already-created stripes are kept). */
static int grow_stripes(raid5_conf_t *conf, int num, int priority)
{
	struct stripe_head *sh;

	while (num--) {
		sh = kmalloc(sizeof(struct stripe_head), priority);
		if (!sh)
			return 1;
		memset(sh, 0, sizeof(*sh));
		sh->raid_conf = conf;
		sh->lock = SPIN_LOCK_UNLOCKED;

		if (grow_buffers(sh, conf->raid_disks, PAGE_SIZE, priority)) {
			shrink_buffers(sh, conf->raid_disks);
			kfree(sh);
			return 1;
		}
		/* we just created an active stripe so... */
		atomic_set(&sh->count, 1);
		atomic_inc(&conf->active_stripes);
		INIT_LIST_HEAD(&sh->lru);
		release_stripe(sh);
	}
	return 0;
}

/* Tear down up to `num` idle stripes (stops early if the inactive list
 * runs dry).  A non-zero refcount on a supposedly free stripe is a BUG. */
static void shrink_stripes(raid5_conf_t *conf, int num)
{
	struct stripe_head *sh;

	while (num--) {
		spin_lock_irq(&conf->device_lock);
		sh = get_free_stripe(conf);
		spin_unlock_irq(&conf->device_lock);
		if (!sh)
			break;
		if (atomic_read(&sh->count))
			BUG();
		shrink_buffers(sh, conf->raid_disks);
		kfree(sh);
		atomic_dec(&conf->active_stripes);
	}
}

/* I/O completion for a cached read buffer.  On success, may hand the
 * data straight to the first waiting reader (bypassing handle_stripe)
 * when that is safe; on failure, marks the disk faulty via md_error.
 * Always flags the stripe for handling and drops our reference. */
static void raid5_end_read_request (struct buffer_head * bh, int uptodate)
{
	struct stripe_head *sh = bh->b_private;
	raid5_conf_t *conf = sh->raid_conf;
	int disks = conf->raid_disks, i;
	unsigned long flags;

	/* which cache slot does this bh belong to? */
	for (i=0 ; i<disks; i++)
		if (bh == sh->bh_cache[i])
			break;

	PRINTK("end_read_request %lu/%d, count: %d, uptodate %d.\n",
	       sh->sector, i, atomic_read(&sh->count), uptodate);
	if (i == disks) {
		BUG();
		return;
	}

	if (uptodate) {
		struct buffer_head *buffer;

		spin_lock_irqsave(&conf->device_lock, flags);
		/* we can return a buffer if we bypassed the cache or
		 * if the top buffer is not in highmem. If there are
		 * multiple buffers, leave the extra work to
		 * handle_stripe
		 */
		buffer = sh->bh_read[i];
		if (buffer &&
		    (!PageHighMem(buffer->b_page)
		     || buffer->b_page == bh->b_page )
			) {
			sh->bh_read[i] = buffer->b_reqnext;
			buffer->b_reqnext = NULL;
		} else
			buffer = NULL;
		spin_unlock_irqrestore(&conf->device_lock, flags);
		/* bh_page[i] set means we read directly into the caller's
		 * page (cache bypass) — the cache copy is NOT up to date */
		if (sh->bh_page[i]==NULL)
			set_bit(BH_Uptodate, &bh->b_state);
		if (buffer) {
			if (buffer->b_page != bh->b_page)
				memcpy(buffer->b_data, bh->b_data, bh->b_size);
			buffer->b_end_io(buffer, 1);
		}
	} else {
		md_error(conf->mddev, bh->b_dev);
		clear_bit(BH_Uptodate, &bh->b_state);
	}
	/* must restore b_page before unlocking buffer... */
	if (sh->bh_page[i]) {
		bh->b_page = sh->bh_page[i];
		bh->b_data = page_address(bh->b_page);
		sh->bh_page[i] = NULL;
		clear_bit(BH_Uptodate, &bh->b_state);
	}
	clear_bit(BH_Lock, &bh->b_state);
	set_bit(STRIPE_HANDLE, &sh->state);
	release_stripe(sh);
}

/* I/O completion for a cached write buffer.  Marks the disk faulty on
 * error, flags the stripe for handling, and drops our reference — all
 * under device_lock (hence __release_stripe, not release_stripe). */
static void raid5_end_write_request (struct buffer_head *bh, int uptodate)
{
	struct stripe_head *sh = bh->b_private;
	raid5_conf_t *conf = sh->raid_conf;
	int disks = conf->raid_disks, i;
	unsigned long flags;

	for (i=0 ; i<disks; i++)
		if (bh == sh->bh_cache[i])
			break;

	PRINTK("end_write_request %lu/%d, count %d, uptodate: %d.\n",
	       sh->sector, i, atomic_read(&sh->count), uptodate);
	if (i == disks) {
		BUG();
		return;
	}

	md_spin_lock_irqsave(&conf->device_lock, flags);
	if (!uptodate)
		md_error(conf->mddev, bh->b_dev);
	clear_bit(BH_Lock, &bh->b_state);
	set_bit(STRIPE_HANDLE, &sh->state);
	__release_stripe(conf, sh);
	md_spin_unlock_irqrestore(&conf->device_lock, flags);
}

/* (Re)initialise cache buffer_head i of sh for I/O against its disk:
 * device, block number (sector scaled to buffer-size units), size and
 * completion handler (read completion by default). */
static struct buffer_head *raid5_build_block (struct stripe_head *sh, int i)
{
	raid5_conf_t *conf = sh->raid_conf;
	struct buffer_head *bh = sh->bh_cache[i];
	unsigned long block = sh->sector / (sh->size >> 9);

	init_buffer(bh, raid5_end_read_request, sh);
	bh->b_dev = conf->disks[i].dev;
	bh->b_blocknr = block;

	bh->b_state = (1 << BH_Req) | (1 << BH_Mapped);
	bh->b_size = sh->size;
	bh->b_list = BUF_LOCKED;
	return bh;
}

/* Error handler for the md core: mark the failed device `dev` faulty in
 * both the superblock and the raid5 conf, dirty the superblock, and wake
 * raid5d.  Returns 0 when the device was found among the active disks. */
static int raid5_error (mddev_t *mddev, kdev_t dev)
{
	raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
	mdp_super_t *sb = mddev->sb;
	struct disk_info *disk;
	int i;

	PRINTK("raid5_error called\n");

	for (i = 0, disk = conf->disks; i < conf->raid_disks; i++, disk++) {
		if (disk->dev == dev) {
			if (disk->operational) {
				disk->operational = 0;
				mark_disk_faulty(sb->disks+disk->number);
				mark_disk_nonsync(sb->disks+disk->number);
				mark_disk_inactive(sb->disks+disk->number);
				sb->active_disks--;
				sb->working_disks--;
				sb->failed_disks++;
				mddev->sb_dirty = 1;
				conf->working_disks--;
				conf->failed_disks++;
				md_wakeup_thread(conf->thread);
				printk (KERN_ALERT
					"raid5: Disk failure on %s, disabling device."
					" Operation continuing on %d devices\n",
					partition_name (dev), conf->working_disks);
			}
			return 0;
			/* NOTE(review): SOURCE chunk is truncated here — the
			 * remainder of raid5_error (and of the file) is not
			 * visible in this view. */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -