📄 raid1.c
/*
 * raid1.c : Multiple Devices driver for Linux
 *
 * Copyright (C) 1999, 2000 Ingo Molnar, Red Hat
 *
 * Copyright (C) 1996, 1997, 1998 Ingo Molnar, Miguel de Icaza, Gadi Oxman
 *
 * RAID-1 management functions.
 *
 * Better read-balancing code written by Mika Kuoppala <miku@iki.fi>, 2000
 *
 * Fixes to reconstruction by Jakob Østergaard <jakob@ostenfeld.dk>
 * Various fixes by Neil Brown <neilb@cse.unsw.edu.au>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * You should have received a copy of the GNU General Public License
 * (for example /usr/src/linux/COPYING); if not, write to the Free
 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include <linux/module.h>
#include <linux/malloc.h>
#include <linux/raid/raid1.h>
#include <asm/atomic.h>

#define MAJOR_NR MD_MAJOR
#define MD_DRIVER
#define MD_PERSONALITY

#define MAX_WORK_PER_DISK 128

/*
 * The following can be used to debug the driver
 */
#define RAID1_DEBUG 0

#if RAID1_DEBUG
#define PRINTK(x...) printk(x)
#define inline
#define __inline__
#else
#define PRINTK(x...) do { } while (0)
#endif

static mdk_personality_t raid1_personality;
static md_spinlock_t retry_list_lock = MD_SPIN_LOCK_UNLOCKED;
struct raid1_bh *raid1_retry_list = NULL, **raid1_retry_tail;

static struct buffer_head *raid1_alloc_bh(raid1_conf_t *conf, int cnt)
{
        /* return a linked list of "cnt" struct buffer_heads.
         * don't take any off the free list unless we know we can
         * get all we need, otherwise we could deadlock
         */
        struct buffer_head *bh = NULL;

        while (cnt) {
                struct buffer_head *t;

                md_spin_lock_irq(&conf->device_lock);
                if (conf->freebh_cnt >= cnt)
                        while (cnt) {
                                t = conf->freebh;
                                conf->freebh = t->b_next;
                                t->b_next = bh;
                                bh = t;
                                t->b_state = 0;
                                conf->freebh_cnt--;
                                cnt--;
                        }
                md_spin_unlock_irq(&conf->device_lock);
                if (cnt == 0)
                        break;
                t = (struct buffer_head *)kmalloc(sizeof(struct buffer_head), GFP_BUFFER);
                if (t) {
                        memset(t, 0, sizeof(*t));
                        t->b_next = bh;
                        bh = t;
                        cnt--;
                } else {
                        PRINTK("waiting for %d bh\n", cnt);
                        wait_event(conf->wait_buffer, conf->freebh_cnt >= cnt);
                }
        }
        return bh;
}

static inline void raid1_free_bh(raid1_conf_t *conf, struct buffer_head *bh)
{
        unsigned long flags;

        spin_lock_irqsave(&conf->device_lock, flags);
        while (bh) {
                struct buffer_head *t = bh;

                bh = bh->b_next;
                if (t->b_pprev == NULL)
                        kfree(t);
                else {
                        t->b_next = conf->freebh;
                        conf->freebh = t;
                        conf->freebh_cnt++;
                }
        }
        spin_unlock_irqrestore(&conf->device_lock, flags);
        wake_up(&conf->wait_buffer);
}

static int raid1_grow_bh(raid1_conf_t *conf, int cnt)
{
        /* allocate cnt buffer_heads, possibly less if kmalloc fails */
        int i = 0;

        while (i < cnt) {
                struct buffer_head *bh;

                bh = kmalloc(sizeof(*bh), GFP_KERNEL);
                if (!bh)
                        break;
                memset(bh, 0, sizeof(*bh));

                md_spin_lock_irq(&conf->device_lock);
                bh->b_pprev = &conf->freebh;
                bh->b_next = conf->freebh;
                conf->freebh = bh;
                conf->freebh_cnt++;
                md_spin_unlock_irq(&conf->device_lock);

                i++;
        }
        return i;
}

static int raid1_shrink_bh(raid1_conf_t *conf, int cnt)
{
        /* discard cnt buffer_heads, if we can find them */
        int i = 0;

        md_spin_lock_irq(&conf->device_lock);
        while ((i < cnt) && conf->freebh) {
                struct buffer_head *bh = conf->freebh;

                conf->freebh = bh->b_next;
                kfree(bh);
                i++;
                conf->freebh_cnt--;
        }
        md_spin_unlock_irq(&conf->device_lock);
        return i;
}

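/*
 * The raid1_bh request descriptors used for in-flight I/O are pooled in
 * much the same way as the buffer_heads above: raid1_alloc_r1bh() takes
 * one off conf->freer1 under device_lock when possible, falls back to
 * kmalloc(GFP_BUFFER), and sleeps on conf->wait_buffer when memory is
 * tight.  On release, descriptors marked R1BH_PreAlloc go back on the
 * free list, while transient kmalloc'ed ones are simply kfree'd.
 */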
static struct raid1_bh *raid1_alloc_r1bh(raid1_conf_t *conf)
{
        struct raid1_bh *r1_bh = NULL;

        do {
                md_spin_lock_irq(&conf->device_lock);
                if (conf->freer1) {
                        r1_bh = conf->freer1;
                        conf->freer1 = r1_bh->next_r1;
                        r1_bh->next_r1 = NULL;
                        r1_bh->state = 0;
                        r1_bh->bh_req.b_state = 0;
                }
                md_spin_unlock_irq(&conf->device_lock);
                if (r1_bh)
                        return r1_bh;
                r1_bh = (struct raid1_bh *) kmalloc(sizeof(struct raid1_bh), GFP_BUFFER);
                if (r1_bh) {
                        memset(r1_bh, 0, sizeof(*r1_bh));
                        return r1_bh;
                }
                wait_event(conf->wait_buffer, conf->freer1);
        } while (1);
}

static inline void raid1_free_r1bh(struct raid1_bh *r1_bh)
{
        struct buffer_head *bh = r1_bh->mirror_bh_list;
        raid1_conf_t *conf = mddev_to_conf(r1_bh->mddev);

        r1_bh->mirror_bh_list = NULL;

        if (test_bit(R1BH_PreAlloc, &r1_bh->state)) {
                unsigned long flags;

                spin_lock_irqsave(&conf->device_lock, flags);
                r1_bh->next_r1 = conf->freer1;
                conf->freer1 = r1_bh;
                spin_unlock_irqrestore(&conf->device_lock, flags);
        } else {
                kfree(r1_bh);
        }
        raid1_free_bh(conf, bh);
}

static int raid1_grow_r1bh (raid1_conf_t *conf, int cnt)
{
        int i = 0;

        while (i < cnt) {
                struct raid1_bh *r1_bh;

                r1_bh = (struct raid1_bh*)kmalloc(sizeof(*r1_bh), GFP_KERNEL);
                if (!r1_bh)
                        break;
                memset(r1_bh, 0, sizeof(*r1_bh));

                md_spin_lock_irq(&conf->device_lock);
                set_bit(R1BH_PreAlloc, &r1_bh->state);
                r1_bh->next_r1 = conf->freer1;
                conf->freer1 = r1_bh;
                md_spin_unlock_irq(&conf->device_lock);

                i++;
        }
        return i;
}

static void raid1_shrink_r1bh(raid1_conf_t *conf)
{
        md_spin_lock_irq(&conf->device_lock);
        while (conf->freer1) {
                struct raid1_bh *r1_bh = conf->freer1;

                conf->freer1 = r1_bh->next_r1;
                kfree(r1_bh);
        }
        md_spin_unlock_irq(&conf->device_lock);
}

static inline void raid1_free_buf(struct raid1_bh *r1_bh)
{
        unsigned long flags;
        struct buffer_head *bh = r1_bh->mirror_bh_list;
        raid1_conf_t *conf = mddev_to_conf(r1_bh->mddev);

        r1_bh->mirror_bh_list = NULL;

        spin_lock_irqsave(&conf->device_lock, flags);
        r1_bh->next_r1 = conf->freebuf;
        conf->freebuf = r1_bh;
        spin_unlock_irqrestore(&conf->device_lock, flags);
        raid1_free_bh(conf, bh);
}

static struct raid1_bh *raid1_alloc_buf(raid1_conf_t *conf)
{
        struct raid1_bh *r1_bh;

        md_spin_lock_irq(&conf->device_lock);
        wait_event_lock_irq(conf->wait_buffer, conf->freebuf, conf->device_lock);
        r1_bh = conf->freebuf;
        conf->freebuf = r1_bh->next_r1;
        r1_bh->next_r1 = NULL;
        md_spin_unlock_irq(&conf->device_lock);

        return r1_bh;
}

static int raid1_grow_buffers (raid1_conf_t *conf, int cnt)
{
        int i = 0;

        md_spin_lock_irq(&conf->device_lock);
        while (i < cnt) {
                struct raid1_bh *r1_bh;
                struct page *page;

                page = alloc_page(GFP_KERNEL);
                if (!page)
                        break;

                r1_bh = (struct raid1_bh *) kmalloc(sizeof(*r1_bh), GFP_KERNEL);
                if (!r1_bh) {
                        __free_page(page);
                        break;
                }
                memset(r1_bh, 0, sizeof(*r1_bh));
                r1_bh->bh_req.b_page = page;
                r1_bh->bh_req.b_data = page_address(page);
                r1_bh->next_r1 = conf->freebuf;
                conf->freebuf = r1_bh;
                i++;
        }
        md_spin_unlock_irq(&conf->device_lock);
        return i;
}

static void raid1_shrink_buffers (raid1_conf_t *conf)
{
        md_spin_lock_irq(&conf->device_lock);
        while (conf->freebuf) {
                struct raid1_bh *r1_bh = conf->freebuf;

                conf->freebuf = r1_bh->next_r1;
                __free_page(r1_bh->bh_req.b_page);
                kfree(r1_bh);
        }
        md_spin_unlock_irq(&conf->device_lock);
}

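/*
 * raid1_map() is used by the MD core to redirect an array access to one
 * of the member devices.  As the comment below notes, it currently just
 * returns the first operational mirror; read balancing proper is done in
 * raid1_read_balance().
 */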
static int raid1_map (mddev_t *mddev, kdev_t *rdev, unsigned long size)
{
        raid1_conf_t *conf = mddev_to_conf(mddev);
        int i, disks = MD_SB_DISKS;

        /*
         * Later we do read balancing on the read side
         * now we use the first available disk.
         */
        for (i = 0; i < disks; i++) {
                if (conf->mirrors[i].operational) {
                        *rdev = conf->mirrors[i].dev;
                        return (0);
                }
        }

        printk (KERN_ERR "raid1_map(): huh, no more operational devices?\n");
        return (-1);
}

static void raid1_reschedule_retry (struct raid1_bh *r1_bh)
{
        unsigned long flags;
        mddev_t *mddev = r1_bh->mddev;
        raid1_conf_t *conf = mddev_to_conf(mddev);

        md_spin_lock_irqsave(&retry_list_lock, flags);
        if (raid1_retry_list == NULL)
                raid1_retry_tail = &raid1_retry_list;
        *raid1_retry_tail = r1_bh;
        raid1_retry_tail = &r1_bh->next_r1;
        r1_bh->next_r1 = NULL;
        md_spin_unlock_irqrestore(&retry_list_lock, flags);
        md_wakeup_thread(conf->thread);
}

static void inline io_request_done(unsigned long sector, raid1_conf_t *conf, int phase)
{
        unsigned long flags;

        spin_lock_irqsave(&conf->segment_lock, flags);
        if (sector < conf->start_active)
                conf->cnt_done--;
        else if (sector >= conf->start_future && conf->phase == phase)
                conf->cnt_future--;
        else if (!--conf->cnt_pending)
                wake_up(&conf->wait_ready);

        spin_unlock_irqrestore(&conf->segment_lock, flags);
}

static void inline sync_request_done (unsigned long sector, raid1_conf_t *conf)
{
        unsigned long flags;

        spin_lock_irqsave(&conf->segment_lock, flags);
        if (sector >= conf->start_ready)
                --conf->cnt_ready;
        else if (sector >= conf->start_active) {
                if (!--conf->cnt_active) {
                        conf->start_active = conf->start_ready;
                        wake_up(&conf->wait_done);
                }
        }
        spin_unlock_irqrestore(&conf->segment_lock, flags);
}

/*
 * raid1_end_bh_io() is called when we have finished servicing a mirrored
 * operation and are ready to return a success/failure code to the buffer
 * cache layer.
 */
static void raid1_end_bh_io (struct raid1_bh *r1_bh, int uptodate)
{
        struct buffer_head *bh = r1_bh->master_bh;

        io_request_done(bh->b_rsector, mddev_to_conf(r1_bh->mddev),
                        test_bit(R1BH_SyncPhase, &r1_bh->state));

        bh->b_end_io(bh, uptodate);
        raid1_free_r1bh(r1_bh);
}

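/*
 * raid1_end_request() is the per-mirror I/O completion handler.  A failed
 * mirror is reported via md_error().  Reads complete the master request
 * on success and are rescheduled for retry on error; writes complete the
 * master request only once every mirrored write has finished, i.e. when
 * r1_bh->remaining drops to zero.
 */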
void raid1_end_request (struct buffer_head *bh, int uptodate)
{
        struct raid1_bh * r1_bh = (struct raid1_bh *)(bh->b_private);

        /*
         * this branch is our 'one mirror IO has finished' event handler:
         */
        if (!uptodate)
                md_error (mddev_to_kdev(r1_bh->mddev), bh->b_dev);
        else
                /*
                 * Set R1BH_Uptodate in our master buffer_head, so that
                 * we will return a good error code to the higher
                 * levels even if IO on some other mirrored buffer fails.
                 *
                 * The 'master' represents the complex operation to
                 * user-side. So if something waits for IO, then it will
                 * wait for the 'master' buffer_head.
                 */
                set_bit (R1BH_Uptodate, &r1_bh->state);

        /*
         * We split up the read and write side, imho they are
         * conceptually different.
         */
        if ( (r1_bh->cmd == READ) || (r1_bh->cmd == READA) ) {
                /*
                 * we have only one buffer_head on the read side
                 */
                if (uptodate) {
                        raid1_end_bh_io(r1_bh, uptodate);
                        return;
                }
                /*
                 * oops, read error:
                 */
                printk(KERN_ERR "raid1: %s: rescheduling block %lu\n",
                        partition_name(bh->b_dev), bh->b_blocknr);
                raid1_reschedule_retry(r1_bh);
                return;
        }

        /*
         * WRITE:
         *
         * Let's see if all mirrored write operations have finished
         * already.
         */
        if (atomic_dec_and_test(&r1_bh->remaining))
                raid1_end_bh_io(r1_bh, test_bit(R1BH_Uptodate, &r1_bh->state));
}

/*
 * This routine returns the disk from which the requested read should
 * be done. It keeps track of the last read position for every disk
 * in the array, and when a new read request comes in, the disk whose
 * last position is nearest to the request is chosen.
 *
 * TODO: if there are two mirrors on the same two devices, performance
 * degrades dramatically because the position is tracked per mirror, not
 * per device. This should be changed to be device based. Also, atomic
 * sequential reads should be somehow balanced.
 */
static int raid1_read_balance (raid1_conf_t *conf, struct buffer_head *bh)
{
        int new_disk = conf->last_used;
        const int sectors = bh->b_size >> 9;
        const unsigned long this_sector = bh->b_rsector;
        int disk = new_disk;
        unsigned long new_distance;
        unsigned long current_distance;

        /*
         * Check if it is sane at all to balance
         */
        if (conf->resync_mirrors)
                goto rb_out;

        /* make sure that disk is operational */
        while( !conf->mirrors[new_disk].operational) {
                if (new_disk <= 0)
                        new_disk = conf->raid_disks;
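
/*
 * The listing above breaks off inside raid1_read_balance().  The fragment
 * below is NOT part of raid1.c: it is a simplified, hypothetical sketch
 * (toy_mirror and toy_read_balance are invented names) of the "choose the
 * mirror whose last read position is nearest to the request" idea described
 * in the comment above, ignoring everything else the real routine has to
 * consider (resync state, sequential reads, per-disk work limits).
 */
#if 0   /* illustration only, not compiled */
struct toy_mirror {
        int operational;                /* mirror is usable                  */
        unsigned long head_position;    /* sector of the last completed read */
};

/* return the index of the closest operational mirror, or -1 if none */
static int toy_read_balance(const struct toy_mirror *m, int nr_disks,
                            unsigned long this_sector)
{
        unsigned long best_distance = ~0UL;
        int i, best = -1;

        for (i = 0; i < nr_disks; i++) {
                unsigned long distance;

                if (!m[i].operational)
                        continue;
                distance = (m[i].head_position > this_sector) ?
                        m[i].head_position - this_sector :
                        this_sector - m[i].head_position;
                if (distance < best_distance) {
                        best_distance = distance;
                        best = i;
                }
        }
        return best;
}
#endif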