
📄 raid1.c

📁 Linux development combined with the 2410; it can be used to generate the zImage file required by the 2410
💻 C
📖 Page 1 of 3
/*
 * raid1.c : Multiple Devices driver for Linux
 *
 * Copyright (C) 1999, 2000 Ingo Molnar, Red Hat
 *
 * Copyright (C) 1996, 1997, 1998 Ingo Molnar, Miguel de Icaza, Gadi Oxman
 *
 * RAID-1 management functions.
 *
 * Better read-balancing code written by Mika Kuoppala <miku@iki.fi>, 2000
 *
 * Fixes to reconstruction by Jakob Østergaard <jakob@ostenfeld.dk>
 * Various fixes by Neil Brown <neilb@cse.unsw.edu.au>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * You should have received a copy of the GNU General Public License
 * (for example /usr/src/linux/COPYING); if not, write to the Free
 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/raid/raid1.h>
#include <asm/atomic.h>

#define MAJOR_NR MD_MAJOR
#define MD_DRIVER
#define MD_PERSONALITY

#define MAX_WORK_PER_DISK 128

#define	NR_RESERVED_BUFS	32

/*
 * The following can be used to debug the driver
 */
#define RAID1_DEBUG	0

#if RAID1_DEBUG
#define PRINTK(x...)   printk(x)
#define inline
#define __inline__
#else
#define PRINTK(x...)  do { } while (0)
#endif

static mdk_personality_t raid1_personality;
static md_spinlock_t retry_list_lock = MD_SPIN_LOCK_UNLOCKED;
struct raid1_bh *raid1_retry_list = NULL, **raid1_retry_tail;

static struct buffer_head *raid1_alloc_bh(raid1_conf_t *conf, int cnt)
{
	/* return a linked list of "cnt" struct buffer_heads.
	 * don't take any off the free list unless we know we can
	 * get all we need, otherwise we could deadlock
	 */
	struct buffer_head *bh = NULL;

	while (cnt) {
		struct buffer_head *t;
		md_spin_lock_irq(&conf->device_lock);
		if (!conf->freebh_blocked && conf->freebh_cnt >= cnt)
			while (cnt) {
				t = conf->freebh;
				conf->freebh = t->b_next;
				t->b_next = bh;
				bh = t;
				t->b_state = 0;
				conf->freebh_cnt--;
				cnt--;
			}
		md_spin_unlock_irq(&conf->device_lock);
		if (cnt == 0)
			break;
		t = kmem_cache_alloc(bh_cachep, SLAB_NOIO);
		if (t) {
			t->b_next = bh;
			bh = t;
			cnt--;
		} else {
			PRINTK("raid1: waiting for %d bh\n", cnt);
			conf->freebh_blocked = 1;
			wait_disk_event(conf->wait_buffer,
					!conf->freebh_blocked ||
					conf->freebh_cnt > conf->raid_disks * NR_RESERVED_BUFS/2);
			conf->freebh_blocked = 0;
		}
	}
	return bh;
}

static inline void raid1_free_bh(raid1_conf_t *conf, struct buffer_head *bh)
{
	unsigned long flags;
	spin_lock_irqsave(&conf->device_lock, flags);
	while (bh) {
		struct buffer_head *t = bh;
		bh = bh->b_next;
		if (t->b_pprev == NULL)
			kmem_cache_free(bh_cachep, t);
		else {
			t->b_next = conf->freebh;
			conf->freebh = t;
			conf->freebh_cnt++;
		}
	}
	spin_unlock_irqrestore(&conf->device_lock, flags);
	wake_up(&conf->wait_buffer);
}

static int raid1_grow_bh(raid1_conf_t *conf, int cnt)
{
	/* allocate cnt buffer_heads, possibly less if kmalloc fails */
	int i = 0;

	while (i < cnt) {
		struct buffer_head *bh;
		bh = kmem_cache_alloc(bh_cachep, SLAB_KERNEL);
		if (!bh) break;

		md_spin_lock_irq(&conf->device_lock);
		bh->b_pprev = &conf->freebh;
		bh->b_next = conf->freebh;
		conf->freebh = bh;
		conf->freebh_cnt++;
		md_spin_unlock_irq(&conf->device_lock);

		i++;
	}
	return i;
}

static void raid1_shrink_bh(raid1_conf_t *conf)
{
	/* discard all buffer_heads */

	md_spin_lock_irq(&conf->device_lock);
	while (conf->freebh) {
		struct buffer_head *bh = conf->freebh;
		conf->freebh = bh->b_next;
		kmem_cache_free(bh_cachep, bh);
		conf->freebh_cnt--;
	}
	md_spin_unlock_irq(&conf->device_lock);
}

static struct raid1_bh *raid1_alloc_r1bh(raid1_conf_t *conf)
{
	struct raid1_bh *r1_bh = NULL;

	do {
		md_spin_lock_irq(&conf->device_lock);
		if (!conf->freer1_blocked && conf->freer1) {
			r1_bh = conf->freer1;
			conf->freer1 = r1_bh->next_r1;
			conf->freer1_cnt--;
			r1_bh->next_r1 = NULL;
			r1_bh->state = (1 << R1BH_PreAlloc);
			r1_bh->bh_req.b_state = 0;
		}
		md_spin_unlock_irq(&conf->device_lock);
		if (r1_bh)
			return r1_bh;
		r1_bh = (struct raid1_bh *) kmalloc(sizeof(struct raid1_bh), GFP_NOIO);
		if (r1_bh) {
			memset(r1_bh, 0, sizeof(*r1_bh));
			return r1_bh;
		}
		conf->freer1_blocked = 1;
		wait_disk_event(conf->wait_buffer,
				!conf->freer1_blocked ||
				conf->freer1_cnt > NR_RESERVED_BUFS/2
			);
		conf->freer1_blocked = 0;
	} while (1);
}

static inline void raid1_free_r1bh(struct raid1_bh *r1_bh)
{
	struct buffer_head *bh = r1_bh->mirror_bh_list;
	raid1_conf_t *conf = mddev_to_conf(r1_bh->mddev);

	r1_bh->mirror_bh_list = NULL;

	if (test_bit(R1BH_PreAlloc, &r1_bh->state)) {
		unsigned long flags;
		spin_lock_irqsave(&conf->device_lock, flags);
		r1_bh->next_r1 = conf->freer1;
		conf->freer1 = r1_bh;
		conf->freer1_cnt++;
		spin_unlock_irqrestore(&conf->device_lock, flags);
		/* don't need to wakeup wait_buffer because
		 *  raid1_free_bh below will do that
		 */
	} else {
		kfree(r1_bh);
	}
	raid1_free_bh(conf, bh);
}

static int raid1_grow_r1bh (raid1_conf_t *conf, int cnt)
{
	int i = 0;

	while (i < cnt) {
		struct raid1_bh *r1_bh;
		r1_bh = (struct raid1_bh*)kmalloc(sizeof(*r1_bh), GFP_KERNEL);
		if (!r1_bh)
			break;
		memset(r1_bh, 0, sizeof(*r1_bh));
		set_bit(R1BH_PreAlloc, &r1_bh->state);
		r1_bh->mddev = conf->mddev;

		raid1_free_r1bh(r1_bh);
		i++;
	}
	return i;
}

static void raid1_shrink_r1bh(raid1_conf_t *conf)
{
	md_spin_lock_irq(&conf->device_lock);
	while (conf->freer1) {
		struct raid1_bh *r1_bh = conf->freer1;
		conf->freer1 = r1_bh->next_r1;
		conf->freer1_cnt--;
		kfree(r1_bh);
	}
	md_spin_unlock_irq(&conf->device_lock);
}

static inline void raid1_free_buf(struct raid1_bh *r1_bh)
{
	unsigned long flags;
	struct buffer_head *bh = r1_bh->mirror_bh_list;
	raid1_conf_t *conf = mddev_to_conf(r1_bh->mddev);
	r1_bh->mirror_bh_list = NULL;

	spin_lock_irqsave(&conf->device_lock, flags);
	r1_bh->next_r1 = conf->freebuf;
	conf->freebuf = r1_bh;
	spin_unlock_irqrestore(&conf->device_lock, flags);
	raid1_free_bh(conf, bh);
}

static struct raid1_bh *raid1_alloc_buf(raid1_conf_t *conf)
{
	struct raid1_bh *r1_bh;

	md_spin_lock_irq(&conf->device_lock);
	wait_event_lock_irq(conf->wait_buffer, conf->freebuf, conf->device_lock);
	r1_bh = conf->freebuf;
	conf->freebuf = r1_bh->next_r1;
	r1_bh->next_r1 = NULL;
	md_spin_unlock_irq(&conf->device_lock);

	return r1_bh;
}

static int raid1_grow_buffers (raid1_conf_t *conf, int cnt)
{
	int i = 0;

	md_spin_lock_irq(&conf->device_lock);
	while (i < cnt) {
		struct raid1_bh *r1_bh;
		struct page *page;

		page = alloc_page(GFP_KERNEL);
		if (!page)
			break;

		r1_bh = (struct raid1_bh *) kmalloc(sizeof(*r1_bh), GFP_KERNEL);
		if (!r1_bh) {
			__free_page(page);
			break;
		}
		memset(r1_bh, 0, sizeof(*r1_bh));
		r1_bh->bh_req.b_page = page;
		r1_bh->bh_req.b_data = page_address(page);
		r1_bh->next_r1 = conf->freebuf;
		conf->freebuf = r1_bh;
		i++;
	}
	md_spin_unlock_irq(&conf->device_lock);
	return i;
}

static void raid1_shrink_buffers (raid1_conf_t *conf)
{
	md_spin_lock_irq(&conf->device_lock);
	while (conf->freebuf) {
		struct raid1_bh *r1_bh = conf->freebuf;
		conf->freebuf = r1_bh->next_r1;
		__free_page(r1_bh->bh_req.b_page);
		kfree(r1_bh);
	}
	md_spin_unlock_irq(&conf->device_lock);
}

static int raid1_map (mddev_t *mddev, kdev_t *rdev)
{
	raid1_conf_t *conf = mddev_to_conf(mddev);
	int i, disks = MD_SB_DISKS;

	/*
	 * Later we do read balancing on the read side
	 * now we use the first available disk.
	 */

	for (i = 0; i < disks; i++) {
		if (conf->mirrors[i].operational) {
			*rdev = conf->mirrors[i].dev;
			return (0);
		}
	}

	printk (KERN_ERR "raid1_map(): huh, no more operational devices?\n");
	return (-1);
}

static void raid1_reschedule_retry (struct raid1_bh *r1_bh)
{
	unsigned long flags;
	mddev_t *mddev = r1_bh->mddev;
	raid1_conf_t *conf = mddev_to_conf(mddev);

	md_spin_lock_irqsave(&retry_list_lock, flags);
	if (raid1_retry_list == NULL)
		raid1_retry_tail = &raid1_retry_list;
	*raid1_retry_tail = r1_bh;
	raid1_retry_tail = &r1_bh->next_r1;
	r1_bh->next_r1 = NULL;
	md_spin_unlock_irqrestore(&retry_list_lock, flags);
	md_wakeup_thread(conf->thread);
}

static void inline io_request_done(unsigned long sector, raid1_conf_t *conf, int phase)
{
	unsigned long flags;
	spin_lock_irqsave(&conf->segment_lock, flags);
	if (sector < conf->start_active)
		conf->cnt_done--;
	else if (sector >= conf->start_future && conf->phase == phase)
		conf->cnt_future--;
	else if (!--conf->cnt_pending)
		wake_up(&conf->wait_ready);

	spin_unlock_irqrestore(&conf->segment_lock, flags);
}

static void inline sync_request_done (unsigned long sector, raid1_conf_t *conf)
{
	unsigned long flags;
	spin_lock_irqsave(&conf->segment_lock, flags);
	if (sector >= conf->start_ready)
		--conf->cnt_ready;
	else if (sector >= conf->start_active) {
		if (!--conf->cnt_active) {
			conf->start_active = conf->start_ready;
			wake_up(&conf->wait_done);
		}
	}
	spin_unlock_irqrestore(&conf->segment_lock, flags);
}

/*
 * raid1_end_bh_io() is called when we have finished servicing a mirrored
 * operation and are ready to return a success/failure code to the buffer
 * cache layer.
 */
static void raid1_end_bh_io (struct raid1_bh *r1_bh, int uptodate)
{
	struct buffer_head *bh = r1_bh->master_bh;

	io_request_done(bh->b_rsector, mddev_to_conf(r1_bh->mddev),
			test_bit(R1BH_SyncPhase, &r1_bh->state));

	bh->b_end_io(bh, uptodate);
	raid1_free_r1bh(r1_bh);
}

void raid1_end_request (struct buffer_head *bh, int uptodate)
{
	struct raid1_bh * r1_bh = (struct raid1_bh *)(bh->b_private);

	/*
	 * this branch is our 'one mirror IO has finished' event handler:
	 */
	if (!uptodate)
		md_error (r1_bh->mddev, bh->b_dev);
	else
		/*
		 * Set R1BH_Uptodate in our master buffer_head, so that
		 * we will return a good error code to the higher
		 * levels even if IO on some other mirrored buffer fails.
		 *
		 * The 'master' represents the complex operation to
		 * user-side. So if something waits for IO, then it will
		 * wait for the 'master' buffer_head.
		 */
		set_bit (R1BH_Uptodate, &r1_bh->state);

	/*
	 * We split up the read and write side, imho they are
	 * conceptually different.
	 */

	if ( (r1_bh->cmd == READ) || (r1_bh->cmd == READA) ) {
		/*
		 * we have only one buffer_head on the read side
		 */
		if (uptodate) {
			raid1_end_bh_io(r1_bh, uptodate);
			return;
		}
		/*
		 * oops, read error:
		 */
		printk(KERN_ERR "raid1: %s: rescheduling block %lu\n",
			 partition_name(bh->b_dev), bh->b_blocknr);
		raid1_reschedule_retry(r1_bh);
		return;
	}

	/*
	 * WRITE:
	 *
	 * Let's see if all mirrored write operations have finished
	 * already.
	 */

	if (atomic_dec_and_test(&r1_bh->remaining))
		raid1_end_bh_io(r1_bh, test_bit(R1BH_Uptodate, &r1_bh->state));
}

/*
 * This routine returns the disk from which the requested read should
 * be done. It bookkeeps the last read position for every disk
 * in array and when new read requests come, the disk whose last
 * position is nearest to the request is chosen.
 *
 * TODO: now if there are 2 mirrors in the same 2 devices, performance
 * degrades dramatically because position is mirror, not device based.
 * This should be changed to be device based. Also atomic sequential
 * reads should be somehow balanced.
 */
static int raid1_read_balance (raid1_conf_t *conf, struct buffer_head *bh)
{
	int new_disk = conf->last_used;
	const int sectors = bh->b_size >> 9;
	const unsigned long this_sector = bh->b_rsector;
	int disk = new_disk;
	unsigned long new_distance;
	unsigned long current_distance;

	/*
	 * Check if it is sane at all to balance
	 */

	if (conf->resync_mirrors)
		goto rb_out;

	/* make sure that disk is operational */
	while( !conf->mirrors[new_disk].operational) {
		if (new_disk <= 0) new_disk = conf->raid_disks;
		new_disk--;
		if (new_disk == disk) {
			/*
			 * This means no working disk was found
			 * Nothing much to do, let's not change anything
			 * and hope for the best...
			 */
			new_disk = conf->last_used;

			goto rb_out;
		}
	}
	disk = new_disk;
	/* now disk == new_disk == starting point for search */

	/*
	 * Don't touch anything for sequential reads.
	 */
	if (this_sector == conf->mirrors[new_disk].head_position)
		goto rb_out;

	/*
	 * If reads have been done only on a single disk
	 * for a time, let's give another disk a chance.
	 * This is for kicking those idling disks so that
	 * they would find work near some hotspot.
	 */
	if (conf->sect_count >= conf->mirrors[new_disk].sect_limit) {
		conf->sect_count = 0;

		do {
			if (new_disk<=0)
				new_disk = conf->raid_disks;
			new_disk--;
			if (new_disk == disk)
				break;
		} while ((conf->mirrors[new_disk].write_only) ||
			 (!conf->mirrors[new_disk].operational));

		goto rb_out;
	}

	current_distance = abs(this_sector -
				conf->mirrors[disk].head_position);

	/* Find the disk which is closest */
	do {
		if (disk <= 0)
			disk = conf->raid_disks;
		disk--;

		if ((conf->mirrors[disk].write_only) ||
				(!conf->mirrors[disk].operational))
			continue;

		new_distance = abs(this_sector -
					conf->mirrors[disk].head_position);

		if (new_distance < current_distance) {
			conf->sect_count = 0;
			current_distance = new_distance;
			new_disk = disk;
		}
	} while (disk != conf->last_used);

rb_out:
	conf->mirrors[new_disk].head_position = this_sector + sectors;

	conf->last_used = new_disk;
	conf->sect_count += sectors;

	return new_disk;
}

static int raid1_make_request (mddev_t *mddev, int rw,
			       struct buffer_head * bh)
{
	raid1_conf_t *conf = mddev_to_conf(mddev);
	struct buffer_head *bh_req, *bhl;
	struct raid1_bh * r1_bh;
	int disks = MD_SB_DISKS;
	int i, sum_bhs = 0;
	struct mirror_info *mirror;

	if (!buffer_locked(bh))
		BUG();

	/*
	 * make_request() can abort the operation when READA is being
	 * used and no empty request is available.
	 *
	 * Currently, just replace the command with READ/WRITE.
	 */
	if (rw == READA)
		rw = READ;

	r1_bh = raid1_alloc_r1bh (conf);

	spin_lock_irq(&conf->segment_lock);
	wait_event_lock_irq(conf->wait_done,
			bh->b_rsector < conf->start_active ||
			bh->b_rsector >= conf->start_future,
			conf->segment_lock);
	if (bh->b_rsector < conf->start_active)
		conf->cnt_done++;
	else {
		conf->cnt_future++;
		if (conf->phase)
			set_bit(R1BH_SyncPhase, &r1_bh->state);
	}
	spin_unlock_irq(&conf->segment_lock);

	/*
	 * i think the read and write branch should be separated completely,
	 * since we want to do read balancing on the read side for example.
	 * Alternative implementations? :) --mingo
	 */

	r1_bh->master_bh = bh;
	r1_bh->mddev = mddev;
	r1_bh->cmd = rw;

	if (rw == READ) {
		/*
		 * read balancing logic:
		 */
		mirror = conf->mirrors + raid1_read_balance(conf, bh);
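The "nearest head position" heuristic that the comment above raid1_read_balance() describes can be illustrated outside the kernel. Below is a minimal, self-contained user-space sketch of just that selection step; the names demo_mirror and pick_read_disk are invented for the example, and it deliberately omits the write-only, resync, sequential-read, and sect_limit hotspot-kick handling that the real driver performs.

/*
 * Illustrative sketch only (not kernel code): pick the operational mirror
 * whose last recorded head position is closest to the requested sector.
 * Names here (demo_mirror, pick_read_disk) are hypothetical.
 */
#include <stdio.h>

struct demo_mirror {
	int operational;		/* is this member disk usable?          */
	unsigned long head_position;	/* sector where the last read finished  */
};

/* Return the index of the operational mirror with the smallest distance
 * between its head position and the requested sector, or -1 if none. */
static int pick_read_disk(const struct demo_mirror *m, int ndisks,
			  unsigned long sector)
{
	int best = -1;
	unsigned long best_dist = ~0UL;
	int i;

	for (i = 0; i < ndisks; i++) {
		unsigned long dist;

		if (!m[i].operational)
			continue;
		/* unsigned-safe absolute difference */
		dist = (sector > m[i].head_position) ?
			sector - m[i].head_position :
			m[i].head_position - sector;
		if (dist < best_dist) {
			best_dist = dist;
			best = i;
		}
	}
	return best;
}

int main(void)
{
	struct demo_mirror mirrors[2] = {
		{ 1, 1000 },	/* disk 0: head last left near sector 1000 */
		{ 1, 9000 },	/* disk 1: head last left near sector 9000 */
	};

	/* A read at sector 1200 goes to disk 0, one at 8800 to disk 1. */
	printf("sector 1200 -> disk %d\n", pick_read_disk(mirrors, 2, 1200));
	printf("sector 8800 -> disk %d\n", pick_read_disk(mirrors, 2, 8800));
	return 0;
}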
