📄 dm-raid1.c
/*
 * Copyright (C) 2003 Sistina Software Limited.
 *
 * This file is released under the GPL.
 */

#include "dm.h"
#include "dm-bio-list.h"
#include "dm-io.h"
#include "dm-log.h"
#include "kcopyd.h"

#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/vmalloc.h>
#include <linux/workqueue.h>

static struct workqueue_struct *_kmirrord_wq;
static struct work_struct _kmirrord_work;

static inline void wake(void)
{
        queue_work(_kmirrord_wq, &_kmirrord_work);
}

/*-----------------------------------------------------------------
 * Region hash
 *
 * The mirror splits itself up into discrete regions. Each
 * region can be in one of three states: clean, dirty,
 * nosync. There is no need to put clean regions in the hash.
 *
 * In addition to being present in the hash table a region _may_
 * be present on one of three lists.
 *
 *   clean_regions: Regions on this list have no io pending to
 *   them, they are in sync, we are no longer interested in them,
 *   they are dull. rh_update_states() will remove them from the
 *   hash table.
 *
 *   quiesced_regions: These regions have been spun down, ready
 *   for recovery. rh_recovery_start() will remove regions from
 *   this list and hand them to kmirrord, which will schedule the
 *   recovery io with kcopyd.
 *
 *   recovered_regions: Regions that kcopyd has successfully
 *   recovered. rh_update_states() will now schedule any delayed
 *   io, up the recovery_count, and remove the region from the
 *   hash.
 *
 * There are 2 locks:
 *   A rw spin lock 'hash_lock' protects just the hash table,
 *   this is never held in write mode from interrupt context,
 *   which I believe means that we only have to disable irqs when
 *   doing a write lock.
 *
 *   An ordinary spin lock 'region_lock' that protects the three
 *   lists in the region_hash, with the 'state', 'list' and
 *   'bhs_delayed' fields of the regions. This is used from irq
 *   context, so all other uses will have to suspend local irqs.
 *---------------------------------------------------------------*/
struct mirror_set;

struct region_hash {
        struct mirror_set *ms;
        sector_t region_size;
        unsigned region_shift;

        /* holds persistent region state */
        struct dirty_log *log;

        /* hash table */
        rwlock_t hash_lock;
        mempool_t *region_pool;
        unsigned int mask;
        unsigned int nr_buckets;
        struct list_head *buckets;

        spinlock_t region_lock;
        struct semaphore recovery_count;
        struct list_head clean_regions;
        struct list_head quiesced_regions;
        struct list_head recovered_regions;
};

enum {
        RH_CLEAN,
        RH_DIRTY,
        RH_NOSYNC,
        RH_RECOVERING
};

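/*
 * Illustrative sketch (not part of the original driver): a region moves
 * between the states above as writes arrive and recovery runs. The helper
 * below is a hypothetical debugging aid that spells the states out;
 * rh_state_name() does not exist in dm-raid1.c.
 */
#if 0   /* example only */
static const char *rh_state_name(int state)
{
        switch (state) {
        case RH_CLEAN:      return "clean";       /* in sync, no io pending       */
        case RH_DIRTY:      return "dirty";       /* in sync, writes in flight    */
        case RH_NOSYNC:     return "nosync";      /* out of sync, needs recovery  */
        case RH_RECOVERING: return "recovering";  /* being resynced by kcopyd     */
        default:            return "unknown";
        }
}
#endif
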
struct region {
        struct region_hash *rh; /* FIXME: can we get rid of this ? */
        region_t key;
        int state;

        struct list_head hash_list;
        struct list_head list;

        atomic_t pending;
        struct bio_list delayed_bios;
};

/*
 * Conversion fns
 */
static inline region_t bio_to_region(struct region_hash *rh, struct bio *bio)
{
        return bio->bi_sector >> rh->region_shift;
}

static inline sector_t region_to_sector(struct region_hash *rh, region_t region)
{
        return region << rh->region_shift;
}

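/*
 * Illustrative note (not part of the original driver): region_size is a
 * power of two, so rh_init() below derives region_shift as
 * ffs(region_size) - 1 and the conversions above reduce to plain shifts.
 * For example, with a hypothetical region_size of 1024 sectors:
 *
 *      region_shift = ffs(1024) - 1 = 10
 *      bio_to_region(sector 5000)   = 5000 >> 10 = region 4
 *      region_to_sector(region 4)   = 4 << 10    = sector 4096
 */
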
/* FIXME move this */
static void queue_bio(struct mirror_set *ms, struct bio *bio, int rw);

static void *region_alloc(int gfp_mask, void *pool_data)
{
        return kmalloc(sizeof(struct region), gfp_mask);
}

static void region_free(void *element, void *pool_data)
{
        kfree(element);
}

#define MIN_REGIONS 64
#define MAX_RECOVERY 1
static int rh_init(struct region_hash *rh, struct mirror_set *ms,
                   struct dirty_log *log, sector_t region_size,
                   region_t nr_regions)
{
        unsigned int nr_buckets, max_buckets;
        size_t i;

        /*
         * Calculate a suitable number of buckets for our hash
         * table.
         */
        max_buckets = nr_regions >> 6;
        for (nr_buckets = 128u; nr_buckets < max_buckets; nr_buckets <<= 1)
                ;
        nr_buckets >>= 1;

        rh->ms = ms;
        rh->log = log;
        rh->region_size = region_size;
        rh->region_shift = ffs(region_size) - 1;
        rwlock_init(&rh->hash_lock);
        rh->mask = nr_buckets - 1;
        rh->nr_buckets = nr_buckets;

        rh->buckets = vmalloc(nr_buckets * sizeof(*rh->buckets));
        if (!rh->buckets) {
                DMERR("unable to allocate region hash memory");
                return -ENOMEM;
        }

        for (i = 0; i < nr_buckets; i++)
                INIT_LIST_HEAD(rh->buckets + i);

        spin_lock_init(&rh->region_lock);
        sema_init(&rh->recovery_count, 0);
        INIT_LIST_HEAD(&rh->clean_regions);
        INIT_LIST_HEAD(&rh->quiesced_regions);
        INIT_LIST_HEAD(&rh->recovered_regions);

        rh->region_pool = mempool_create(MIN_REGIONS, region_alloc,
                                         region_free, NULL);
        if (!rh->region_pool) {
                vfree(rh->buckets);
                rh->buckets = NULL;
                return -ENOMEM;
        }

        return 0;
}

static void rh_exit(struct region_hash *rh)
{
        unsigned int h;
        struct region *reg, *nreg;

        BUG_ON(!list_empty(&rh->quiesced_regions));
        for (h = 0; h < rh->nr_buckets; h++) {
                list_for_each_entry_safe(reg, nreg, rh->buckets + h, hash_list) {
                        BUG_ON(atomic_read(&reg->pending));
                        mempool_free(reg, rh->region_pool);
                }
        }

        if (rh->log)
                dm_destroy_dirty_log(rh->log);
        if (rh->region_pool)
                mempool_destroy(rh->region_pool);
        vfree(rh->buckets);
}

#define RH_HASH_MULT 2654435387U

static inline unsigned int rh_hash(struct region_hash *rh, region_t region)
{
        return (unsigned int) ((region * RH_HASH_MULT) >> 12) & rh->mask;
}

static struct region *__rh_lookup(struct region_hash *rh, region_t region)
{
        struct region *reg;

        list_for_each_entry (reg, rh->buckets + rh_hash(rh, region), hash_list)
                if (reg->key == region)
                        return reg;

        return NULL;
}

static void __rh_insert(struct region_hash *rh, struct region *reg)
{
        unsigned int h = rh_hash(rh, reg->key);
        list_add(&reg->hash_list, rh->buckets + h);
}

static struct region *__rh_alloc(struct region_hash *rh, region_t region)
{
        struct region *reg, *nreg;

        read_unlock(&rh->hash_lock);
        nreg = mempool_alloc(rh->region_pool, GFP_NOIO);
        nreg->state = rh->log->type->in_sync(rh->log, region, 1) ?
                RH_CLEAN : RH_NOSYNC;
        nreg->rh = rh;
        nreg->key = region;

        INIT_LIST_HEAD(&nreg->list);
        atomic_set(&nreg->pending, 0);
        bio_list_init(&nreg->delayed_bios);
        write_lock_irq(&rh->hash_lock);

        reg = __rh_lookup(rh, region);
        if (reg)
                /* we lost the race */
                mempool_free(nreg, rh->region_pool);

        else {
                __rh_insert(rh, nreg);
                if (nreg->state == RH_CLEAN) {
                        spin_lock_irq(&rh->region_lock);
                        list_add(&nreg->list, &rh->clean_regions);
                        spin_unlock_irq(&rh->region_lock);
                }
                reg = nreg;
        }
        write_unlock_irq(&rh->hash_lock);
        read_lock(&rh->hash_lock);

        return reg;
}

static inline struct region *__rh_find(struct region_hash *rh, region_t region)
{
        struct region *reg;

        reg = __rh_lookup(rh, region);
        if (!reg)
                reg = __rh_alloc(rh, region);

        return reg;
}

static int rh_state(struct region_hash *rh, region_t region, int may_block)
{
        int r;
        struct region *reg;

        read_lock(&rh->hash_lock);
        reg = __rh_lookup(rh, region);
        read_unlock(&rh->hash_lock);

        if (reg)
                return reg->state;

        /*
         * The region wasn't in the hash, so we fall back to the
         * dirty log.
         */
        r = rh->log->type->in_sync(rh->log, region, may_block);

        /*
         * Any error from the dirty log (eg. -EWOULDBLOCK) gets
         * taken as a RH_NOSYNC
         */
        return r == 1 ? RH_CLEAN : RH_NOSYNC;
}

static inline int rh_in_sync(struct region_hash *rh,
                             region_t region, int may_block)
{
        int state = rh_state(rh, region, may_block);
        return state == RH_CLEAN || state == RH_DIRTY;
}

static void dispatch_bios(struct mirror_set *ms, struct bio_list *bio_list)
{
        struct bio *bio;

        while ((bio = bio_list_pop(bio_list))) {
                queue_bio(ms, bio, WRITE);
        }
}

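/*
 * Illustrative sketch (not part of the original driver): __rh_alloc() above
 * follows the classic "drop the lock, allocate, retake the lock, re-check"
 * pattern, because mempool_alloc(GFP_NOIO) may sleep and so cannot be called
 * with hash_lock held. Stripped of the dm-raid1 specifics it looks like the
 * outline below; lookup(), insert(), new_node() and free_node() are
 * placeholder names, not real functions.
 */
#if 0   /* example only */
        read_unlock(&lock);             /* cannot sleep under the lock        */
        new = new_node(key);            /* may sleep                          */
        write_lock_irq(&lock);
        node = lookup(key);             /* somebody may have beaten us to it  */
        if (node)
                free_node(new);         /* we lost the race                   */
        else
                node = insert(new);
        write_unlock_irq(&lock);
        read_lock(&lock);               /* caller still expects the read lock */
#endif
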
static void rh_update_states(struct region_hash *rh)
{
        struct region *reg, *next;

        LIST_HEAD(clean);
        LIST_HEAD(recovered);

        /*
         * Quickly grab the lists.
         */
        write_lock_irq(&rh->hash_lock);
        spin_lock(&rh->region_lock);
        if (!list_empty(&rh->clean_regions)) {
                list_splice(&rh->clean_regions, &clean);
                INIT_LIST_HEAD(&rh->clean_regions);

                list_for_each_entry (reg, &clean, list) {
                        rh->log->type->clear_region(rh->log, reg->key);
                        list_del(&reg->hash_list);
                }
        }

        if (!list_empty(&rh->recovered_regions)) {
                list_splice(&rh->recovered_regions, &recovered);
                INIT_LIST_HEAD(&rh->recovered_regions);

                list_for_each_entry (reg, &recovered, list)
                        list_del(&reg->hash_list);
        }
        spin_unlock(&rh->region_lock);
        write_unlock_irq(&rh->hash_lock);

        /*
         * All the regions on the recovered and clean lists have
         * now been pulled out of the system, so no need to do
         * any more locking.
         */
        list_for_each_entry_safe (reg, next, &recovered, list) {
                rh->log->type->clear_region(rh->log, reg->key);
                rh->log->type->complete_resync_work(rh->log, reg->key, 1);
                dispatch_bios(rh->ms, &reg->delayed_bios);
                up(&rh->recovery_count);
                mempool_free(reg, rh->region_pool);
        }

        if (!list_empty(&recovered))
                rh->log->type->flush(rh->log);

        list_for_each_entry_safe (reg, next, &clean, list)
                mempool_free(reg, rh->region_pool);
}

static void rh_inc(struct region_hash *rh, region_t region)
{
        struct region *reg;

        read_lock(&rh->hash_lock);
        reg = __rh_find(rh, region);
        if (reg->state == RH_CLEAN) {
                rh->log->type->mark_region(rh->log, reg->key);

                spin_lock_irq(&rh->region_lock);
                reg->state = RH_DIRTY;
                list_del_init(&reg->list);      /* take off the clean list */
                spin_unlock_irq(&rh->region_lock);
        }

        atomic_inc(&reg->pending);
        read_unlock(&rh->hash_lock);
}

static void rh_inc_pending(struct region_hash *rh, struct bio_list *bios)
{
        struct bio *bio;

        for (bio = bios->head; bio; bio = bio->bi_next)
                rh_inc(rh, bio_to_region(rh, bio));
}

static void rh_dec(struct region_hash *rh, region_t region)
{
        unsigned long flags;
        struct region *reg;
        int should_wake = 0;

        read_lock(&rh->hash_lock);
        reg = __rh_lookup(rh, region);
        read_unlock(&rh->hash_lock);

        if (atomic_dec_and_test(&reg->pending)) {
                spin_lock_irqsave(&rh->region_lock, flags);
                if (reg->state == RH_RECOVERING) {
                        list_add_tail(&reg->list, &rh->quiesced_regions);
                } else {
                        reg->state = RH_CLEAN;
                        list_add(&reg->list, &rh->clean_regions);
                }
                spin_unlock_irqrestore(&rh->region_lock, flags);
                should_wake = 1;
        }

        if (should_wake)
                wake();
}

/*
 * Starts quiescing a region in preparation for recovery.
 */
static int __rh_recovery_prepare(struct region_hash *rh)
{
        int r;
        struct region *reg;
        region_t region;

        /*
         * Ask the dirty log what's next.
         */
        r = rh->log->type->get_resync_work(rh->log, &region);
        if (r <= 0)
                return r;

        /*
         * Get this region, and start it quiescing by setting the
         * recovering flag.
         */
        read_lock(&rh->hash_lock);
        reg = __rh_find(rh, region);
        read_unlock(&rh->hash_lock);

        spin_lock_irq(&rh->region_lock);
        reg->state = RH_RECOVERING;

        /* Already quiesced ? */
        if (atomic_read(&reg->pending))
                list_del_init(&reg->list);

        else {
                list_del_init(&reg->list);
                list_add(&reg->list, &rh->quiesced_regions);
        }
        spin_unlock_irq(&rh->region_lock);

        return 1;
}

static void rh_recovery_prepare(struct region_hash *rh)
{
        while (!down_trylock(&rh->recovery_count))
                if (__rh_recovery_prepare(rh) <= 0) {
                        up(&rh->recovery_count);
                        break;
                }
}

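/*
 * Illustrative sketch (not part of the original driver): rh_update_states()
 * above uses the common "splice under the lock, work outside it" idiom. The
 * shared lists are emptied onto private list heads while the locks are held,
 * and the potentially slow per-region work (log updates, dispatching delayed
 * bios, freeing) then runs with no locks held. In outline, with shared_list,
 * local, entry and process_entry() as placeholder names:
 */
#if 0   /* example only */
        LIST_HEAD(local);

        spin_lock(&lock);
        list_splice(&shared_list, &local);      /* steal the whole list...  */
        INIT_LIST_HEAD(&shared_list);           /* ...and leave it empty    */
        spin_unlock(&lock);

        list_for_each_entry_safe (entry, next, &local, list)
                process_entry(entry);           /* slow work, lock not held */
#endif
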
/*
 * Returns any quiesced regions.
 */
static struct region *rh_recovery_start(struct region_hash *rh)
{
        struct region *reg = NULL;

        spin_lock_irq(&rh->region_lock);
        if (!list_empty(&rh->quiesced_regions)) {
                reg = list_entry(rh->quiesced_regions.next,
                                 struct region, list);
                list_del_init(&reg->list);      /* remove from the quiesced list */
        }
        spin_unlock_irq(&rh->region_lock);

        return reg;
}

/* FIXME: success ignored for now */
static void rh_recovery_end(struct region *reg, int success)
{
        struct region_hash *rh = reg->rh;

        spin_lock_irq(&rh->region_lock);
        list_add(&reg->list, &reg->rh->recovered_regions);
        spin_unlock_irq(&rh->region_lock);

        wake();
}

static void rh_flush(struct region_hash *rh)
{
        rh->log->type->flush(rh->log);
}

static void rh_delay(struct region_hash *rh, struct bio *bio)
{
        struct region *reg;

        read_lock(&rh->hash_lock);
        reg = __rh_find(rh, bio_to_region(rh, bio));
        bio_list_add(&reg->delayed_bios, bio);
        read_unlock(&rh->hash_lock);
}

static void rh_stop_recovery(struct region_hash *rh)
{
        int i;

        /* wait for any recovering regions */
        for (i = 0; i < MAX_RECOVERY; i++)
                down(&rh->recovery_count);
}

static void rh_start_recovery(struct region_hash *rh)
{
        int i;

        for (i = 0; i < MAX_RECOVERY; i++)
                up(&rh->recovery_count);

        wake();
}

/*-----------------------------------------------------------------
 * Mirror set structures.
 *---------------------------------------------------------------*/
struct mirror {
        atomic_t error_count;
        struct dm_dev *dev;
        sector_t offset;
};

struct mirror_set {
        struct dm_target *ti;
        struct list_head list;
        struct region_hash rh;
        struct kcopyd_client *kcopyd_client;

        spinlock_t lock;        /* protects the next two lists */
        struct bio_list reads;
        struct bio_list writes;

        /* recovery */
        region_t nr_regions;
        int in_sync;

        unsigned int nr_mirrors;
        struct mirror mirror[0];
};

/*
 * Every mirror should look like this one.
 */
#define DEFAULT_MIRROR 0

/*
 * This is yucky. We squirrel the mirror_set struct away inside
 * bi_next for write buffers. This is safe since the bh
 * doesn't get submitted to the lower levels of block layer.
 */
static struct mirror_set *bio_get_ms(struct bio *bio)
{
        return (struct mirror_set *) bio->bi_next;
}

static void bio_set_ms(struct bio *bio, struct mirror_set *ms)
{
        bio->bi_next = (struct bio *) ms;
}

/*-----------------------------------------------------------------
 * Recovery.
 *
 * When a mirror is first activated we may find that some regions
 * are in the no-sync state. We have to recover these by
 * recopying from the default mirror to all the others.
 *---------------------------------------------------------------*/
static void recovery_complete(int read_err, unsigned int write_err,
                              void *context)
{
        struct region *reg = (struct region *) context;

        /* FIXME: better error handling */
        rh_recovery_end(reg, read_err || write_err);
}

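/*
 * Illustrative sketch (not part of the original driver): recovery_count is a
 * counting semaphore used as a throttle. rh_start_recovery() makes
 * MAX_RECOVERY "slots" available, rh_recovery_prepare() claims one slot per
 * region with down_trylock(), rh_update_states() returns a slot with up()
 * once kcopyd finishes a region, and rh_stop_recovery() drains every slot to
 * wait for in-flight recoveries. In outline, with throttle and
 * do_one_recovery() as placeholder names:
 */
#if 0   /* example only */
        sema_init(&throttle, 0);                /* no recovery allowed yet       */
        for (i = 0; i < MAX_RECOVERY; i++)
                up(&throttle);                  /* open MAX_RECOVERY slots       */

        while (!down_trylock(&throttle))        /* claim a slot, never block     */
                do_one_recovery();              /* slot returned via up() later  */

        for (i = 0; i < MAX_RECOVERY; i++)
                down(&throttle);                /* wait until all slots returned */
#endif
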
static int recover(struct mirror_set *ms, struct region *reg)
{
        int r;
        unsigned int i;
        struct io_region from, to[KCOPYD_MAX_REGIONS], *dest;
        struct mirror *m;
        unsigned long flags = 0;

        /* fill in the source */
        m = ms->mirror + DEFAULT_MIRROR;
        from.bdev = m->dev->bdev;
        from.sector = m->offset + region_to_sector(reg->rh, reg->key);
        if (reg->key == (ms->nr_regions - 1)) {
                /*
                 * The final region may be smaller than
                 * region_size.
                 */
                from.count = ms->ti->len & (reg->rh->region_size - 1);
                if (!from.count)
                        from.count = reg->rh->region_size;
        } else
                from.count = reg->rh->region_size;

        /* fill in the destinations */
        for (i = 0, dest = to; i < ms->nr_mirrors; i++) {
                if (i == DEFAULT_MIRROR)
                        continue;

                m = ms->mirror + i;
                dest->bdev = m->dev->bdev;
                dest->sector = m->offset + region_to_sector(reg->rh, reg->key);
                dest->count = from.count;
                dest++;
        }

        /* hand to kcopyd */
        set_bit(KCOPYD_IGNORE_ERROR, &flags);
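
/*
 * Illustrative note (not part of the original driver): recover() sizes the
 * last region with ti->len & (region_size - 1), which is ti->len modulo the
 * power-of-two region_size. For a hypothetical target of 10000 sectors and a
 * region_size of 1024:
 *
 *      10000 & (1024 - 1) = 10000 % 1024 = 784 sectors in the final region
 *
 * and when the target length is an exact multiple the mask yields 0, so the
 * code falls back to a full region_size.
 */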