dm-snap.c
/*
 * dm-snapshot.c
 *
 * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
 *
 * This file is released under the GPL.
 */

#include <linux/blkdev.h>
#include <linux/ctype.h>
#include <linux/device-mapper.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kdev_t.h>
#include <linux/list.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/log2.h>

#include "dm-snap.h"
#include "dm-bio-list.h"
#include "kcopyd.h"

#define DM_MSG_PREFIX "snapshots"

/*
 * The percentage increment we will wake up users at
 */
#define WAKE_UP_PERCENT 5

/*
 * kcopyd priority of snapshot operations
 */
#define SNAPSHOT_COPY_PRIORITY 2

/*
 * Each snapshot reserves this many pages for io
 */
#define SNAPSHOT_PAGES 256

static struct workqueue_struct *ksnapd;
static void flush_queued_bios(struct work_struct *work);

struct dm_snap_pending_exception {
	struct dm_snap_exception e;

	/*
	 * Origin buffers waiting for this to complete are held
	 * in a bio list
	 */
	struct bio_list origin_bios;
	struct bio_list snapshot_bios;

	/*
	 * Short-term queue of pending exceptions prior to submission.
	 */
	struct list_head list;

	/*
	 * The primary pending_exception is the one that holds
	 * the ref_count and the list of origin_bios for a
	 * group of pending_exceptions.  It is always last to get freed.
	 * These fields get set up when writing to the origin.
	 */
	struct dm_snap_pending_exception *primary_pe;

	/*
	 * Number of pending_exceptions processing this chunk.
	 * When this drops to zero we must complete the origin bios.
	 * If incrementing or decrementing this, hold pe->snap->lock for
	 * the sibling concerned and not pe->primary_pe->snap->lock
	 * unless they are the same.
	 */
	atomic_t ref_count;

	/* Pointer back to snapshot context */
	struct dm_snapshot *snap;

	/*
	 * 1 indicates the exception has already been sent to
	 * kcopyd.
	 */
	int started;
};

/*
 * Hash table mapping origin volumes to lists of snapshots and
 * a lock to protect it
 */
static struct kmem_cache *exception_cache;
static struct kmem_cache *pending_cache;
static mempool_t *pending_pool;

/*
 * One of these per registered origin, held in the snapshot_origins hash
 */
struct origin {
	/* The origin device */
	struct block_device *bdev;

	struct list_head hash_list;

	/* List of snapshots for this origin */
	struct list_head snapshots;
};

/*
 * Size of the hash table for origin volumes. If we make this
 * the size of the minors list then it should be nearly perfect
 */
#define ORIGIN_HASH_SIZE 256
#define ORIGIN_MASK      0xFF
static struct list_head *_origins;
static struct rw_semaphore _origins_lock;

static int init_origin_hash(void)
{
	int i;

	_origins = kmalloc(ORIGIN_HASH_SIZE * sizeof(struct list_head),
			   GFP_KERNEL);
	if (!_origins) {
		DMERR("unable to allocate memory");
		return -ENOMEM;
	}

	for (i = 0; i < ORIGIN_HASH_SIZE; i++)
		INIT_LIST_HEAD(_origins + i);
	init_rwsem(&_origins_lock);

	return 0;
}

static void exit_origin_hash(void)
{
	kfree(_origins);
}

static unsigned origin_hash(struct block_device *bdev)
{
	return bdev->bd_dev & ORIGIN_MASK;
}

static struct origin *__lookup_origin(struct block_device *origin)
{
	struct list_head *ol;
	struct origin *o;

	ol = &_origins[origin_hash(origin)];
	list_for_each_entry (o, ol, hash_list)
		if (bdev_equal(o->bdev, origin))
			return o;

	return NULL;
}

static void __insert_origin(struct origin *o)
{
	struct list_head *sl = &_origins[origin_hash(o->bdev)];
	list_add_tail(&o->hash_list, sl);
}
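/*
 * Example of the bucket arithmetic: with ORIGIN_HASH_SIZE 256 and
 * ORIGIN_MASK 0xFF, origin_hash() keeps only the low byte of bd_dev
 * (the low bits of the minor number), so a device numbered 8:3 lands
 * in bucket 3.  Devices that collide in that byte simply share the
 * per-bucket list that __lookup_origin() walks with bdev_equal().
 */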
/*
 * Make a note of the snapshot and its origin so we can look it
 * up when the origin has a write on it.
 */
static int register_snapshot(struct dm_snapshot *snap)
{
	struct origin *o;
	struct block_device *bdev = snap->origin->bdev;

	down_write(&_origins_lock);
	o = __lookup_origin(bdev);

	if (!o) {
		/* New origin */
		o = kmalloc(sizeof(*o), GFP_KERNEL);
		if (!o) {
			up_write(&_origins_lock);
			return -ENOMEM;
		}

		/* Initialise the struct */
		INIT_LIST_HEAD(&o->snapshots);
		o->bdev = bdev;
		__insert_origin(o);
	}

	list_add_tail(&snap->list, &o->snapshots);

	up_write(&_origins_lock);
	return 0;
}

static void unregister_snapshot(struct dm_snapshot *s)
{
	struct origin *o;

	down_write(&_origins_lock);
	o = __lookup_origin(s->origin->bdev);

	list_del(&s->list);
	if (list_empty(&o->snapshots)) {
		list_del(&o->hash_list);
		kfree(o);
	}

	up_write(&_origins_lock);
}

/*
 * Implementation of the exception hash tables.
 */
static int init_exception_table(struct exception_table *et, uint32_t size)
{
	unsigned int i;

	et->hash_mask = size - 1;
	et->table = dm_vcalloc(size, sizeof(struct list_head));
	if (!et->table)
		return -ENOMEM;

	for (i = 0; i < size; i++)
		INIT_LIST_HEAD(et->table + i);

	return 0;
}

static void exit_exception_table(struct exception_table *et,
				 struct kmem_cache *mem)
{
	struct list_head *slot;
	struct dm_snap_exception *ex, *next;
	int i, size;

	size = et->hash_mask + 1;
	for (i = 0; i < size; i++) {
		slot = et->table + i;

		list_for_each_entry_safe (ex, next, slot, hash_list)
			kmem_cache_free(mem, ex);
	}

	vfree(et->table);
}

static uint32_t exception_hash(struct exception_table *et, chunk_t chunk)
{
	return chunk & et->hash_mask;
}

static void insert_exception(struct exception_table *eh,
			     struct dm_snap_exception *e)
{
	struct list_head *l = &eh->table[exception_hash(eh, e->old_chunk)];
	list_add(&e->hash_list, l);
}

static void remove_exception(struct dm_snap_exception *e)
{
	list_del(&e->hash_list);
}

/*
 * Return the exception data for a sector, or NULL if not
 * remapped.
 */
static struct dm_snap_exception *lookup_exception(struct exception_table *et,
						  chunk_t chunk)
{
	struct list_head *slot;
	struct dm_snap_exception *e;

	slot = &et->table[exception_hash(et, chunk)];
	list_for_each_entry (e, slot, hash_list)
		if (e->old_chunk == chunk)
			return e;

	return NULL;
}

static struct dm_snap_exception *alloc_exception(void)
{
	struct dm_snap_exception *e;

	e = kmem_cache_alloc(exception_cache, GFP_NOIO);
	if (!e)
		e = kmem_cache_alloc(exception_cache, GFP_ATOMIC);

	return e;
}

static void free_exception(struct dm_snap_exception *e)
{
	kmem_cache_free(exception_cache, e);
}

static struct dm_snap_pending_exception *alloc_pending_exception(void)
{
	return mempool_alloc(pending_pool, GFP_NOIO);
}

static void free_pending_exception(struct dm_snap_pending_exception *pe)
{
	mempool_free(pe, pending_pool);
}

int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new)
{
	struct dm_snap_exception *e;

	e = alloc_exception();
	if (!e)
		return -ENOMEM;

	e->old_chunk = old;
	e->new_chunk = new;
	insert_exception(&s->complete, e);

	return 0;
}

/*
 * Hard coded magic.
 */
static int calc_max_buckets(void)
{
	/* use a fixed size of 2MB */
	unsigned long mem = 2 * 1024 * 1024;
	mem /= sizeof(struct list_head);

	return mem;
}

/*
 * Rounds a number down to a power of 2.
 */
static uint32_t round_down(uint32_t n)
{
	while (n & (n - 1))
		n &= (n - 1);
	return n;
}
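/*
 * Worked example: round_down() clears the lowest set bit on each pass,
 * so 1000 -> 992 -> 960 -> 896 -> 768 -> 512, stopping once a single
 * bit remains, i.e. the largest power of 2 <= n.  The complementary
 * round_up() defined below computes (1000 + 511) & ~511 = 1024 for a
 * 512 boundary.
 */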
/*
 * Allocate room for a suitable hash table.
 */
static int init_hash_tables(struct dm_snapshot *s)
{
	sector_t hash_size, cow_dev_size, origin_dev_size, max_buckets;

	/*
	 * Calculate based on the size of the original volume or
	 * the COW volume...
	 */
	cow_dev_size = get_dev_size(s->cow->bdev);
	origin_dev_size = get_dev_size(s->origin->bdev);
	max_buckets = calc_max_buckets();

	hash_size = min(origin_dev_size, cow_dev_size) >> s->chunk_shift;
	hash_size = min(hash_size, max_buckets);

	/* Round it down to a power of 2 */
	hash_size = round_down(hash_size);
	if (init_exception_table(&s->complete, hash_size))
		return -ENOMEM;

	/*
	 * Allocate hash table for in-flight exceptions
	 * Make this smaller than the real hash table
	 */
	hash_size >>= 3;
	if (hash_size < 64)
		hash_size = 64;

	if (init_exception_table(&s->pending, hash_size)) {
		exit_exception_table(&s->complete, exception_cache);
		return -ENOMEM;
	}

	return 0;
}

/*
 * Round a number up to the nearest 'size' boundary.  size must
 * be a power of 2.
 */
static ulong round_up(ulong n, ulong size)
{
	size--;
	return (n + size) & ~size;
}

static int set_chunk_size(struct dm_snapshot *s, const char *chunk_size_arg,
			  char **error)
{
	unsigned long chunk_size;
	char *value;

	chunk_size = simple_strtoul(chunk_size_arg, &value, 10);

	if (*chunk_size_arg == '\0' || *value != '\0') {
		*error = "Invalid chunk size";
		return -EINVAL;
	}

	if (!chunk_size) {
		s->chunk_size = s->chunk_mask = s->chunk_shift = 0;
		return 0;
	}

	/*
	 * Chunk size must be multiple of page size.  Silently
	 * round up if it's not.
	 */
	chunk_size = round_up(chunk_size, PAGE_SIZE >> 9);

	/* Check chunk_size is a power of 2 */
	if (!is_power_of_2(chunk_size)) {
		*error = "Chunk size is not a power of 2";
		return -EINVAL;
	}

	/* Validate the chunk size against the device block size */
	if (chunk_size % (bdev_hardsect_size(s->cow->bdev) >> 9)) {
		*error = "Chunk size is not a multiple of device blocksize";
		return -EINVAL;
	}

	s->chunk_size = chunk_size;
	s->chunk_mask = chunk_size - 1;
	s->chunk_shift = ffs(chunk_size) - 1;

	return 0;
}

/*
 * Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n> <chunk-size>
 */
static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
	struct dm_snapshot *s;
	int r = -EINVAL;
	char persistent;
	char *origin_path;
	char *cow_path;

	if (argc != 4) {
		ti->error = "requires exactly 4 arguments";
		r = -EINVAL;
		goto bad1;
	}

	origin_path = argv[0];
	cow_path = argv[1];
	persistent = toupper(*argv[2]);

	if (persistent != 'P' && persistent != 'N') {
		ti->error = "Persistent flag is not P or N";
		r = -EINVAL;
		goto bad1;
	}

	s = kmalloc(sizeof(*s), GFP_KERNEL);
	if (s == NULL) {
		ti->error = "Cannot allocate snapshot context private "
			    "structure";
		r = -ENOMEM;
		goto bad1;
	}

	r = dm_get_device(ti, origin_path, 0, ti->len, FMODE_READ, &s->origin);
	if (r) {
		ti->error = "Cannot get origin device";
		goto bad2;
	}

	r = dm_get_device(ti, cow_path, 0, 0,
			  FMODE_READ | FMODE_WRITE, &s->cow);
	if (r) {
		dm_put_device(ti, s->origin);
		ti->error = "Cannot get COW device";
		goto bad2;
	}

	r = set_chunk_size(s, argv[3], &ti->error);
	if (r)
		goto bad3;

	s->type = persistent;

	s->valid = 1;
	s->active = 0;
	s->last_percent = 0;
	init_rwsem(&s->lock);
	spin_lock_init(&s->pe_lock);
	s->table = ti->table;

	/* Allocate hash table for COW data */
	if (init_hash_tables(s)) {
		ti->error = "Unable to allocate hash table space";
		r = -ENOMEM;
		goto bad3;
	}

	s->store.snap = s;

	if (persistent == 'P')
		r = dm_create_persistent(&s->store);
	else
		r = dm_create_transient(&s->store);

	if (r) {
		ti->error = "Couldn't create exception store";
		r = -EINVAL;
		goto bad4;
	}

	r = kcopyd_client_create(SNAPSHOT_PAGES, &s->kcopyd_client);
	if (r) {
		ti->error = "Could not create kcopyd client";
		goto bad5;
	}

	/* Metadata must only be loaded into one table at once */
	r = s->store.read_metadata(&s->store);
	if (r < 0) {
		ti->error = "Failed to read snapshot metadata";
		goto bad6;
	} else if (r > 0) {
		s->valid = 0;
		DMWARN("Snapshot is marked invalid.");
	}

	bio_list_init(&s->queued_bios);
	INIT_WORK(&s->queued_bios_work, flush_queued_bios);

	/* Add snapshot to the list of snapshots for this origin */
	/* Exceptions aren't triggered till snapshot_resume() is called */
	if (register_snapshot(s)) {
		r = -EINVAL;
		ti->error = "Cannot register snapshot origin";
		goto bad6;
	}

	ti->private = s;
	ti->split_io = s->chunk_size;

	return 0;

 bad6:
	kcopyd_client_destroy(s->kcopyd_client);

 bad5:
	s->store.destroy(&s->store);

 bad4:
	exit_exception_table(&s->pending, pending_cache);
	exit_exception_table(&s->complete, exception_cache);

 bad3:
	dm_put_device(ti, s->cow);
	dm_put_device(ti, s->origin);

 bad2:
	kfree(s);

 bad1:
	return r;
}

static void __free_exceptions(struct dm_snapshot *s)
{
	kcopyd_client_destroy(s->kcopyd_client);
	s->kcopyd_client = NULL;

	exit_exception_table(&s->pending, pending_cache);
	exit_exception_table(&s->complete, exception_cache);

	s->store.destroy(&s->store);
}

static void snapshot_dtr(struct dm_target *ti)
{
	struct dm_snapshot *s = ti->private;

	flush_workqueue(ksnapd);

	/* Prevent further origin writes from using this snapshot. */
	/* After this returns there can be no new kcopyd jobs. */
	unregister_snapshot(s);

	__free_exceptions(s);

	dm_put_device(ti, s->origin);
	dm_put_device(ti, s->cow);

	kfree(s);
}

/*
 * Flush a list of buffers.
 */
static void flush_bios(struct bio *bio)
{
	struct bio *n;

	while (bio) {
		n = bio->bi_next;
		bio->bi_next = NULL;
		generic_make_request(bio);
		bio = n;
	}
}

static void flush_queued_bios(struct work_struct *work)
{
	struct dm_snapshot *s =
		container_of(work, struct dm_snapshot, queued_bios_work);
	struct bio *queued_bios;
	unsigned long flags;

	spin_lock_irqsave(&s->pe_lock, flags);
	queued_bios = bio_list_get(&s->queued_bios);
	spin_unlock_irqrestore(&s->pe_lock, flags);

	flush_bios(queued_bios);
}

/*
 * Error a list of buffers.
 */
static void error_bios(struct bio *bio)
{
	struct bio *n;

	while (bio) {
		n = bio->bi_next;
		bio->bi_next = NULL;
		bio_io_error(bio);
		bio = n;
	}
}

static void __invalidate_snapshot(struct dm_snapshot *s, int err)
{
	if (!s->valid)
		return;

	if (err == -EIO)
		DMERR("Invalidating snapshot: Error reading/writing.");
	else if (err == -ENOMEM)
		DMERR("Invalidating snapshot: Unable to allocate exception.");

	if (s->store.drop_snapshot)
		s->store.drop_snapshot(&s->store);

	s->valid = 0;

	dm_table_event(s->table);
}
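The snapshot target above is constructed from a four-argument table line, <origin_dev> <COW-dev> <p/n> <chunk-size>, with the chunk size (in 512-byte sectors) validated by set_chunk_size(). The following stand-alone sketch reproduces that validation in user space to make the arithmetic concrete; the 4096-byte PAGE_SIZE and the example chunk size of 10 sectors are illustrative assumptions, not values taken from any particular kernel build.

#include <stdio.h>

/* Round n up to the nearest 'size' boundary; size must be a power of 2
 * (same trick as the kernel round_up() above). */
static unsigned long round_up(unsigned long n, unsigned long size)
{
	size--;
	return (n + size) & ~size;
}

int main(void)
{
	unsigned long page_sectors = 4096 >> 9; /* assumed PAGE_SIZE, in sectors */
	unsigned long chunk_size = 10;          /* example value parsed from argv[3] */

	/* Silently round up to a multiple of the page size: 10 -> 16 */
	chunk_size = round_up(chunk_size, page_sectors);

	/* Reject sizes that are not a power of 2, as set_chunk_size() does */
	if (chunk_size & (chunk_size - 1)) {
		fprintf(stderr, "Chunk size is not a power of 2\n");
		return 1;
	}

	/* mask and shift are derived exactly as in the kernel code */
	printf("chunk_size=%lu sectors, mask=%lu, shift=%d\n",
	       chunk_size, chunk_size - 1, __builtin_ffsl(chunk_size) - 1);
	return 0;
}

Run as written, this prints "chunk_size=16 sectors, mask=15, shift=4", matching the s->chunk_size, s->chunk_mask and s->chunk_shift assignments in set_chunk_size().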