📄 dm-raid1.c
字号:
read_lock(&rh->hash_lock); reg = __rh_lookup(rh, region); read_unlock(&rh->hash_lock); spin_lock_irqsave(&rh->region_lock, flags); if (atomic_dec_and_test(®->pending)) { /* * There is no pending I/O for this region. * We can move the region to corresponding list for next action. * At this point, the region is not yet connected to any list. * * If the state is RH_NOSYNC, the region should be kept off * from clean list. * The hash entry for RH_NOSYNC will remain in memory * until the region is recovered or the map is reloaded. */ /* do nothing for RH_NOSYNC */ if (reg->state == RH_RECOVERING) { list_add_tail(®->list, &rh->quiesced_regions); } else if (reg->state == RH_DIRTY) { reg->state = RH_CLEAN; list_add(®->list, &rh->clean_regions); } should_wake = 1; } spin_unlock_irqrestore(&rh->region_lock, flags); if (should_wake) wake(rh->ms);}/* * Starts quiescing a region in preparation for recovery. */static int __rh_recovery_prepare(struct region_hash *rh){ int r; struct region *reg; region_t region; /* * Ask the dirty log what's next. */ r = rh->log->type->get_resync_work(rh->log, ®ion); if (r <= 0) return r; /* * Get this region, and start it quiescing by setting the * recovering flag. */ read_lock(&rh->hash_lock); reg = __rh_find(rh, region); read_unlock(&rh->hash_lock); spin_lock_irq(&rh->region_lock); reg->state = RH_RECOVERING; /* Already quiesced ? */ if (atomic_read(®->pending)) list_del_init(®->list); else list_move(®->list, &rh->quiesced_regions); spin_unlock_irq(&rh->region_lock); return 1;}static void rh_recovery_prepare(struct region_hash *rh){ /* Extra reference to avoid race with rh_stop_recovery */ atomic_inc(&rh->recovery_in_flight); while (!down_trylock(&rh->recovery_count)) { atomic_inc(&rh->recovery_in_flight); if (__rh_recovery_prepare(rh) <= 0) { atomic_dec(&rh->recovery_in_flight); up(&rh->recovery_count); break; } } /* Drop the extra reference */ if (atomic_dec_and_test(&rh->recovery_in_flight)) wake_up_all(&_kmirrord_recovery_stopped);}/* * Returns any quiesced regions. */static struct region *rh_recovery_start(struct region_hash *rh){ struct region *reg = NULL; spin_lock_irq(&rh->region_lock); if (!list_empty(&rh->quiesced_regions)) { reg = list_entry(rh->quiesced_regions.next, struct region, list); list_del_init(®->list); /* remove from the quiesced list */ } spin_unlock_irq(&rh->region_lock); return reg;}static void rh_recovery_end(struct region *reg, int success){ struct region_hash *rh = reg->rh; spin_lock_irq(&rh->region_lock); if (success) list_add(®->list, ®->rh->recovered_regions); else { reg->state = RH_NOSYNC; list_add(®->list, ®->rh->failed_recovered_regions); } spin_unlock_irq(&rh->region_lock); wake(rh->ms);}static int rh_flush(struct region_hash *rh){ return rh->log->type->flush(rh->log);}static void rh_delay(struct region_hash *rh, struct bio *bio){ struct region *reg; read_lock(&rh->hash_lock); reg = __rh_find(rh, bio_to_region(rh, bio)); bio_list_add(®->delayed_bios, bio); read_unlock(&rh->hash_lock);}static void rh_stop_recovery(struct region_hash *rh){ int i; /* wait for any recovering regions */ for (i = 0; i < MAX_RECOVERY; i++) down(&rh->recovery_count);}static void rh_start_recovery(struct region_hash *rh){ int i; for (i = 0; i < MAX_RECOVERY; i++) up(&rh->recovery_count); wake(rh->ms);}/* * Every mirror should look like this one. */#define DEFAULT_MIRROR 0/* * This is yucky. We squirrel the mirror_set struct away inside * bi_next for write buffers. This is safe since the bh * doesn't get submitted to the lower levels of block layer. */static struct mirror_set *bio_get_ms(struct bio *bio){ return (struct mirror_set *) bio->bi_next;}static void bio_set_ms(struct bio *bio, struct mirror_set *ms){ bio->bi_next = (struct bio *) ms;}/*----------------------------------------------------------------- * Recovery. * * When a mirror is first activated we may find that some regions * are in the no-sync state. We have to recover these by * recopying from the default mirror to all the others. *---------------------------------------------------------------*/static void recovery_complete(int read_err, unsigned int write_err, void *context){ struct region *reg = (struct region *) context; if (read_err) /* Read error means the failure of default mirror. */ DMERR_LIMIT("Unable to read primary mirror during recovery"); if (write_err) DMERR_LIMIT("Write error during recovery (error = 0x%x)", write_err); rh_recovery_end(reg, !(read_err || write_err));}static int recover(struct mirror_set *ms, struct region *reg){ int r; unsigned int i; struct io_region from, to[KCOPYD_MAX_REGIONS], *dest; struct mirror *m; unsigned long flags = 0; /* fill in the source */ m = ms->default_mirror; from.bdev = m->dev->bdev; from.sector = m->offset + region_to_sector(reg->rh, reg->key); if (reg->key == (ms->nr_regions - 1)) { /* * The final region may be smaller than * region_size. */ from.count = ms->ti->len & (reg->rh->region_size - 1); if (!from.count) from.count = reg->rh->region_size; } else from.count = reg->rh->region_size; /* fill in the destinations */ for (i = 0, dest = to; i < ms->nr_mirrors; i++) { if (&ms->mirror[i] == ms->default_mirror) continue; m = ms->mirror + i; dest->bdev = m->dev->bdev; dest->sector = m->offset + region_to_sector(reg->rh, reg->key); dest->count = from.count; dest++; } /* hand to kcopyd */ set_bit(KCOPYD_IGNORE_ERROR, &flags); r = kcopyd_copy(ms->kcopyd_client, &from, ms->nr_mirrors - 1, to, flags, recovery_complete, reg); return r;}static void do_recovery(struct mirror_set *ms){ int r; struct region *reg; struct dirty_log *log = ms->rh.log; /* * Start quiescing some regions. */ rh_recovery_prepare(&ms->rh); /* * Copy any already quiesced regions. */ while ((reg = rh_recovery_start(&ms->rh))) { r = recover(ms, reg); if (r) rh_recovery_end(reg, 0); } /* * Update the in sync flag. */ if (!ms->in_sync && (log->type->get_sync_count(log) == ms->nr_regions)) { /* the sync is complete */ dm_table_event(ms->ti->table); ms->in_sync = 1; }}/*----------------------------------------------------------------- * Reads *---------------------------------------------------------------*/static struct mirror *choose_mirror(struct mirror_set *ms, sector_t sector){ /* FIXME: add read balancing */ return ms->default_mirror;}/* * remap a buffer to a particular mirror. */static void map_bio(struct mirror_set *ms, struct mirror *m, struct bio *bio){ bio->bi_bdev = m->dev->bdev; bio->bi_sector = m->offset + (bio->bi_sector - ms->ti->begin);}static void do_reads(struct mirror_set *ms, struct bio_list *reads){ region_t region; struct bio *bio; struct mirror *m; while ((bio = bio_list_pop(reads))) { region = bio_to_region(&ms->rh, bio); /* * We can only read balance if the region is in sync. */ if (rh_in_sync(&ms->rh, region, 1)) m = choose_mirror(ms, bio->bi_sector); else m = ms->default_mirror; map_bio(ms, m, bio); generic_make_request(bio); }}/*----------------------------------------------------------------- * Writes. * * We do different things with the write io depending on the * state of the region that it's in: * * SYNC: increment pending, use kcopyd to write to *all* mirrors * RECOVERING: delay the io until recovery completes * NOSYNC: increment pending, just write to the default mirror *---------------------------------------------------------------*/static void write_callback(unsigned long error, void *context){ unsigned int i; int uptodate = 1; struct bio *bio = (struct bio *) context; struct mirror_set *ms; ms = bio_get_ms(bio); bio_set_ms(bio, NULL); /* * NOTE: We don't decrement the pending count here, * instead it is done by the targets endio function. * This way we handle both writes to SYNC and NOSYNC * regions with the same code. */ if (error) { /* * only error the io if all mirrors failed. * FIXME: bogus */ uptodate = 0; for (i = 0; i < ms->nr_mirrors; i++) if (!test_bit(i, &error)) { uptodate = 1; break; } } bio_endio(bio, 0);}static void do_write(struct mirror_set *ms, struct bio *bio){ unsigned int i; struct io_region io[KCOPYD_MAX_REGIONS+1]; struct mirror *m; struct dm_io_request io_req = { .bi_rw = WRITE, .mem.type = DM_IO_BVEC, .mem.ptr.bvec = bio->bi_io_vec + bio->bi_idx, .notify.fn = write_callback, .notify.context = bio, .client = ms->io_client, }; for (i = 0; i < ms->nr_mirrors; i++) { m = ms->mirror + i; io[i].bdev = m->dev->bdev; io[i].sector = m->offset + (bio->bi_sector - ms->ti->begin); io[i].count = bio->bi_size >> 9; } bio_set_ms(bio, ms); (void) dm_io(&io_req, ms->nr_mirrors, io, NULL);}static void do_writes(struct mirror_set *ms, struct bio_list *writes){ int state; struct bio *bio; struct bio_list sync, nosync, recover, *this_list = NULL; if (!writes->head) return; /* * Classify each write. */ bio_list_init(&sync); bio_list_init(&nosync); bio_list_init(&recover); while ((bio = bio_list_pop(writes))) { state = rh_state(&ms->rh, bio_to_region(&ms->rh, bio), 1); switch (state) { case RH_CLEAN: case RH_DIRTY: this_list = &sync; break; case RH_NOSYNC: this_list = &nosync; break; case RH_RECOVERING: this_list = &recover; break; } bio_list_add(this_list, bio); } /* * Increment the pending counts for any regions that will * be written to (writes to recover regions are going to * be delayed). */ rh_inc_pending(&ms->rh, &sync); rh_inc_pending(&ms->rh, &nosync); ms->log_failure = rh_flush(&ms->rh) ? 1 : 0; /* * Dispatch io. */ if (unlikely(ms->log_failure)) while ((bio = bio_list_pop(&sync))) bio_endio(bio, -EIO); else while ((bio = bio_list_pop(&sync))) do_write(ms, bio); while ((bio = bio_list_pop(&recover))) rh_delay(&ms->rh, bio); while ((bio = bio_list_pop(&nosync))) { map_bio(ms, ms->default_mirror, bio); generic_make_request(bio); }}/*----------------------------------------------------------------- * kmirrord *---------------------------------------------------------------*/static void do_mirror(struct work_struct *work){ struct mirror_set *ms =container_of(work, struct mirror_set, kmirrord_work); struct bio_list reads, writes; spin_lock(&ms->lock); reads = ms->reads; writes = ms->writes; bio_list_init(&ms->reads); bio_list_init(&ms->writes); spin_unlock(&ms->lock); rh_update_states(&ms->rh); do_recovery(ms); do_reads(ms, &reads); do_writes(ms, &writes);}/*-----------------------------------------------------------------
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -