raid1.c
From the Linux kernel source tree · C code · 2,200 lines total · page 1 of 4
C
2,200 lines
/*
 * Completion callback for the READ half of a resync/recovery request.
 * We only record the outcome here; the re-write (or re-read on failure)
 * is deferred to the raid1d thread via reschedule_retry().
 */
static void end_sync_read(struct bio *bio, int error)
{
	r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private);
	int i;

	/* Find which mirror slot this bio belongs to. */
	for (i=r1_bio->mddev->raid_disks; i--; )
		if (r1_bio->bios[i] == bio)
			break;
	BUG_ON(i < 0);
	update_head_pos(i, r1_bio);
	/*
	 * we have read a block, now it needs to be re-written,
	 * or re-read if the read failed.
	 * We don't do much here, just schedule handling by raid1d
	 */
	if (test_bit(BIO_UPTODATE, &bio->bi_flags))
		set_bit(R1BIO_Uptodate, &r1_bio->state);

	if (atomic_dec_and_test(&r1_bio->remaining))
		reschedule_retry(r1_bio);
}

/*
 * Completion callback for the WRITE half of a resync/recovery request.
 * On failure, the bitmap bits covering the range are re-flagged as
 * needing sync and the target device is failed with md_error().
 * When the last outstanding write finishes, the whole r1_bio is
 * accounted via md_done_sync() and its buffers released.
 */
static void end_sync_write(struct bio *bio, int error)
{
	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private);
	mddev_t *mddev = r1_bio->mddev;
	conf_t *conf = mddev_to_conf(mddev);
	int i;
	int mirror=0;

	for (i = 0; i < conf->raid_disks; i++)
		if (r1_bio->bios[i] == bio) {
			mirror = i;
			break;
		}
	if (!uptodate) {
		int sync_blocks = 0;
		sector_t s = r1_bio->sector;
		long sectors_to_go = r1_bio->sectors;
		/* make sure these bits don't get cleared: the range may
		 * span several bitmap chunks, so walk it chunk by chunk. */
		do {
			bitmap_end_sync(mddev->bitmap, s,
					&sync_blocks, 1);
			s += sync_blocks;
			sectors_to_go -= sync_blocks;
		} while (sectors_to_go > 0);
		md_error(mddev, conf->mirrors[mirror].rdev);
	}

	update_head_pos(mirror, r1_bio);

	if (atomic_dec_and_test(&r1_bio->remaining)) {
		md_done_sync(mddev, r1_bio->sectors, uptodate);
		put_buf(r1_bio);
	}
}

/*
 * Second stage of a resync request, run from raid1d after all reads
 * have completed.  Three phases are visible below:
 *   1. For a "check"/"repair" pass (MD_RECOVERY_REQUESTED): compare the
 *      data read from every mirror against a primary copy, count
 *      mismatches, and prepare over-writes only where copies differ.
 *   2. If no copy of the block could be read at all, retry the reads
 *      synchronously, one page at a time, from any remaining mirror.
 *   3. Schedule the resulting write-out bios to the mirrors.
 */
static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
{
	conf_t *conf = mddev_to_conf(mddev);
	int i;
	int disks = conf->raid_disks;
	struct bio *bio, *wbio;

	bio = r1_bio->bios[r1_bio->read_disk];

	if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
		/* We have read all readable devices.  If we haven't
		 * got the block, then there is no hope left.
		 * If we have, then we want to do a comparison
		 * and skip the write if everything is the same.
		 * If any blocks failed to read, then we need to
		 * attempt an over-write
		 */
		int primary;
		if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) {
			/* No readable copy anywhere: fail every device we
			 * tried to read from and give up on this range. */
			for (i=0; i<mddev->raid_disks; i++)
				if (r1_bio->bios[i]->bi_end_io == end_sync_read)
					md_error(mddev, conf->mirrors[i].rdev);
			md_done_sync(mddev, r1_bio->sectors, 1);
			put_buf(r1_bio);
			return;
		}
		/* Pick the first successfully-read mirror as the reference
		 * ("primary") copy for the comparisons below. */
		for (primary=0; primary<mddev->raid_disks; primary++)
			if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
			    test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) {
				r1_bio->bios[primary]->bi_end_io = NULL;
				rdev_dec_pending(conf->mirrors[primary].rdev, mddev);
				break;
			}
		r1_bio->read_disk = primary;
		for (i=0; i<mddev->raid_disks; i++)
			if (r1_bio->bios[i]->bi_end_io == end_sync_read) {
				int j;
				int vcnt = r1_bio->sectors >> (PAGE_SHIFT- 9);
				struct bio *pbio = r1_bio->bios[primary];
				struct bio *sbio = r1_bio->bios[i];

				if (test_bit(BIO_UPTODATE, &sbio->bi_flags)) {
					/* Compare page-by-page against the
					 * primary; j < 0 means all equal. */
					for (j = vcnt; j-- ; ) {
						struct page *p, *s;
						p = pbio->bi_io_vec[j].bv_page;
						s = sbio->bi_io_vec[j].bv_page;
						if (memcmp(page_address(p),
							   page_address(s),
							   PAGE_SIZE))
							break;
					}
				} else
					/* Read failed on this mirror; force a
					 * mismatch so we over-write it. */
					j = 0;
				if (j >= 0)
					mddev->resync_mismatches += r1_bio->sectors;
				if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)
					      && test_bit(BIO_UPTODATE, &sbio->bi_flags))) {
					/* Copies match, or this is a read-only
					 * "check" pass: no write needed. */
					sbio->bi_end_io = NULL;
					rdev_dec_pending(conf->mirrors[i].rdev, mddev);
				} else {
					/* fixup the bio for reuse as a write
					 * carrying the primary's data */
					sbio->bi_vcnt = vcnt;
					sbio->bi_size = r1_bio->sectors << 9;
					sbio->bi_idx = 0;
					sbio->bi_phys_segments = 0;
					sbio->bi_hw_segments = 0;
					sbio->bi_hw_front_size = 0;
					sbio->bi_hw_back_size = 0;
					sbio->bi_flags &= ~(BIO_POOL_MASK - 1);
					sbio->bi_flags |= 1 << BIO_UPTODATE;
					sbio->bi_next = NULL;
					sbio->bi_sector = r1_bio->sector +
						conf->mirrors[i].rdev->data_offset;
					sbio->bi_bdev = conf->mirrors[i].rdev->bdev;
					for (j = 0; j < vcnt ; j++)
						memcpy(page_address(sbio->bi_io_vec[j].bv_page),
						       page_address(pbio->bi_io_vec[j].bv_page),
						       PAGE_SIZE);
				}
			}
	}
	if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) {
		/* ouch - failed to read all of that.
		 * Try some synchronous reads of other devices to get
		 * good data, much like with normal read errors.  Only
		 * read into the pages we already have so we don't
		 * need to re-issue the read request.
		 * We don't need to freeze the array, because being in an
		 * active sync request, there is no normal IO, and
		 * no overlapping syncs.
		 */
		sector_t sect = r1_bio->sector;
		int sectors = r1_bio->sectors;
		int idx = 0;

		while(sectors) {
			/* Work at most one page (PAGE_SIZE>>9 sectors) at
			 * a time, matching the bio's page vector layout. */
			int s = sectors;
			int d = r1_bio->read_disk;
			int success = 0;
			mdk_rdev_t *rdev;

			if (s > (PAGE_SIZE>>9))
				s = PAGE_SIZE >> 9;
			do {
				if (r1_bio->bios[d]->bi_end_io == end_sync_read) {
					/* No rcu protection needed here devices
					 * can only be removed when no resync is
					 * active, and resync is currently active
					 */
					rdev = conf->mirrors[d].rdev;
					if (sync_page_io(rdev->bdev,
							 sect + rdev->data_offset,
							 s<<9,
							 bio->bi_io_vec[idx].bv_page,
							 READ)) {
						success = 1;
						break;
					}
				}
				d++;
				if (d == conf->raid_disks)
					d = 0;
			} while (!success && d != r1_bio->read_disk);

			if (success) {
				int start = d;
				/* write it back and re-read */
				set_bit(R1BIO_Uptodate, &r1_bio->state);
				while (d != r1_bio->read_disk) {
					/* Walk backwards from the disk that
					 * succeeded to the original reader. */
					if (d == 0)
						d = conf->raid_disks;
					d--;
					if (r1_bio->bios[d]->bi_end_io != end_sync_read)
						continue;
					rdev = conf->mirrors[d].rdev;
					atomic_add(s, &rdev->corrected_errors);
					if (sync_page_io(rdev->bdev,
							 sect + rdev->data_offset,
							 s<<9,
							 bio->bi_io_vec[idx].bv_page,
							 WRITE) == 0)
						md_error(mddev, rdev);
				}
				d = start;
				while (d != r1_bio->read_disk) {
					/* Re-read to verify the re-write took. */
					if (d == 0)
						d = conf->raid_disks;
					d--;
					if (r1_bio->bios[d]->bi_end_io != end_sync_read)
						continue;
					rdev = conf->mirrors[d].rdev;
					if (sync_page_io(rdev->bdev,
							 sect + rdev->data_offset,
							 s<<9,
							 bio->bi_io_vec[idx].bv_page,
							 READ) == 0)
						md_error(mddev, rdev);
				}
			} else {
				char b[BDEVNAME_SIZE];
				/* Cannot read from anywhere, array is toast */
				md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev);
				printk(KERN_ALERT "raid1: %s: unrecoverable I/O read error"
				       " for block %llu\n",
				       bdevname(bio->bi_bdev,b),
				       (unsigned long long)r1_bio->sector);
				md_done_sync(mddev, r1_bio->sectors, 0);
				put_buf(r1_bio);
				return;
			}
			sectors -= s;
			sect += s;
			idx ++;
		}
	}

	/*
	 * schedule writes
	 */
	atomic_set(&r1_bio->remaining, 1);
	for (i = 0; i < disks ; i++) {
		wbio = r1_bio->bios[i];
		if (wbio->bi_end_io == NULL ||
		    (wbio->bi_end_io == end_sync_read &&
		     (i == r1_bio->read_disk ||
		      !test_bit(MD_RECOVERY_SYNC, &mddev->recovery))))
			continue;

		wbio->bi_rw = WRITE;
		wbio->bi_end_io = end_sync_write;
		atomic_inc(&r1_bio->remaining);
		md_sync_acct(conf->mirrors[i].rdev->bdev, wbio->bi_size >> 9);

		generic_make_request(wbio);
	}

	if (atomic_dec_and_test(&r1_bio->remaining)) {
		/* if we're here, all write(s) have completed, so clean up */
		md_done_sync(mddev, r1_bio->sectors, 1);
		put_buf(r1_bio);
	}
}

/*
 * NOTE(review): this comment describes the raid1d() thread defined
 * further below, not fix_read_error() which immediately follows it.
 *
 * This is a kernel thread which:
 *
 *	1.	Retries failed read operations on working mirrors.
 *	2.	Updates the raid superblock when problems are encountered.
 *	3.	Performs writes following reads for array synchronising.
 */

/*
 * Try to repair a read error on @read_disk covering @sectors sectors
 * starting at array sector @sect: find any in-sync mirror that can
 * supply the data (one page at a time, via conf->tmppage), write it
 * back to the other in-sync mirrors, then re-read to verify.  Devices
 * that fail the write-back or verify read are failed with md_error().
 * If no mirror can supply the data, the original read_disk is failed
 * and the repair attempt is abandoned.
 */
static void fix_read_error(conf_t *conf, int read_disk,
			   sector_t sect, int sectors)
{
	mddev_t *mddev = conf->mddev;
	while(sectors) {
		int s = sectors;
		int d = read_disk;
		int success = 0;
		int start;
		mdk_rdev_t *rdev;

		/* One page per iteration, to fit conf->tmppage. */
		if (s > (PAGE_SIZE>>9))
			s = PAGE_SIZE >> 9;

		do {
			/* Note: no rcu protection needed here
			 * as this is synchronous in the raid1d thread
			 * which is the thread that might remove
			 * a device.  If raid1d ever becomes multi-threaded....
			 */
			rdev = conf->mirrors[d].rdev;
			if (rdev &&
			    test_bit(In_sync, &rdev->flags) &&
			    sync_page_io(rdev->bdev,
					 sect + rdev->data_offset,
					 s<<9,
					 conf->tmppage, READ))
				success = 1;
			else {
				d++;
				if (d == conf->raid_disks)
					d = 0;
			}
		} while (!success && d != read_disk);

		if (!success) {
			/* Cannot read from anywhere -- bye bye array */
			md_error(mddev, conf->mirrors[read_disk].rdev);
			break;
		}
		/* write it back and re-read */
		start = d;
		while (d != read_disk) {
			if (d==0)
				d = conf->raid_disks;
			d--;
			rdev = conf->mirrors[d].rdev;
			if (rdev &&
			    test_bit(In_sync, &rdev->flags)) {
				if (sync_page_io(rdev->bdev,
						 sect + rdev->data_offset,
						 s<<9, conf->tmppage, WRITE) == 0)
					/* Well, this device is dead */
					md_error(mddev, rdev);
			}
		}
		d = start;
		while (d != read_disk) {
			char b[BDEVNAME_SIZE];
			if (d==0)
				d = conf->raid_disks;
			d--;
			rdev = conf->mirrors[d].rdev;
			if (rdev &&
			    test_bit(In_sync, &rdev->flags)) {
				if (sync_page_io(rdev->bdev,
						 sect + rdev->data_offset,
						 s<<9, conf->tmppage, READ) == 0)
					/* Well, this device is dead */
					md_error(mddev, rdev);
				else {
					atomic_add(s, &rdev->corrected_errors);
					printk(KERN_INFO
					       "raid1:%s: read error corrected "
					       "(%d sectors at %llu on %s)\n",
					       mdname(mddev), s,
					       (unsigned long long)(sect +
								    rdev->data_offset),
					       bdevname(rdev->bdev, b));
				}
			}
		}
		sectors -= s;
		sect += s;
	}
}

/*
 * The per-array raid1 daemon thread.  On each wakeup it:
 *   - submits any writes queued on conf->pending_bio_list (after
 *     flushing pending bitmap updates to disk),
 *   - processes r1_bios on conf->retry_list: completing resync
 *     write-out (sync_request_write), resubmitting barrier writes
 *     that failed with -EOPNOTSUPP, or redirecting/repairing failed
 *     normal reads.
 */
static void raid1d(mddev_t *mddev)
{
	r1bio_t *r1_bio;
	struct bio *bio;
	unsigned long flags;
	conf_t *conf = mddev_to_conf(mddev);
	struct list_head *head = &conf->retry_list;
	int unplug=0;
	mdk_rdev_t *rdev;

	md_check_recovery(mddev);

	for (;;) {
		char b[BDEVNAME_SIZE];
		spin_lock_irqsave(&conf->device_lock, flags);

		if (conf->pending_bio_list.head) {
			bio = bio_list_get(&conf->pending_bio_list);
			blk_remove_plug(mddev->queue);
			spin_unlock_irqrestore(&conf->device_lock, flags);
			/* flush any pending bitmap writes to disk before proceeding w/ I/O */
			bitmap_unplug(mddev->bitmap);

			while (bio) { /* submit pending writes */
				struct bio *next = bio->bi_next;
				bio->bi_next = NULL;
				generic_make_request(bio);
				bio = next;
			}
			unplug = 1;

			continue;
		}

		if (list_empty(head))
			break;
		r1_bio = list_entry(head->prev, r1bio_t, retry_list);
		list_del(head->prev);
		conf->nr_queued--;
		spin_unlock_irqrestore(&conf->device_lock, flags);

		/* r1_bio may belong to a different array than the one we
		 * were called for; re-derive mddev/conf from it. */
		mddev = r1_bio->mddev;
		conf = mddev_to_conf(mddev);
		if (test_bit(R1BIO_IsSync, &r1_bio->state)) {
			sync_request_write(mddev, r1_bio);
			unplug = 1;
		} else if (test_bit(R1BIO_BarrierRetry, &r1_bio->state)) {
			/* some requests in the r1bio were BIO_RW_BARRIER
			 * requests which failed with -EOPNOTSUPP.  Hohumm..
			 * Better resubmit without the barrier.
			 * We know which devices to resubmit for, because
			 * all others have had their bios[] entry cleared.
			 * We already have a nr_pending reference on these rdevs.
			 */
			int i;
			const int do_sync = bio_sync(r1_bio->master_bio);
			clear_bit(R1BIO_BarrierRetry, &r1_bio->state);
			clear_bit(R1BIO_Barrier, &r1_bio->state);
			for (i=0; i < conf->raid_disks; i++)
				if (r1_bio->bios[i])
					atomic_inc(&r1_bio->remaining);
			for (i=0; i < conf->raid_disks; i++)
				if (r1_bio->bios[i]) {
					struct bio_vec *bvec;
					int j;

					bio = bio_clone(r1_bio->master_bio, GFP_NOIO);
					/* copy pages from the failed bio, as
					 * this might be a write-behind device */
					__bio_for_each_segment(bvec, bio, j, 0)
						bvec->bv_page = bio_iovec_idx(r1_bio->bios[i], j)->bv_page;
					bio_put(r1_bio->bios[i]);
					bio->bi_sector = r1_bio->sector +
						conf->mirrors[i].rdev->data_offset;
					bio->bi_bdev = conf->mirrors[i].rdev->bdev;
					bio->bi_end_io = raid1_end_write_request;
					bio->bi_rw = WRITE | do_sync;
					bio->bi_private = r1_bio;
					r1_bio->bios[i] = bio;
					generic_make_request(bio);
				}
		} else {
			int disk;

			/* we got a read error. Maybe the drive is bad.  Maybe just
			 * the block and we can fix it.
			 * We freeze all other IO, and try reading the block from
			 * other devices.  When we find one, we re-write
			 * and check it that fixes the read error.
			 * This is all done synchronously while the array is
			 * frozen
			 */
			if (mddev->ro == 0) {
				freeze_array(conf);
				fix_read_error(conf, r1_bio->read_disk,
					       r1_bio->sector,
					       r1_bio->sectors);
				unfreeze_array(conf);
			}

			bio = r1_bio->bios[r1_bio->read_disk];
			if ((disk=read_balance(conf, r1_bio)) == -1) {
				printk(KERN_ALERT "raid1: %s: unrecoverable I/O"
				       " read error for block %llu\n",
				       bdevname(bio->bi_bdev,b),
				       (unsigned long long)r1_bio->sector);
				raid_end_bio_io(r1_bio);
			} else {
				const int do_sync = bio_sync(r1_bio->master_bio);
				/* On a read-only array, mark the failed slot
				 * IO_BLOCKED so we never retry it. */
				r1_bio->bios[r1_bio->read_disk] =
					mddev->ro ? IO_BLOCKED : NULL;
				r1_bio->read_disk = disk;
				bio_put(bio);
				bio = bio_clone(r1_bio->master_bio, GFP_NOIO);
				r1_bio->bios[r1_bio->read_disk] = bio;
				rdev = conf->mirrors[disk].rdev;
				if (printk_ratelimit())
					printk(KERN_ERR "raid1: %s: redirecting sector %llu to"
					       " another mirror\n",
					       bdevname(rdev->bdev,b),
					       (unsigned long long)r1_bio->sector);
				bio->bi_sector = r1_bio->sector + rdev->data_offset;
				bio->bi_bdev = rdev->bdev;
				bio->bi_end_io = raid1_end_read_request;
				bio->bi_rw = READ | do_sync;
				bio->bi_private = r1_bio;
				unplug = 1;
				generic_make_request(bio);
			}
		}
	}
	spin_unlock_irqrestore(&conf->device_lock, flags);
	if (unplug)
		unplug_slaves(mddev);
}

/*
 * Allocate the resync buffer pool (one r1buf per RESYNC_BLOCK_SIZE of
 * the RESYNC_WINDOW) and reset the resync position.
 * Returns 0 on success, -ENOMEM if the mempool cannot be created.
 */
static int init_resync(conf_t *conf)
{
	int buffs;

	buffs = RESYNC_WINDOW / RESYNC_BLOCK_SIZE;
	BUG_ON(conf->r1buf_pool);
	conf->r1buf_pool = mempool_create(buffs, r1buf_pool_alloc, r1buf_pool_free,
					  conf->poolinfo);
	if (!conf->r1buf_pool)
		return -ENOMEM;
	conf->next_resync = 0;
	return 0;
}

/*
 * perform a "sync" on one "block"
 *
 * We need to make sure that no normal I/O request - particularly write
 * requests - conflict with active sync requests.
 *
 * This is achieved by tracking pending requests and a 'barrier' concept
 * that can be installed to exclude normal IO requests.
*/static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster){ conf_t *conf = mddev_to_conf(mddev); r1bio_t *r1_bio; struct bio *bio; sector_t max_sector, nr_sectors; int disk = -1; int i; int wonly = -1; int write_targets = 0, read_targets = 0; int sync_blocks; int still_degraded = 0; if (!conf->r1buf_pool) {/* printk("sync start - bitmap %p\n", mddev->bitmap);*/ if (init_resync(conf)) return 0; } max_sector = mddev->size << 1; if (sector_nr >= max_sector) { /* If we aborted, we need to abort the * sync on the 'current' bitmap chunk (there will * only be one in raid1 resync. * We can find the current addess in mddev->curr_resync
⌨️ Keyboard shortcuts
Copy code: Ctrl + C
Search in code: Ctrl + F
Full-screen mode: F11
Increase font size: Ctrl + =
Decrease font size: Ctrl + -
Show shortcuts: ?