raid1.c
		close_sync(conf);
		sdisk = conf->mirrors + spare_disk;
		sdisk->operational = 0;
		sdisk->write_only = 0;
		break;
	/*
	 * Activate (mark read-write) the (now sync) spare disk,
	 * which means we switch its 'raid position' (->raid_disk)
	 * with the failed disk. (only the first 'conf->nr_disks'
	 * slots are used for 'real' disks and we must preserve this
	 * property)
	 */
	case DISKOP_SPARE_ACTIVE:
		close_sync(conf);
		sdisk = conf->mirrors + spare_disk;
		fdisk = conf->mirrors + failed_disk;

		spare_desc = &sb->disks[sdisk->number];
		failed_desc = &sb->disks[fdisk->number];

		if (spare_desc != *d) {
			MD_BUG();
			err = 1;
			goto abort;
		}

		if (spare_desc->raid_disk != sdisk->raid_disk) {
			MD_BUG();
			err = 1;
			goto abort;
		}

		if (sdisk->raid_disk != spare_disk) {
			MD_BUG();
			err = 1;
			goto abort;
		}

		if (failed_desc->raid_disk != fdisk->raid_disk) {
			MD_BUG();
			err = 1;
			goto abort;
		}

		if (fdisk->raid_disk != failed_disk) {
			MD_BUG();
			err = 1;
			goto abort;
		}

		/*
		 * do the switch finally
		 */
		xchg_values(*spare_desc, *failed_desc);
		xchg_values(*fdisk, *sdisk);

		/*
		 * (careful, 'failed' and 'spare' are switched from now on)
		 *
		 * we want to preserve linear numbering and we want to
		 * give the proper raid_disk number to the now activated
		 * disk. (this means we switch back these values)
		 */
		xchg_values(spare_desc->raid_disk, failed_desc->raid_disk);
		xchg_values(sdisk->raid_disk, fdisk->raid_disk);
		xchg_values(spare_desc->number, failed_desc->number);
		xchg_values(sdisk->number, fdisk->number);

		*d = failed_desc;

		if (sdisk->dev == MKDEV(0,0))
			sdisk->used_slot = 0;
		/*
		 * this really activates the spare.
		 */
		fdisk->spare = 0;
		fdisk->write_only = 0;

		/*
		 * if we activate a spare, we definitely replace a
		 * non-operational disk slot in the 'low' area of
		 * the disk array.
		 */
		conf->working_disks++;

		break;

	case DISKOP_HOT_REMOVE_DISK:
		rdisk = conf->mirrors + removed_disk;

		if (rdisk->spare && (removed_disk < conf->raid_disks)) {
			MD_BUG();
			err = 1;
			goto abort;
		}
		rdisk->dev = MKDEV(0,0);
		rdisk->used_slot = 0;
		conf->nr_disks--;
		break;

	case DISKOP_HOT_ADD_DISK:
		adisk = conf->mirrors + added_disk;
		added_desc = *d;

		if (added_disk != added_desc->number) {
			MD_BUG();
			err = 1;
			goto abort;
		}

		adisk->number = added_desc->number;
		adisk->raid_disk = added_desc->raid_disk;
		adisk->dev = MKDEV(added_desc->major, added_desc->minor);

		adisk->operational = 0;
		adisk->write_only = 0;
		adisk->spare = 1;
		adisk->used_slot = 1;
		adisk->head_position = 0;
		conf->nr_disks++;

		break;

	default:
		MD_BUG();
		err = 1;
		goto abort;
	}
abort:
	md_spin_unlock_irq(&conf->device_lock);
	if (state == DISKOP_SPARE_ACTIVE || state == DISKOP_SPARE_INACTIVE)
		/* should move to "END_REBUILD" when such exists */
		raid1_shrink_buffers(conf);

	print_raid1_conf(conf);
	return err;
}
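/*
 * For reference: xchg_values() is defined earlier in raid1.c and is not
 * part of this excerpt. A minimal sketch of the swap it performs
 * (assuming GCC's __typeof__, which the rest of the driver relies on):
 *
 *	#define xchg_values(x, y) do {		\
 *		__typeof__(x) __tmp = x;	\
 *		x = y;				\
 *		y = __tmp;			\
 *	} while (0)
 *
 * DISKOP_SPARE_ACTIVE above swaps the whole descriptors first, then
 * swaps ->raid_disk and ->number back, so the array keeps its linear
 * slot numbering while the activated disk takes over the failed slot.
 */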
#define IO_ERROR KERN_ALERT \
"raid1: %s: unrecoverable I/O read error for block %lu\n"

#define REDIRECT_SECTOR KERN_ERR \
"raid1: %s: redirecting sector %lu to another mirror\n"

/*
 * This is a kernel thread which:
 *
 *	1.	Retries failed read operations on working mirrors.
 *	2.	Updates the raid superblock when problems are encountered.
 *	3.	Performs writes following reads for array synchronising.
 */
static void end_sync_write(struct buffer_head *bh, int uptodate);
static void end_sync_read(struct buffer_head *bh, int uptodate);

static void raid1d (void *data)
{
	struct raid1_bh *r1_bh;
	struct buffer_head *bh;
	unsigned long flags;
	mddev_t *mddev;
	kdev_t dev;

	for (;;) {
		md_spin_lock_irqsave(&retry_list_lock, flags);
		r1_bh = raid1_retry_list;
		if (!r1_bh)
			break;
		raid1_retry_list = r1_bh->next_r1;
		md_spin_unlock_irqrestore(&retry_list_lock, flags);

		mddev = r1_bh->mddev;
		if (mddev->sb_dirty) {
			printk(KERN_INFO "dirty sb detected, updating.\n");
			mddev->sb_dirty = 0;
			md_update_sb(mddev);
		}
		bh = &r1_bh->bh_req;
		switch(r1_bh->cmd) {
		case SPECIAL:
			/* have to allocate lots of bh structures and
			 * schedule writes
			 */
			if (test_bit(R1BH_Uptodate, &r1_bh->state)) {
				int i, sum_bhs = 0;
				int disks = MD_SB_DISKS;
				struct buffer_head *bhl, *mbh;
				raid1_conf_t *conf;
				int sectors = bh->b_size >> 9;

				conf = mddev_to_conf(mddev);
				/* don't really need this many */
				bhl = raid1_alloc_bh(conf, conf->raid_disks);
				for (i = 0; i < disks; i++) {
					if (!conf->mirrors[i].operational)
						continue;
					if (i == conf->last_used)
						/* we read from here, no need to write */
						continue;
					if (i < conf->raid_disks
					    && !conf->resync_mirrors)
						/* don't need to write this,
						 * we are just rebuilding */
						continue;
					mbh = bhl;
					if (!mbh) {
						MD_BUG();
						break;
					}
					bhl = mbh->b_next;
					mbh->b_this_page = (struct buffer_head *)1;

					/*
					 * prepare mirrored bh (fields ordered for max mem throughput):
					 */
					mbh->b_blocknr = bh->b_blocknr;
					mbh->b_dev     = conf->mirrors[i].dev;
					mbh->b_rdev    = conf->mirrors[i].dev;
					mbh->b_rsector = bh->b_blocknr * sectors;
					mbh->b_state   = (1<<BH_Req) | (1<<BH_Dirty) |
							 (1<<BH_Mapped) | (1<<BH_Lock);
					atomic_set(&mbh->b_count, 1);
					mbh->b_size    = bh->b_size;
					mbh->b_page    = bh->b_page;
					mbh->b_data    = bh->b_data;
					mbh->b_list    = BUF_LOCKED;
					mbh->b_end_io  = end_sync_write;
					mbh->b_private = r1_bh;

					mbh->b_next = r1_bh->mirror_bh_list;
					r1_bh->mirror_bh_list = mbh;

					sum_bhs++;
				}
				md_atomic_set(&r1_bh->remaining, sum_bhs);
				if (bhl)
					raid1_free_bh(conf, bhl);
				mbh = r1_bh->mirror_bh_list;

				while (mbh) {
					struct buffer_head *bh1 = mbh;
					mbh = mbh->b_next;
					generic_make_request(WRITE, bh1);
					md_sync_acct(bh1->b_dev, bh1->b_size/512);
				}
			} else {
				dev = bh->b_dev;
				raid1_map (mddev, &bh->b_dev, bh->b_size >> 9);
				if (bh->b_dev == dev) {
					printk (IO_ERROR, partition_name(bh->b_dev), bh->b_blocknr);
					md_done_sync(mddev, bh->b_size>>10, 0);
				} else {
					printk (REDIRECT_SECTOR,
						partition_name(bh->b_dev), bh->b_blocknr);
					bh->b_rdev = bh->b_dev;
					generic_make_request(READ, bh);
				}
			}
			break;
		case READ:
		case READA:
			dev = bh->b_dev;
			raid1_map (mddev, &bh->b_dev, bh->b_size >> 9);
			if (bh->b_dev == dev) {
				printk (IO_ERROR, partition_name(bh->b_dev), bh->b_blocknr);
				raid1_end_bh_io(r1_bh, 0);
			} else {
				printk (REDIRECT_SECTOR,
					partition_name(bh->b_dev), bh->b_blocknr);
				bh->b_rdev = bh->b_dev;
				generic_make_request (r1_bh->cmd, bh);
			}
			break;
		}
	}
	md_spin_unlock_irqrestore(&retry_list_lock, flags);
}
#undef IO_ERROR
#undef REDIRECT_SECTOR

/*
 * Private kernel thread to reconstruct mirrors after an unclean
 * shutdown.
 */
static void raid1syncd (void *data)
{
	raid1_conf_t *conf = data;
	mddev_t *mddev = conf->mddev;

	if (!conf->resync_mirrors)
		return;
	if (conf->resync_mirrors == 2)
		return;
	down(&mddev->recovery_sem);
	if (!md_do_sync(mddev, NULL)) {
		/*
		 * Only if everything went Ok.
		 */
		conf->resync_mirrors = 0;
	}

	close_sync(conf);

	up(&mddev->recovery_sem);
	raid1_shrink_buffers(conf);
}
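/*
 * Context (not part of this excerpt): raid1syncd is started through the
 * generic 2.4 md thread API from the array setup path, roughly:
 *
 *	conf->resync_thread = md_register_thread(raid1syncd, conf,
 *						 "raid1syncd");
 *
 * The exact call site is in raid1_run(), elsewhere in this file.
 */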
/*
 * perform a "sync" on one "block"
 *
 * We need to make sure that no normal I/O request - particularly write
 * requests - conflict with active sync requests.
 * This is achieved by conceptually dividing the device space into a
 * number of sections:
 *  DONE:    0 .. a-1    These blocks are in-sync
 *  ACTIVE:  a .. b-1    These blocks may have active sync requests, but
 *                       no normal IO requests
 *  READY:   b .. c-1    These blocks have no normal IO requests - sync
 *                       request may be happening
 *  PENDING: c .. d-1    These blocks may have IO requests, but no new
 *                       ones will be added
 *  FUTURE:  d .. end    These blocks are not to be considered yet. IO may
 *                       be happening, but not sync
 *
 * We keep a
 *  phase    which flips (0 or 1) each time d moves and
 * a count of:
 *  z = active io requests in FUTURE since d moved - marked with
 *      current phase
 *  y = active io requests in FUTURE before d moved, or PENDING -
 *      marked with previous phase
 *  x = active sync requests in READY
 *  w = active sync requests in ACTIVE
 *  v = active io requests in DONE
 *
 * Normally, a=b=c=d=0 and z= active io requests
 *   or a=b=c=d=END and v= active io requests
 * Allowed changes to a,b,c,d:
 *  A: c==d && y==0 -> d+=window, y=z, z=0, phase=!phase
 *  B: y==0 -> c=d
 *  C: b=c, w+=x, x=0
 *  D: w==0 -> a=b
 *  E: a==b==c==d==end -> a=b=c=d=0, z=v, v=0
 *
 * At start of sync we apply A.
 * When y reaches 0, we apply B then A then begin sync requests.
 * When the sync point reaches c-1, we wait for y==0 and w==0, and
 * then apply B then A then D then C.
 * Finally, we apply E.
 *
 * The sync request simply issues a "read" against a working drive.
 * This is marked so that on completion the raid1d thread is woken to
 * issue suitable write requests.
 */
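/*
 * Worked example of the transitions above (window = 32, counts
 * illustrative, not from the source): at the start of a sync, a=b=c=d=0
 * and z holds all active IO. Applying A sets d=32, y=z, z=0 and flips
 * the phase, making blocks 0-31 PENDING. When the old-phase requests
 * drain (y==0), B sets c=32 so blocks 0-31 become READY and sync reads
 * can be issued there (counted in x); another A then advances d to 64.
 * C slides b up to c (x moves into w), and once those sync writes
 * finish (w==0), D slides a up to b, marking blocks 0-31 DONE. E
 * finally resets everything when d reaches the end of the device.
 */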
static int raid1_sync_request (mddev_t *mddev, unsigned long block_nr)
{
	raid1_conf_t *conf = mddev_to_conf(mddev);
	struct mirror_info *mirror;
	struct raid1_bh *r1_bh;
	struct buffer_head *bh;
	int bsize;
	int disk;

	spin_lock_irq(&conf->segment_lock);
	if (!block_nr) {
		/* initialize ... */
		int buffs;
		conf->start_active = 0;
		conf->start_ready = 0;
		conf->start_pending = 0;
		conf->start_future = 0;
		conf->phase = 0;
		/* we want enough buffers to hold twice the window of 128 */
		buffs = 128 * 2 / (PAGE_SIZE>>9);
		buffs = raid1_grow_buffers(conf, buffs);
		if (buffs < 2)
			goto nomem;

		conf->window = buffs*(PAGE_SIZE>>9)/2;
		conf->cnt_future += conf->cnt_done+conf->cnt_pending;
		conf->cnt_done = conf->cnt_pending = 0;
		if (conf->cnt_ready || conf->cnt_active)
			MD_BUG();
	}
	while ((block_nr<<1) >= conf->start_pending) {
		PRINTK("wait .. sect=%lu start_active=%d ready=%d pending=%d future=%d, cnt_done=%d active=%d ready=%d pending=%d future=%d\n",
			block_nr<<1, conf->start_active, conf->start_ready,
			conf->start_pending, conf->start_future,
			conf->cnt_done, conf->cnt_active, conf->cnt_ready,
			conf->cnt_pending, conf->cnt_future);
		wait_event_lock_irq(conf->wait_done,
					!conf->cnt_active,
					conf->segment_lock);
		wait_event_lock_irq(conf->wait_ready,
					!conf->cnt_pending,
					conf->segment_lock);
		conf->start_active = conf->start_ready;
		conf->start_ready = conf->start_pending;
		conf->start_pending = conf->start_future;
		conf->start_future = conf->start_future+conf->window;
		// Note: falling off the end is not a problem
		conf->phase = conf->phase ^ 1;
		conf->cnt_active = conf->cnt_ready;
		conf->cnt_ready = 0;
		conf->cnt_pending = conf->cnt_future;
		conf->cnt_future = 0;
		wake_up(&conf->wait_done);
	}
	conf->cnt_ready++;
	spin_unlock_irq(&conf->segment_lock);

	/* If reconstructing, and >1 working disc,
	 * could dedicate one to rebuild and others to
	 * service read requests ..
	 */
	disk = conf->last_used;
	/* make sure disk is operational */
	while (!conf->mirrors[disk].operational) {
		if (disk <= 0)
			disk = conf->raid_disks;
		disk--;
		if (disk == conf->last_used)
			break;
	}
	conf->last_used = disk;

	mirror = conf->mirrors + conf->last_used;

	r1_bh = raid1_alloc_buf (conf);
	r1_bh->master_bh = NULL;
	r1_bh->mddev = mddev;
	r1_bh->cmd = SPECIAL;
	bh = &r1_bh->bh_req;

	bh->b_blocknr = block_nr;
	bsize = 1024;
	while (!(bh->b_blocknr & 1) && bsize < PAGE_SIZE
			&& (bh->b_blocknr+2)*(bsize>>10) < mddev->sb->size) {
		bh->b_blocknr >>= 1;
		bsize <<= 1;
	}
	bh->b_size = bsize;
	bh->b_list = BUF_LOCKED;
	bh->b_dev = mirror->dev;
	bh->b_rdev = mirror->dev;
	bh->b_state = (1<<BH_Req) | (1<<BH_Mapped) | (1<<BH_Lock);
	if (!bh->b_page)
		BUG();
	if (!bh->b_data)
		BUG();
	if (bh->b_data != page_address(bh->b_page))
		BUG();
	bh->b_end_io = end_sync_read;
	bh->b_private = r1_bh;
	bh->b_rsector = block_nr<<1;
	init_waitqueue_head(&bh->b_wait);

	generic_make_request(READ, bh);
	md_sync_acct(bh->b_dev, bh->b_size/512);

	return (bsize >> 10);

nomem:
	raid1_shrink_buffers(conf);
	spin_unlock_irq(&conf->segment_lock);
	return -ENOMEM;
}
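A note on the size-coalescing loop in raid1_sync_request(): while the block
number is even, the buffer size is below PAGE_SIZE, and the enlarged request
still fits within the device, the loop halves b_blocknr and doubles bsize, so
one read covers the largest aligned chunk possible. A minimal user-space
sketch of the same arithmetic (block_nr and size are made-up example values,
and PAGE_SIZE is fixed at 4096 here purely for illustration):

#include <stdio.h>

#define PAGE_SIZE 4096UL	/* assumed page size for the example */

int main(void)
{
	unsigned long block_nr = 8;	/* starting 1K block number (example) */
	unsigned long size = 1024;	/* device size in 1K blocks (example) */
	unsigned long blocknr = block_nr;
	unsigned long bsize = 1024;	/* start with a 1K buffer */

	/* same condition as the kernel loop: keep doubling while the
	 * block number stays aligned, the buffer is under a page, and
	 * the larger request still fits inside the device */
	while (!(blocknr & 1) && bsize < PAGE_SIZE
			&& (blocknr + 2) * (bsize >> 10) < size) {
		blocknr >>= 1;
		bsize <<= 1;
	}
	/* for block_nr=8 this prints: blocknr 2, bsize 4096 -- the same
	 * byte offset (8192), addressed in 4K units instead of 1K */
	printf("block %lu -> blocknr %lu, bsize %lu\n",
	       block_nr, blocknr, bsize);
	return 0;
}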