raid1.c
	bh_req = &r1_bh->bh_req;
	memcpy(bh_req, bh, sizeof(*bh));
	bh_req->b_blocknr = bh->b_rsector;
	bh_req->b_dev = mirror->dev;
	bh_req->b_rdev = mirror->dev;
/*	bh_req->b_rsector = bh->n_rsector; */
	bh_req->b_end_io = raid1_end_request;
	bh_req->b_private = r1_bh;
	generic_make_request (rw, bh_req);
	return 0;
	}

	/*
	 * WRITE:
	 */

	bhl = raid1_alloc_bh(conf, conf->raid_disks);
	for (i = 0; i < disks; i++) {
		struct buffer_head *mbh;

		if (!conf->mirrors[i].operational)
			continue;

		/*
		 * We should use a private pool (size depending on NR_REQUEST),
		 * to avoid writes filling up the memory with bhs
		 *
		 * Such pools are much faster than kmalloc anyway (so we waste
		 * almost nothing by not using the master bh when writing and
		 * win a lot of cleanness) but for now we are cool enough. --mingo
		 *
		 * It's safe to sleep here, buffer heads cannot be used in a shared
		 * manner in the write branch. Look how we lock the buffer at the
		 * beginning of this function to grok the difference ;)
		 */
		mbh = bhl;
		if (mbh == NULL) {
			MD_BUG();
			break;
		}
		bhl = mbh->b_next;
		mbh->b_next = NULL;
		mbh->b_this_page = (struct buffer_head *)1;

		/*
		 * prepare mirrored mbh (fields ordered for max mem throughput):
		 */
		mbh->b_blocknr = bh->b_rsector;
		mbh->b_dev = conf->mirrors[i].dev;
		mbh->b_rdev = conf->mirrors[i].dev;
		mbh->b_rsector = bh->b_rsector;
		mbh->b_state = (1<<BH_Req) | (1<<BH_Dirty) |
			(1<<BH_Mapped) | (1<<BH_Lock);
		atomic_set(&mbh->b_count, 1);
		mbh->b_size = bh->b_size;
		mbh->b_page = bh->b_page;
		mbh->b_data = bh->b_data;
		mbh->b_list = BUF_LOCKED;
		mbh->b_end_io = raid1_end_request;
		mbh->b_private = r1_bh;

		mbh->b_next = r1_bh->mirror_bh_list;
		r1_bh->mirror_bh_list = mbh;
		sum_bhs++;
	}
	if (bhl)
		raid1_free_bh(conf, bhl);
	if (!sum_bhs) {
		/* Gag - all mirrors non-operational.. */
		raid1_end_bh_io(r1_bh, 0);
		return 0;
	}
	md_atomic_set(&r1_bh->remaining, sum_bhs);

	/*
	 * We have to be a bit careful about the semaphore above, that's
	 * why we start the requests separately. Since kmalloc() could
	 * fail, sleep and make_request() can sleep too, this is the
	 * safer solution. Imagine, end_request decreasing the semaphore
	 * before we could have set it up ... We could play tricks with
	 * the semaphore (presetting it and correcting at the end if
	 * sum_bhs is not 'n') but we would have to do end_request by hand
	 * if all requests finish before we had a chance to set up the
	 * semaphore correctly ... lots of races.
	 */
	bh = r1_bh->mirror_bh_list;
	while (bh) {
		struct buffer_head *bh2 = bh;
		bh = bh->b_next;
		generic_make_request(rw, bh2);
	}
	return (0);
}

static int raid1_status (char *page, mddev_t *mddev)
{
	raid1_conf_t *conf = mddev_to_conf(mddev);
	int sz = 0, i;

	sz += sprintf (page+sz, " [%d/%d] [", conf->raid_disks,
			conf->working_disks);
	for (i = 0; i < conf->raid_disks; i++)
		sz += sprintf (page+sz, "%s",
			conf->mirrors[i].operational ? "U" : "_");
	sz += sprintf (page+sz, "]");
	return sz;
}
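The string raid1_status builds here is the "[n/m] [UU_]" fragment that shows up in /proc/mdstat. As a minimal user-space sketch of the same formatting (the disk counts and operational flags below are made-up stand-ins for conf->raid_disks, conf->working_disks and conf->mirrors[i].operational):

#include <stdio.h>

int main(void)
{
	int raid_disks = 2, working_disks = 1;
	int operational[2] = { 1, 0 };	/* disk 0 up, disk 1 failed */
	char page[64];
	int sz = 0, i;

	sz += sprintf(page + sz, " [%d/%d] [", raid_disks, working_disks);
	for (i = 0; i < raid_disks; i++)
		sz += sprintf(page + sz, "%s", operational[i] ? "U" : "_");
	sz += sprintf(page + sz, "]");

	printf("%s\n", page);	/* prints " [2/1] [U_]" */
	return 0;
}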
\n" \" Operation continuing on %d devices\n"#define START_SYNCING KERN_ALERT \"raid1: start syncing spare disk.\n"#define ALREADY_SYNCING KERN_INFO \"raid1: syncing already in progress.\n"static void mark_disk_bad (mddev_t *mddev, int failed){ raid1_conf_t *conf = mddev_to_conf(mddev); struct mirror_info *mirror = conf->mirrors+failed; mdp_super_t *sb = mddev->sb; mirror->operational = 0; mark_disk_faulty(sb->disks+mirror->number); mark_disk_nonsync(sb->disks+mirror->number); mark_disk_inactive(sb->disks+mirror->number); if (!mirror->write_only) sb->active_disks--; sb->working_disks--; sb->failed_disks++; mddev->sb_dirty = 1; md_wakeup_thread(conf->thread); if (!mirror->write_only) conf->working_disks--; printk (DISK_FAILED, partition_name (mirror->dev), conf->working_disks);}static int raid1_error (mddev_t *mddev, kdev_t dev){ raid1_conf_t *conf = mddev_to_conf(mddev); struct mirror_info * mirrors = conf->mirrors; int disks = MD_SB_DISKS; int i; /* Find the drive. * If it is not operational, then we have already marked it as dead * else if it is the last working disks, ignore the error, let the * next level up know. * else mark the drive as failed */ for (i = 0; i < disks; i++) if (mirrors[i].dev==dev && mirrors[i].operational) break; if (i == disks) return 0; if (i < conf->raid_disks && conf->working_disks == 1) { /* Don't fail the drive, act as though we were just a * normal single drive */ return 1; } mark_disk_bad(mddev, i); return 0;}#undef LAST_DISK#undef NO_SPARE_DISK#undef DISK_FAILED#undef START_SYNCINGstatic void print_raid1_conf (raid1_conf_t *conf){ int i; struct mirror_info *tmp; printk("RAID1 conf printout:\n"); if (!conf) { printk("(conf==NULL)\n"); return; } printk(" --- wd:%d rd:%d nd:%d\n", conf->working_disks, conf->raid_disks, conf->nr_disks); for (i = 0; i < MD_SB_DISKS; i++) { tmp = conf->mirrors + i; printk(" disk %d, s:%d, o:%d, n:%d rd:%d us:%d dev:%s\n", i, tmp->spare,tmp->operational, tmp->number,tmp->raid_disk,tmp->used_slot, partition_name(tmp->dev)); }}static void close_sync(raid1_conf_t *conf){ mddev_t *mddev = conf->mddev; /* If reconstruction was interrupted, we need to close the "active" and "pending" * holes. * we know that there are no active rebuild requests, os cnt_active == cnt_ready ==0 */ /* this is really needed when recovery stops too... */ spin_lock_irq(&conf->segment_lock); conf->start_active = conf->start_pending; conf->start_ready = conf->start_pending; wait_event_lock_irq(conf->wait_ready, !conf->cnt_pending, conf->segment_lock); conf->start_active =conf->start_ready = conf->start_pending = conf->start_future; conf->start_future = mddev->sb->size+1; conf->cnt_pending = conf->cnt_future; conf->cnt_future = 0; conf->phase = conf->phase ^1; wait_event_lock_irq(conf->wait_ready, !conf->cnt_pending, conf->segment_lock); conf->start_active = conf->start_ready = conf->start_pending = conf->start_future = 0; conf->phase = 0; conf->cnt_future = conf->cnt_done;; conf->cnt_done = 0; spin_unlock_irq(&conf->segment_lock); wake_up(&conf->wait_done);}static int raid1_diskop(mddev_t *mddev, mdp_disk_t **d, int state){ int err = 0; int i, failed_disk=-1, spare_disk=-1, removed_disk=-1, added_disk=-1; raid1_conf_t *conf = mddev->private; struct mirror_info *tmp, *sdisk, *fdisk, *rdisk, *adisk; mdp_super_t *sb = mddev->sb; mdp_disk_t *failed_desc, *spare_desc, *added_desc; mdk_rdev_t *spare_rdev, *failed_rdev; print_raid1_conf(conf); md_spin_lock_irq(&conf->device_lock); /* * find the disk ... 
static int raid1_diskop(mddev_t *mddev, mdp_disk_t **d, int state)
{
	int err = 0;
	int i, failed_disk=-1, spare_disk=-1, removed_disk=-1, added_disk=-1;
	raid1_conf_t *conf = mddev->private;
	struct mirror_info *tmp, *sdisk, *fdisk, *rdisk, *adisk;
	mdp_super_t *sb = mddev->sb;
	mdp_disk_t *failed_desc, *spare_desc, *added_desc;
	mdk_rdev_t *spare_rdev, *failed_rdev;

	print_raid1_conf(conf);
	md_spin_lock_irq(&conf->device_lock);
	/*
	 * find the disk ...
	 */
	switch (state) {

	case DISKOP_SPARE_ACTIVE:

		/*
		 * Find the failed disk within the RAID1 configuration ...
		 * (this can only be in the first conf->working_disks part)
		 */
		for (i = 0; i < conf->raid_disks; i++) {
			tmp = conf->mirrors + i;
			if ((!tmp->operational && !tmp->spare) ||
					!tmp->used_slot) {
				failed_disk = i;
				break;
			}
		}
		/*
		 * When we activate a spare disk we _must_ have a disk in
		 * the lower (active) part of the array to replace.
		 */
		if ((failed_disk == -1) || (failed_disk >= conf->raid_disks)) {
			MD_BUG();
			err = 1;
			goto abort;
		}
		/* fall through */

	case DISKOP_SPARE_WRITE:
	case DISKOP_SPARE_INACTIVE:

		/*
		 * Find the spare disk ... (can only be in the 'high'
		 * area of the array)
		 */
		for (i = conf->raid_disks; i < MD_SB_DISKS; i++) {
			tmp = conf->mirrors + i;
			if (tmp->spare && tmp->number == (*d)->number) {
				spare_disk = i;
				break;
			}
		}
		if (spare_disk == -1) {
			MD_BUG();
			err = 1;
			goto abort;
		}
		break;

	case DISKOP_HOT_REMOVE_DISK:

		for (i = 0; i < MD_SB_DISKS; i++) {
			tmp = conf->mirrors + i;
			if (tmp->used_slot && (tmp->number == (*d)->number)) {
				if (tmp->operational) {
					err = -EBUSY;
					goto abort;
				}
				removed_disk = i;
				break;
			}
		}
		if (removed_disk == -1) {
			MD_BUG();
			err = 1;
			goto abort;
		}
		break;

	case DISKOP_HOT_ADD_DISK:

		for (i = conf->raid_disks; i < MD_SB_DISKS; i++) {
			tmp = conf->mirrors + i;
			if (!tmp->used_slot) {
				added_disk = i;
				break;
			}
		}
		if (added_disk == -1) {
			MD_BUG();
			err = 1;
			goto abort;
		}
		break;
	}

	switch (state) {
	/*
	 * Switch the spare disk to write-only mode:
	 */
	case DISKOP_SPARE_WRITE:
		sdisk = conf->mirrors + spare_disk;
		sdisk->operational = 1;
		sdisk->write_only = 1;
		break;
	/*
	 * Deactivate a spare disk:
	 */
	case DISKOP_SPARE_INACTIVE:
		close_sync(conf);
		sdisk = conf->mirrors + spare_disk;
		sdisk->operational = 0;
		sdisk->write_only = 0;
		break;
	/*
	 * Activate (mark read-write) the (now sync) spare disk,
	 * which means we switch its 'raid position' (->raid_disk)
	 * with the failed disk. (only the first 'conf->nr_disks'
	 * slots are used for 'real' disks and we must preserve this
	 * property)
	 */
	case DISKOP_SPARE_ACTIVE:
		close_sync(conf);
		sdisk = conf->mirrors + spare_disk;
		fdisk = conf->mirrors + failed_disk;

		spare_desc = &sb->disks[sdisk->number];
		failed_desc = &sb->disks[fdisk->number];

		if (spare_desc != *d) {
			MD_BUG();
			err = 1;
			goto abort;
		}
		if (spare_desc->raid_disk != sdisk->raid_disk) {
			MD_BUG();
			err = 1;
			goto abort;
		}
		if (sdisk->raid_disk != spare_disk) {
			MD_BUG();
			err = 1;
			goto abort;
		}
		if (failed_desc->raid_disk != fdisk->raid_disk) {
			MD_BUG();
			err = 1;
			goto abort;
		}
		if (fdisk->raid_disk != failed_disk) {
			MD_BUG();
			err = 1;
			goto abort;
		}

		/*
		 * do the switch finally
		 */
		spare_rdev = find_rdev_nr(mddev, spare_desc->number);
		failed_rdev = find_rdev_nr(mddev, failed_desc->number);

		/* There must be a spare_rdev, but there may not be a
		 * failed_rdev. That slot might be empty...
		 */
		spare_rdev->desc_nr = failed_desc->number;
		if (failed_rdev)
			failed_rdev->desc_nr = spare_desc->number;

		xchg_values(*spare_desc, *failed_desc);
		xchg_values(*fdisk, *sdisk);

		/*
		 * (careful, 'failed' and 'spare' are switched from now on)
		 *
		 * we want to preserve linear numbering and we want to
		 * give the proper raid_disk number to the now activated
		 * disk. (this means we switch back these values)
		 */
		xchg_values(spare_desc->raid_disk, failed_desc->raid_disk);
		xchg_values(sdisk->raid_disk, fdisk->raid_disk);
		xchg_values(spare_desc->number, failed_desc->number);
		xchg_values(sdisk->number, fdisk->number);

		*d = failed_desc;

		if (sdisk->dev == MKDEV(0,0))
			sdisk->used_slot = 0;
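		/*
		 * At this point the superblock descriptors and the
		 * mirror_info slots have been swapped wholesale and their
		 * raid_disk/number fields swapped back, so the former
		 * spare now occupies the failed disk's slot in the active
		 * area while the array keeps its linear numbering.
		 */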
		/*
		 * this really activates the spare.
		 */
		fdisk->spare = 0;
		fdisk->write_only = 0;

		/*
		 * if we activate a spare, we definitely replace a
		 * non-operational disk slot in the 'low' area of
		 * the disk array.
		 */
		conf->working_disks++;

		break;

	case DISKOP_HOT_REMOVE_DISK:
		rdisk = conf->mirrors + removed_disk;

		if (rdisk->spare && (removed_disk < conf->raid_disks)) {
			MD_BUG();
			err = 1;
			goto abort;
		}
		rdisk->dev = MKDEV(0,0);
		rdisk->used_slot = 0;
		conf->nr_disks--;
		break;

	case DISKOP_HOT_ADD_DISK:
		adisk = conf->mirrors + added_disk;
		added_desc = *d;

		if (added_disk != added_desc->number) {
			MD_BUG();
			err = 1;
			goto abort;
		}

		adisk->number = added_desc->number;
		adisk->raid_disk = added_desc->raid_disk;
		adisk->dev = MKDEV(added_desc->major, added_desc->minor);

		adisk->operational = 0;
		adisk->write_only = 0;
		adisk->spare = 1;
		adisk->used_slot = 1;
		adisk->head_position = 0;
		conf->nr_disks++;

		break;

	default:
		MD_BUG();
		err = 1;
		goto abort;
	}
abort:
	md_spin_unlock_irq(&conf->device_lock);
	if (state == DISKOP_SPARE_ACTIVE || state == DISKOP_SPARE_INACTIVE)
		/* should move to "END_REBUILD" when such exists */
		raid1_shrink_buffers(conf);

	print_raid1_conf(conf);
	return err;
}

#define IO_ERROR KERN_ALERT \
"raid1: %s: unrecoverable I/O read error for block %lu\n"

#define REDIRECT_SECTOR KERN_ERR \
"raid1: %s: redirecting sector %lu to another mirror\n"

/*
 * This is a kernel thread which:
 *
 *	1. Retries failed read operations on working mirrors.
 *	2. Updates the raid superblock when problems are encountered.
 *	3. Performs writes following reads for array synchronising.
 */
static void end_sync_write(struct buffer_head *bh, int uptodate);
static void end_sync_read(struct buffer_head *bh, int uptodate);

static void raid1d (void *data)
{
	struct raid1_bh *r1_bh;
	struct buffer_head *bh;
	unsigned long flags;
	mddev_t *mddev;
	kdev_t dev;

	for (;;) {
		md_spin_lock_irqsave(&retry_list_lock, flags);
		r1_bh = raid1_retry_list;
		if (!r1_bh)
			break;
		raid1_retry_list = r1_bh->next_r1;
		md_spin_unlock_irqrestore(&retry_list_lock, flags);

		mddev = r1_bh->mddev;
		if (mddev->sb_dirty)
			md_update_sb(mddev);
		bh = &r1_bh->bh_req;
		switch(r1_bh->cmd) {
		case SPECIAL:
			/* have to allocate lots of bh structures and
			 * schedule writes
			 */
			if (test_bit(R1BH_Uptodate, &r1_bh->state)) {
				int i, sum_bhs = 0;
				int disks = MD_SB_DISKS;
				struct buffer_head *bhl, *mbh;
				raid1_conf_t *conf;

				conf = mddev_to_conf(mddev);
				bhl = raid1_alloc_bh(conf, conf->raid_disks);
				/* don't really need this many */
				for (i = 0; i < disks; i++) {
					if (!conf->mirrors[i].operational)
						continue;
					if (i == conf->last_used)
						/* we read from here, no need to write */
						continue;
					if (i < conf->raid_disks &&
					    !conf->resync_mirrors)
						/* don't need to write this,
						 * we are just rebuilding */
						continue;
					mbh = bhl;
					if (!mbh) {
						MD_BUG();
						break;
					}
					bhl = mbh->b_next;
					mbh->b_this_page = (struct buffer_head *)1;

					/*
					 * prepare mirrored bh (fields ordered for max mem throughput):
					 */
					mbh->b_blocknr = bh->b_blocknr;
					mbh->b_dev = conf->mirrors[i].dev;
					mbh->b_rdev = conf->mirrors[i].dev;
					mbh->b_rsector = bh->b_blocknr;
					mbh->b_state = (1<<BH_Req) | (1<<BH_Dirty) |
						(1<<BH_Mapped) | (1<<BH_Lock);
					atomic_set(&mbh->b_count, 1);
					mbh->b_size = bh->b_size;
					mbh->b_page = bh->b_page;
					mbh->b_data = bh->b_data;
					mbh->b_list = BUF_LOCKED;
					mbh->b_end_io = end_sync_write;
					mbh->b_private = r1_bh;

					mbh->b_next = r1_bh->mirror_bh_list;
					r1_bh->mirror_bh_list = mbh;
					sum_bhs++;
				}
				md_atomic_set(&r1_bh->remaining, sum_bhs);
				if (bhl)
					raid1_free_bh(conf, bhl);
				mbh = r1_bh->mirror_bh_list;
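				/*
				 * As in the WRITE branch of make_request,
				 * 'remaining' is preset to the full clone
				 * count before the first write is submitted,
				 * so an early end_io cannot complete the
				 * master request while clones are still
				 * being issued.
				 */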
				if (!sum_bhs) {
					/* nowhere to write this to... I guess
					 * we must be done
					 */
					sync_request_done(bh->b_blocknr, conf);
					md_done_sync(mddev, bh->b_size>>9, 0);
					raid1_free_buf(r1_bh);
				} else
				while (mbh) {
					struct buffer_head *bh1 = mbh;
					mbh = mbh->b_next;
					generic_make_request(WRITE, bh1);
					md_sync_acct(bh1->b_dev, bh1->b_size/512);
				}
			} else {
				/* There is no point trying a read-for-reconstruct
				 * as reconstruct is about to be aborted
				 */
				printk (IO_ERROR, partition_name(bh->b_dev),
					bh->b_blocknr);
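Both the WRITE branch of make_request and the SPECIAL branch above rely on the same fan-out idiom: clone the request once per target mirror, preset a counter to the number of clones before submitting any of them, and let the clone whose end_io drops the counter to zero finish the master request. A self-contained, single-threaded user-space sketch of just that idiom; every name below is illustrative, not the driver's:

#include <stdio.h>

#define NMIRRORS 3

struct master {		/* plays the role of struct raid1_bh */
	int remaining;	/* like r1_bh->remaining */
	int uptodate;	/* all clones succeeded so far */
};

/* stands in for raid1_end_request / end_sync_write */
static void end_io(struct master *m, int uptodate)
{
	if (!uptodate)
		m->uptodate = 0;
	if (--m->remaining == 0)
		printf("master done, uptodate=%d\n", m->uptodate);
}

int main(void)
{
	struct master m = { 0, 1 };
	int i;

	/* preset the counter BEFORE issuing any clone, otherwise a
	 * clone that finishes early could drive it to zero while the
	 * rest are still being submitted */
	m.remaining = NMIRRORS;
	for (i = 0; i < NMIRRORS; i++)
		end_io(&m, 1);	/* each call models one clone completing */
	return 0;
}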