raid1.c
		new_disk--;
		if (new_disk == disk) {
			/*
			 * This means no working disk was found.
			 * Nothing much to do, let's not change anything
			 * and hope for the best...
			 */
			new_disk = conf->last_used;

			goto rb_out;
		}
	}
	disk = new_disk;
	/* now disk == new_disk == starting point for search */

	/*
	 * Don't touch anything for sequential reads.
	 */
	if (this_sector == conf->mirrors[new_disk].head_position)
		goto rb_out;

	/*
	 * If reads have been done only on a single disk
	 * for a time, let's give another disk a chance.
	 * This is for kicking those idling disks so that
	 * they would find work near some hotspot.
	 */
	if (conf->sect_count >= conf->mirrors[new_disk].sect_limit) {
		conf->sect_count = 0;

		do {
			if (new_disk <= 0)
				new_disk = conf->raid_disks;
			new_disk--;
			if (new_disk == disk)
				break;
		} while ((conf->mirrors[new_disk].write_only) ||
			 (!conf->mirrors[new_disk].operational));

		goto rb_out;
	}

	current_distance = abs(this_sector -
				conf->mirrors[disk].head_position);

	/* Find the disk which is closest */
	do {
		if (disk <= 0)
			disk = conf->raid_disks;
		disk--;

		if ((conf->mirrors[disk].write_only) ||
		    (!conf->mirrors[disk].operational))
			continue;

		new_distance = abs(this_sector -
					conf->mirrors[disk].head_position);

		if (new_distance < current_distance) {
			conf->sect_count = 0;
			current_distance = new_distance;
			new_disk = disk;
		}
	} while (disk != conf->last_used);

rb_out:
	conf->mirrors[new_disk].head_position = this_sector + sectors;

	conf->last_used = new_disk;
	conf->sect_count += sectors;

	return new_disk;
}
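/*
 * Read-balancing recap: a sequential read (this_sector already equal to
 * the mirror's recorded head_position) stays on the current disk; once a
 * single disk has served sect_limit sectors, the rotation above hands the
 * stream to the next operational, non-write-only mirror; otherwise the
 * mirror with the smallest |this_sector - head_position| seek distance
 * wins.  Example with mirrors at head positions 1000 and 5000: a read at
 * sector 4900 is routed to the second mirror (distance 100 vs. 3900).
 */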
static int raid1_make_request (mddev_t *mddev, int rw,
			       struct buffer_head * bh)
{
	raid1_conf_t *conf = mddev_to_conf(mddev);
	struct buffer_head *bh_req, *bhl;
	struct raid1_bh * r1_bh;
	int disks = MD_SB_DISKS;
	int i, sum_bhs = 0, sectors;
	struct mirror_info *mirror;

	if (!buffer_locked(bh))
		BUG();

	/*
	 * make_request() can abort the operation when READA is being
	 * used and no empty request is available.
	 *
	 * Currently, just replace the command with READ/WRITE.
	 */
	if (rw == READA)
		rw = READ;

	r1_bh = raid1_alloc_r1bh (conf);

	spin_lock_irq(&conf->segment_lock);
	wait_event_lock_irq(conf->wait_done,
			bh->b_rsector < conf->start_active ||
			bh->b_rsector >= conf->start_future,
			conf->segment_lock);
	if (bh->b_rsector < conf->start_active)
		conf->cnt_done++;
	else {
		conf->cnt_future++;
		if (conf->phase)
			set_bit(R1BH_SyncPhase, &r1_bh->state);
	}
	spin_unlock_irq(&conf->segment_lock);

	/*
	 * I think the read and write branch should be separated completely,
	 * since we want to do read balancing on the read side for example.
	 * Alternative implementations? :) --mingo
	 */

	r1_bh->master_bh = bh;
	r1_bh->mddev = mddev;
	r1_bh->cmd = rw;

	sectors = bh->b_size >> 9;
	if (rw == READ) {
		/*
		 * read balancing logic:
		 */
		mirror = conf->mirrors + raid1_read_balance(conf, bh);

		bh_req = &r1_bh->bh_req;
		memcpy(bh_req, bh, sizeof(*bh));
		bh_req->b_blocknr = bh->b_rsector / sectors;
		bh_req->b_dev = mirror->dev;
		bh_req->b_rdev = mirror->dev;
	/*	bh_req->b_rsector = bh->n_rsector; */
		bh_req->b_end_io = raid1_end_request;
		bh_req->b_private = r1_bh;
		generic_make_request(rw, bh_req);
		return 0;
	}

	/*
	 * WRITE:
	 */
	bhl = raid1_alloc_bh(conf, conf->raid_disks);
	for (i = 0; i < disks; i++) {
		struct buffer_head *mbh;

		if (!conf->mirrors[i].operational)
			continue;

		/*
		 * We should use a private pool (size depending on NR_REQUEST)
		 * to avoid writes filling up the memory with bhs.
		 *
		 * Such pools are much faster than kmalloc anyways (so we waste
		 * almost nothing by not using the master bh when writing and
		 * win a lot of cleanness) but for now we are cool enough. --mingo
		 *
		 * It's safe to sleep here, buffer heads cannot be used in a shared
		 * manner in the write branch. Look how we lock the buffer at the
		 * beginning of this function to grok the difference ;)
		 */
		mbh = bhl;
		if (mbh == NULL) {
			MD_BUG();
			break;
		}
		bhl = mbh->b_next;
		mbh->b_next = NULL;
		mbh->b_this_page = (struct buffer_head *)1;

		/*
		 * prepare mirrored mbh (fields ordered for max mem throughput):
		 */
		mbh->b_blocknr = bh->b_rsector / sectors;
		mbh->b_dev = conf->mirrors[i].dev;
		mbh->b_rdev = conf->mirrors[i].dev;
		mbh->b_rsector = bh->b_rsector;
		mbh->b_state = (1<<BH_Req) | (1<<BH_Dirty) |
				(1<<BH_Mapped) | (1<<BH_Lock);

		atomic_set(&mbh->b_count, 1);
		mbh->b_size = bh->b_size;
		mbh->b_page = bh->b_page;
		mbh->b_data = bh->b_data;
		mbh->b_list = BUF_LOCKED;
		mbh->b_end_io = raid1_end_request;
		mbh->b_private = r1_bh;

		mbh->b_next = r1_bh->mirror_bh_list;
		r1_bh->mirror_bh_list = mbh;
		sum_bhs++;
	}
	if (bhl)
		raid1_free_bh(conf, bhl);

	md_atomic_set(&r1_bh->remaining, sum_bhs);

	/*
	 * We have to be a bit careful about the semaphore above, that's
	 * why we start the requests separately. Since kmalloc() could
	 * fail, sleep and make_request() can sleep too, this is the
	 * safer solution. Imagine, end_request decreasing the semaphore
	 * before we could have set it up ... We could play tricks with
	 * the semaphore (presetting it and correcting at the end if
	 * sum_bhs is not 'n', but we have to do end_request by hand if
	 * all requests finish before we had a chance to set up the
	 * semaphore correctly ... lots of races).
	 */
	bh = r1_bh->mirror_bh_list;
	while (bh) {
		struct buffer_head *bh2 = bh;
		bh = bh->b_next;
		generic_make_request(rw, bh2);
	}
	return 0;
}
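/*
 * Write-path recap: one mirror buffer head is cloned per operational disk
 * and chained on r1_bh->mirror_bh_list, with r1_bh->remaining counting the
 * clones.  Each completed mirror write drops that count in
 * raid1_end_request(), and only the final completion ends the master bh.
 * Setting `remaining' to sum_bhs before submitting any clone (rather than
 * incrementing it on the fly) is what closes the race described above.
 */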
static int raid1_status (char *page, mddev_t *mddev)
{
	raid1_conf_t *conf = mddev_to_conf(mddev);
	int sz = 0, i;

	sz += sprintf (page+sz, " [%d/%d] [", conf->raid_disks,
			conf->working_disks);
	for (i = 0; i < conf->raid_disks; i++)
		sz += sprintf (page+sz, "%s",
			conf->mirrors[i].operational ? "U" : "_");
	sz += sprintf (page+sz, "]");
	return sz;
}

#define LAST_DISK KERN_ALERT \
"raid1: only one disk left and IO error.\n"

#define NO_SPARE_DISK KERN_ALERT \
"raid1: no spare disk left, degrading mirror level by one.\n"

#define DISK_FAILED KERN_ALERT \
"raid1: Disk failure on %s, disabling device. \n" \
"	Operation continuing on %d devices\n"

#define START_SYNCING KERN_ALERT \
"raid1: start syncing spare disk.\n"

#define ALREADY_SYNCING KERN_INFO \
"raid1: syncing already in progress.\n"

static void mark_disk_bad (mddev_t *mddev, int failed)
{
	raid1_conf_t *conf = mddev_to_conf(mddev);
	struct mirror_info *mirror = conf->mirrors+failed;
	mdp_super_t *sb = mddev->sb;

	mirror->operational = 0;
	mark_disk_faulty(sb->disks+mirror->number);
	mark_disk_nonsync(sb->disks+mirror->number);
	mark_disk_inactive(sb->disks+mirror->number);
	sb->active_disks--;
	sb->working_disks--;
	sb->failed_disks++;
	mddev->sb_dirty = 1;
	md_wakeup_thread(conf->thread);
	conf->working_disks--;
	printk (DISK_FAILED, partition_name (mirror->dev),
			conf->working_disks);
}

static int raid1_error (mddev_t *mddev, kdev_t dev)
{
	raid1_conf_t *conf = mddev_to_conf(mddev);
	struct mirror_info * mirrors = conf->mirrors;
	int disks = MD_SB_DISKS;
	int i;

	if (conf->working_disks == 1) {
		/*
		 * Uh oh, we can do nothing if this is our last disk, but
		 * first check if this is a queued request for a device
		 * which has just failed.
		 */
		for (i = 0; i < disks; i++) {
			if (mirrors[i].dev==dev && !mirrors[i].operational)
				return 0;
		}
		printk (LAST_DISK);
	} else {
		/*
		 * Mark disk as unusable
		 */
		for (i = 0; i < disks; i++) {
			if (mirrors[i].dev==dev && mirrors[i].operational) {
				mark_disk_bad(mddev, i);
				break;
			}
		}
	}
	return 0;
}

#undef LAST_DISK
#undef NO_SPARE_DISK
#undef DISK_FAILED
#undef START_SYNCING

static void print_raid1_conf (raid1_conf_t *conf)
{
	int i;
	struct mirror_info *tmp;

	printk("RAID1 conf printout:\n");
	if (!conf) {
		printk("(conf==NULL)\n");
		return;
	}
	printk(" --- wd:%d rd:%d nd:%d\n", conf->working_disks,
			conf->raid_disks, conf->nr_disks);

	for (i = 0; i < MD_SB_DISKS; i++) {
		tmp = conf->mirrors + i;
		printk(" disk %d, s:%d, o:%d, n:%d rd:%d us:%d dev:%s\n",
			i, tmp->spare, tmp->operational,
			tmp->number, tmp->raid_disk, tmp->used_slot,
			partition_name(tmp->dev));
	}
}

static void close_sync(raid1_conf_t *conf)
{
	mddev_t *mddev = conf->mddev;
	/*
	 * If reconstruction was interrupted, we need to close the "active"
	 * and "pending" holes.
	 * We know that there are no active rebuild requests,
	 * so cnt_active == cnt_ready == 0.
	 */
	/* this is really needed when recovery stops too... */
	spin_lock_irq(&conf->segment_lock);
	conf->start_active = conf->start_pending;
	conf->start_ready = conf->start_pending;
	wait_event_lock_irq(conf->wait_ready, !conf->cnt_pending,
			conf->segment_lock);
	conf->start_active = conf->start_ready =
		conf->start_pending = conf->start_future;
	conf->start_future = mddev->sb->size+1;
	conf->cnt_pending = conf->cnt_future;
	conf->cnt_future = 0;
	conf->phase = conf->phase ^ 1;
	wait_event_lock_irq(conf->wait_ready, !conf->cnt_pending,
			conf->segment_lock);
	conf->start_active = conf->start_ready =
		conf->start_pending = conf->start_future = 0;
	conf->phase = 0;
	conf->cnt_future = conf->cnt_done;
	conf->cnt_done = 0;
	spin_unlock_irq(&conf->segment_lock);
	wake_up(&conf->wait_done);
}
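/*
 * Note on the segment bookkeeping: raid1_make_request() only lets a
 * request through when its sector lies below start_active (already
 * reconstructed) or at/above start_future (untouched by reconstruction),
 * sleeping on wait_done otherwise.  close_sync() advances the
 * start_active/start_ready/start_pending boundaries up to start_future,
 * drains cnt_pending twice while flipping `phase', and finally zeroes the
 * whole window so an interrupted reconstruction cannot leave requests
 * blocked.
 */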
static int raid1_diskop(mddev_t *mddev, mdp_disk_t **d, int state)
{
	int err = 0;
	int i, failed_disk = -1, spare_disk = -1, removed_disk = -1,
		added_disk = -1;
	raid1_conf_t *conf = mddev->private;
	struct mirror_info *tmp, *sdisk, *fdisk, *rdisk, *adisk;
	mdp_super_t *sb = mddev->sb;
	mdp_disk_t *failed_desc, *spare_desc, *added_desc;

	print_raid1_conf(conf);
	md_spin_lock_irq(&conf->device_lock);
	/*
	 * find the disk ...
	 */
	switch (state) {

	case DISKOP_SPARE_ACTIVE:

		/*
		 * Find the failed disk within the RAID1 configuration ...
		 * (this can only be in the first conf->working_disks part)
		 */
		for (i = 0; i < conf->raid_disks; i++) {
			tmp = conf->mirrors + i;
			if ((!tmp->operational && !tmp->spare) ||
					!tmp->used_slot) {
				failed_disk = i;
				break;
			}
		}
		/*
		 * When we activate a spare disk we _must_ have a disk in
		 * the lower (active) part of the array to replace.
		 */
		if ((failed_disk == -1) || (failed_disk >= conf->raid_disks)) {
			MD_BUG();
			err = 1;
			goto abort;
		}
		/* fall through */

	case DISKOP_SPARE_WRITE:
	case DISKOP_SPARE_INACTIVE:

		/*
		 * Find the spare disk ... (can only be in the 'high'
		 * area of the array)
		 */
		for (i = conf->raid_disks; i < MD_SB_DISKS; i++) {
			tmp = conf->mirrors + i;
			if (tmp->spare && tmp->number == (*d)->number) {
				spare_disk = i;
				break;
			}
		}
		if (spare_disk == -1) {
			MD_BUG();
			err = 1;
			goto abort;
		}
		break;

	case DISKOP_HOT_REMOVE_DISK:

		for (i = 0; i < MD_SB_DISKS; i++) {
			tmp = conf->mirrors + i;
			if (tmp->used_slot && (tmp->number == (*d)->number)) {
				if (tmp->operational) {
					err = -EBUSY;
					goto abort;
				}
				removed_disk = i;
				break;
			}
		}
		if (removed_disk == -1) {
			MD_BUG();
			err = 1;
			goto abort;
		}
		break;

	case DISKOP_HOT_ADD_DISK:

		for (i = conf->raid_disks; i < MD_SB_DISKS; i++) {
			tmp = conf->mirrors + i;
			if (!tmp->used_slot) {
				added_disk = i;
				break;
			}
		}
		if (added_disk == -1) {
			MD_BUG();
			err = 1;
			goto abort;
		}
		break;
	}

	switch (state) {
	/*
	 * Switch the spare disk to write-only mode:
	 */
	case DISKOP_SPARE_WRITE:
		sdisk = conf->mirrors + spare_disk;
		sdisk->operational = 1;
		sdisk->write_only = 1;
		break;
	/*
	 * Deactivate a spare disk:
	 */
	case DISKOP_SPARE_INACTIVE: