raid5.c
					} else {
						set_bit(STRIPE_DELAYED, &sh->state);
						set_bit(STRIPE_HANDLE, &sh->state);
					}
				}
			}
		if (rcw <= rmw && rcw > 0)
			/* want reconstruct write, but need to get some data */
			for (i=disks; i--;) {
				bh = sh->bh_cache[i];
				if (!sh->bh_write[i] && i != sh->pd_idx &&
				    !buffer_locked(bh) && !buffer_uptodate(bh) &&
				    conf->disks[i].operational) {
					if (test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
						PRINTK("Read_old block %d for Reconstruct\n", i);
						set_bit(BH_Lock, &bh->b_state);
						action[i] = READ+1;
						locked++;
					} else {
						set_bit(STRIPE_DELAYED, &sh->state);
						set_bit(STRIPE_HANDLE, &sh->state);
					}
				}
			}
		/* now if nothing is locked, and if we have enough data, we can start a write request */
		if (locked == 0 && (rcw == 0 || rmw == 0)) {
			PRINTK("Computing parity...\n");
			compute_parity(sh, rcw == 0 ? RECONSTRUCT_WRITE : READ_MODIFY_WRITE);
			/* now every locked buffer is ready to be written */
			for (i=disks; i--;)
				if (buffer_locked(sh->bh_cache[i])) {
					PRINTK("Writing block %d\n", i);
					locked++;
					action[i] = WRITE+1;
					if (!conf->disks[i].operational ||
					    (i == sh->pd_idx && failed == 0))
						set_bit(STRIPE_INSYNC, &sh->state);
				}
			if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
				atomic_dec(&conf->preread_active_stripes);
				if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD)
					md_wakeup_thread(conf->thread);
			}
		}
	}

	/* maybe we need to check and possibly fix the parity for this stripe
	 * Any reads will already have been scheduled, so we just see if enough data
	 * is available
	 */
	if (syncing && locked == 0 &&
	    !test_bit(STRIPE_INSYNC, &sh->state) && failed <= 1) {
		set_bit(STRIPE_HANDLE, &sh->state);
		if (failed == 0) {
			if (uptodate != disks)
				BUG();
			compute_parity(sh, CHECK_PARITY);
			uptodate--;
			bh = sh->bh_cache[sh->pd_idx];
			if ((*(u32*)bh->b_data) == 0 &&
			    !memcmp(bh->b_data, bh->b_data+4, bh->b_size-4)) {
				/* parity is correct (on disc, not in buffer any more) */
				set_bit(STRIPE_INSYNC, &sh->state);
			}
		}
		if (!test_bit(STRIPE_INSYNC, &sh->state)) {
			struct disk_info *spare;
			if (failed == 0)
				failed_num = sh->pd_idx;
			/* should be able to compute the missing block and write it to spare */
			if (!buffer_uptodate(sh->bh_cache[failed_num])) {
				if (uptodate+1 != disks)
					BUG();
				compute_block(sh, failed_num);
				uptodate++;
			}
			if (uptodate != disks)
				BUG();
			bh = sh->bh_cache[failed_num];
			set_bit(BH_Lock, &bh->b_state);
			action[failed_num] = WRITE+1;
			locked++;
			set_bit(STRIPE_INSYNC, &sh->state);
			if (conf->disks[failed_num].operational)
				md_sync_acct(conf->disks[failed_num].dev, bh->b_size>>9);
			else if ((spare = conf->spare))
				md_sync_acct(spare->dev, bh->b_size>>9);
		}
	}
	if (syncing && locked == 0 &&
	    test_bit(STRIPE_INSYNC, &sh->state)) {
		md_done_sync(conf->mddev, (sh->size>>9) - sh->sync_redone, 1);
		clear_bit(STRIPE_SYNCING, &sh->state);
	}

	spin_unlock(&sh->lock);

	while ((bh = return_ok)) {
		return_ok = bh->b_reqnext;
		bh->b_reqnext = NULL;
		bh->b_end_io(bh, 1);
	}
	while ((bh = return_fail)) {
		return_fail = bh->b_reqnext;
		bh->b_reqnext = NULL;
		bh->b_end_io(bh, 0);
	}
	for (i=disks; i--;)
		if (action[i]) {
			struct buffer_head *bh = sh->bh_cache[i];
			struct disk_info *spare = conf->spare;
			int skip = 0;
			if (action[i] == READ+1)
				bh->b_end_io = raid5_end_read_request;
			else
				bh->b_end_io = raid5_end_write_request;
			if (conf->disks[i].operational)
				bh->b_dev = conf->disks[i].dev;
			else if (spare && action[i] == WRITE+1)
				bh->b_dev = spare->dev;
			else
				skip = 1;
			if (!skip) {
				PRINTK("for %ld schedule op %d on disc %d\n", sh->sector, action[i]-1, i);
				atomic_inc(&sh->count);
				bh->b_rdev = bh->b_dev;
				bh->b_rsector = bh->b_blocknr * (bh->b_size>>9);
				generic_make_request(action[i]-1, bh);
			} else {
{ PRINTK("skip op %d on disc %d for sector %ld\n", action[i]-1, i, sh->sector); clear_bit(BH_Lock, &bh->b_state); set_bit(STRIPE_HANDLE, &sh->state); } }}static inline void raid5_activate_delayed(raid5_conf_t *conf){ if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) { while (!list_empty(&conf->delayed_list)) { struct list_head *l = conf->delayed_list.next; struct stripe_head *sh; sh = list_entry(l, struct stripe_head, lru); list_del_init(l); clear_bit(STRIPE_DELAYED, &sh->state); if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) atomic_inc(&conf->preread_active_stripes); list_add_tail(&sh->lru, &conf->handle_list); } }}static void raid5_unplug_device(void *data){ raid5_conf_t *conf = (raid5_conf_t *)data; unsigned long flags; spin_lock_irqsave(&conf->device_lock, flags); raid5_activate_delayed(conf); conf->plugged = 0; md_wakeup_thread(conf->thread); spin_unlock_irqrestore(&conf->device_lock, flags);}static inline void raid5_plug_device(raid5_conf_t *conf){ spin_lock_irq(&conf->device_lock); if (list_empty(&conf->delayed_list)) if (!conf->plugged) { conf->plugged = 1; queue_task(&conf->plug_tq, &tq_disk); } spin_unlock_irq(&conf->device_lock);}static int raid5_make_request (mddev_t *mddev, int rw, struct buffer_head * bh){ raid5_conf_t *conf = (raid5_conf_t *) mddev->private; const unsigned int raid_disks = conf->raid_disks; const unsigned int data_disks = raid_disks - 1; unsigned int dd_idx, pd_idx; unsigned long new_sector; int read_ahead = 0; struct stripe_head *sh; if (rw == READA) { rw = READ; read_ahead=1; } new_sector = raid5_compute_sector(bh->b_rsector, raid_disks, data_disks, &dd_idx, &pd_idx, conf); PRINTK("raid5_make_request, sector %lu\n", new_sector); sh = get_active_stripe(conf, new_sector, bh->b_size, read_ahead); if (sh) { sh->pd_idx = pd_idx; add_stripe_bh(sh, bh, dd_idx, rw); raid5_plug_device(conf); handle_stripe(sh); release_stripe(sh); } else bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state)); return 0;}/* * Determine correct block size for this device. */unsigned int device_bsize (kdev_t dev){ unsigned int i, correct_size; correct_size = BLOCK_SIZE; if (blksize_size[MAJOR(dev)]) { i = blksize_size[MAJOR(dev)][MINOR(dev)]; if (i) correct_size = i; } return correct_size;}static int raid5_sync_request (mddev_t *mddev, unsigned long sector_nr){ raid5_conf_t *conf = (raid5_conf_t *) mddev->private; struct stripe_head *sh; int sectors_per_chunk = conf->chunk_size >> 9; unsigned long stripe = sector_nr/sectors_per_chunk; int chunk_offset = sector_nr % sectors_per_chunk; int dd_idx, pd_idx; unsigned long first_sector; int raid_disks = conf->raid_disks; int data_disks = raid_disks-1; int redone = 0; int bufsize; sh = get_active_stripe(conf, sector_nr, 0, 0); bufsize = sh->size; redone = sector_nr - sh->sector; first_sector = raid5_compute_sector(stripe*data_disks*sectors_per_chunk + chunk_offset, raid_disks, data_disks, &dd_idx, &pd_idx, conf); sh->pd_idx = pd_idx; spin_lock(&sh->lock); set_bit(STRIPE_SYNCING, &sh->state); clear_bit(STRIPE_INSYNC, &sh->state); sh->sync_redone = redone; spin_unlock(&sh->lock); handle_stripe(sh); release_stripe(sh); return (bufsize>>9)-redone;}/* * This is our raid5 kernel thread. * * We scan the hash table for stripes which can be handled now. * During the scan, completed stripes are saved for us by the interrupt * handler, so that they will not have to wait for our next wakeup. 
 */
static void raid5d (void *data)
{
	struct stripe_head *sh;
	raid5_conf_t *conf = data;
	mddev_t *mddev = conf->mddev;
	int handled;

	PRINTK("+++ raid5d active\n");

	handled = 0;

	if (mddev->sb_dirty)
		md_update_sb(mddev);
	md_spin_lock_irq(&conf->device_lock);
	while (1) {
		struct list_head *first;

		if (list_empty(&conf->handle_list) &&
		    atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD &&
		    !conf->plugged &&
		    !list_empty(&conf->delayed_list))
			raid5_activate_delayed(conf);

		if (list_empty(&conf->handle_list))
			break;

		first = conf->handle_list.next;
		sh = list_entry(first, struct stripe_head, lru);

		list_del_init(first);
		atomic_inc(&sh->count);
		if (atomic_read(&sh->count) != 1)
			BUG();
		md_spin_unlock_irq(&conf->device_lock);

		handled++;
		handle_stripe(sh);
		release_stripe(sh);

		md_spin_lock_irq(&conf->device_lock);
	}
	PRINTK("%d stripes handled\n", handled);

	md_spin_unlock_irq(&conf->device_lock);

	PRINTK("--- raid5d inactive\n");
}

/*
 * Private kernel thread for parity reconstruction after an unclean
 * shutdown. Reconstruction on spare drives in case of a failed drive
 * is done by the generic mdsyncd.
 */
static void raid5syncd (void *data)
{
	raid5_conf_t *conf = data;
	mddev_t *mddev = conf->mddev;

	if (!conf->resync_parity)
		return;
	if (conf->resync_parity == 2)
		return;
	down(&mddev->recovery_sem);
	if (md_do_sync(mddev, NULL)) {
		up(&mddev->recovery_sem);
		printk("raid5: resync aborted!\n");
		return;
	}
	conf->resync_parity = 0;
	up(&mddev->recovery_sem);
	printk("raid5: resync finished.\n");
}

static int raid5_run (mddev_t *mddev)
{
	raid5_conf_t *conf;
	int i, j, raid_disk, memory;
	mdp_super_t *sb = mddev->sb;
	mdp_disk_t *desc;
	mdk_rdev_t *rdev;
	struct disk_info *disk;
	struct md_list_head *tmp;
	int start_recovery = 0;

	MOD_INC_USE_COUNT;

	if (sb->level != 5 && sb->level != 4) {
		printk("raid5: md%d: raid level not set to 4/5 (%d)\n",
		       mdidx(mddev), sb->level);
		MOD_DEC_USE_COUNT;
		return -EIO;
	}

	mddev->private = kmalloc (sizeof (raid5_conf_t), GFP_KERNEL);
	if ((conf = mddev->private) == NULL)
		goto abort;
	memset (conf, 0, sizeof (*conf));
	conf->mddev = mddev;

	if ((conf->stripe_hashtbl = (struct stripe_head **)
	     md__get_free_pages(GFP_ATOMIC, HASH_PAGES_ORDER)) == NULL)
		goto abort;
	memset(conf->stripe_hashtbl, 0, HASH_PAGES * PAGE_SIZE);

	conf->device_lock = MD_SPIN_LOCK_UNLOCKED;
	md_init_waitqueue_head(&conf->wait_for_stripe);
	INIT_LIST_HEAD(&conf->handle_list);
	INIT_LIST_HEAD(&conf->delayed_list);
	INIT_LIST_HEAD(&conf->inactive_list);
	atomic_set(&conf->active_stripes, 0);
	atomic_set(&conf->preread_active_stripes, 0);
	conf->buffer_size = PAGE_SIZE; /* good default for rebuild */

	conf->plugged = 0;
	conf->plug_tq.sync = 0;
	conf->plug_tq.routine = &raid5_unplug_device;
	conf->plug_tq.data = conf;

	PRINTK("raid5_run(md%d) called.\n", mdidx(mddev));

	ITERATE_RDEV(mddev,rdev,tmp) {
		/*
		 * This is important -- we are using the descriptor on
		 * the disk only to get a pointer to the descriptor on
		 * the main superblock, which might be more recent.
		 */
		desc = sb->disks + rdev->desc_nr;
		raid_disk = desc->raid_disk;
		disk = conf->disks + raid_disk;

		if (disk_faulty(desc)) {
			printk(KERN_ERR "raid5: disabled device %s (errors detected)\n",
			       partition_name(rdev->dev));
			if (!rdev->faulty) {
				MD_BUG();
				goto abort;
			}
			disk->number = desc->number;
			disk->raid_disk = raid_disk;
			disk->dev = rdev->dev;

			disk->operational = 0;
			disk->write_only = 0;
			disk->spare = 0;
			disk->used_slot = 1;
			continue;
		}
		if (disk_active(desc)) {
			if (!disk_sync(desc)) {
				printk(KERN_ERR "raid5: disabled device %s (not in sync)\n",
				       partition_name(rdev->dev));
				MD_BUG();
				goto abort;
			}
			if (raid_disk > sb->raid_disks) {
				printk(KERN_ERR "raid5: disabled device %s (inconsistent descriptor)\n",
				       partition_name(rdev->dev));
				continue;
			}
			if (disk->operational) {
				printk(KERN_ERR "raid5: disabled device %s (device %d already operational)\n",
				       partition_name(rdev->dev), raid_disk);
				continue;
			}
			printk(KERN_INFO "raid5: device %s operational as raid disk %d\n",
			       partition_name(rdev->dev), raid_disk);

			disk->number = desc->number;
			disk->raid_disk = raid_disk;
			disk->dev = rdev->dev;
			disk->operational = 1;
			disk->used_slot = 1;

			conf->working_disks++;
		} else {
			/*
			 * Must be a spare disk ..
			 */
			printk(KERN_INFO "raid5: spare disk %s\n",
			       partition_name(rdev->dev));
			disk->number = desc->number;
			disk->raid_disk = raid_disk;
			disk->dev = rdev->dev;

			disk->operational = 0;
			disk->write_only = 0;
			disk->spare = 1;
			disk->used_slot = 1;
		}
	}

	for (i = 0; i < MD_SB_DISKS; i++) {
		desc = sb->disks + i;
		raid_disk = desc->raid_disk;
		disk = conf->disks + raid_disk;

		if (disk_faulty(desc) && (raid_disk < sb->raid_disks) &&
		    !conf->disks[raid_disk].used_slot) {
			disk->number = desc->number;
			disk->raid_disk = raid_disk;
			disk->dev = MKDEV(0,0);

			disk->operational = 0;
			disk->write_only = 0;
			disk->spare = 0;
			disk->used_slot = 1;
		}
	}

	conf->raid_disks = sb->raid_disks;
	/*
	 * 0 for a fully functional array, 1 for a degraded array.
	 */
	conf->failed_disks = conf->raid_disks - conf->working_disks;
	conf->mddev = mddev;
	conf->chunk_size = sb->chunk_size;
	conf->level = sb->level;
	conf->algorithm = sb->layout;
	conf->max_nr_stripes = NR_STRIPES;

#if 0
	for (i = 0; i < conf->raid_disks; i++) {
		if (!conf->disks[i].used_slot) {
			MD_BUG();
			goto abort;
		}
	}
#endif

	if (!conf->chunk_size || conf->chunk_size % 4) {
		printk(KERN_ERR "raid5: invalid chunk size %d for md%d\n",
		       conf->chunk_size, mdidx(mddev));
		goto abort;
	}
	if (conf->algorithm > ALGORITHM_RIGHT_SYMMETRIC) {
		printk(KERN_ERR "raid5: unsupported parity algorithm %d for md%d\n",
		       conf->algorithm, mdidx(mddev));
		goto abort;
	}
	if (conf->failed_disks > 1) {
		printk(KERN_ERR "raid5: not enough operational devices for md%d (%d/%d failed)\n",
		       mdidx(mddev), conf->failed_disks, conf->raid_disks);
		goto abort;
	}

	if (conf->working_disks != sb->raid_disks) {
		printk(KERN_ALERT "raid5: md%d, not all disks are operational -- trying to recover array\n",
		       mdidx(mddev));
		start_recovery = 1;
	}

	{