raid5.c
来自「linux 内核源代码」· C语言 代码 · 共 2,325 行 · 第 1/5 页
C
2,325 行
if (grow_buffers(sh, conf->raid_disks)) { shrink_buffers(sh, conf->raid_disks); kmem_cache_free(conf->slab_cache, sh); return 0; } sh->disks = conf->raid_disks; /* we just created an active stripe so... */ atomic_set(&sh->count, 1); atomic_inc(&conf->active_stripes); INIT_LIST_HEAD(&sh->lru); release_stripe(sh); return 1;}static int grow_stripes(raid5_conf_t *conf, int num){ struct kmem_cache *sc; int devs = conf->raid_disks; sprintf(conf->cache_name[0], "raid5-%s", mdname(conf->mddev)); sprintf(conf->cache_name[1], "raid5-%s-alt", mdname(conf->mddev)); conf->active_name = 0; sc = kmem_cache_create(conf->cache_name[conf->active_name], sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev), 0, 0, NULL); if (!sc) return 1; conf->slab_cache = sc; conf->pool_size = devs; while (num--) if (!grow_one_stripe(conf)) return 1; return 0;}#ifdef CONFIG_MD_RAID5_RESHAPEstatic int resize_stripes(raid5_conf_t *conf, int newsize){ /* Make all the stripes able to hold 'newsize' devices. * New slots in each stripe get 'page' set to a new page. * * This happens in stages: * 1/ create a new kmem_cache and allocate the required number of * stripe_heads. * 2/ gather all the old stripe_heads and tranfer the pages across * to the new stripe_heads. This will have the side effect of * freezing the array as once all stripe_heads have been collected, * no IO will be possible. Old stripe heads are freed once their * pages have been transferred over, and the old kmem_cache is * freed when all stripes are done. * 3/ reallocate conf->disks to be suitable bigger. If this fails, * we simple return a failre status - no need to clean anything up. * 4/ allocate new pages for the new slots in the new stripe_heads. * If this fails, we don't bother trying the shrink the * stripe_heads down again, we just leave them as they are. * As each stripe_head is processed the new one is released into * active service. * * Once step2 is started, we cannot afford to wait for a write, * so we use GFP_NOIO allocations. */ struct stripe_head *osh, *nsh; LIST_HEAD(newstripes); struct disk_info *ndisks; int err = 0; struct kmem_cache *sc; int i; if (newsize <= conf->pool_size) return 0; /* never bother to shrink */ md_allow_write(conf->mddev); /* Step 1 */ sc = kmem_cache_create(conf->cache_name[1-conf->active_name], sizeof(struct stripe_head)+(newsize-1)*sizeof(struct r5dev), 0, 0, NULL); if (!sc) return -ENOMEM; for (i = conf->max_nr_stripes; i; i--) { nsh = kmem_cache_alloc(sc, GFP_KERNEL); if (!nsh) break; memset(nsh, 0, sizeof(*nsh) + (newsize-1)*sizeof(struct r5dev)); nsh->raid_conf = conf; spin_lock_init(&nsh->lock); list_add(&nsh->lru, &newstripes); } if (i) { /* didn't get enough, give up */ while (!list_empty(&newstripes)) { nsh = list_entry(newstripes.next, struct stripe_head, lru); list_del(&nsh->lru); kmem_cache_free(sc, nsh); } kmem_cache_destroy(sc); return -ENOMEM; } /* Step 2 - Must use GFP_NOIO now. * OK, we have enough stripes, start collecting inactive * stripes and copying them over */ list_for_each_entry(nsh, &newstripes, lru) { spin_lock_irq(&conf->device_lock); wait_event_lock_irq(conf->wait_for_stripe, !list_empty(&conf->inactive_list), conf->device_lock, unplug_slaves(conf->mddev) ); osh = get_free_stripe(conf); spin_unlock_irq(&conf->device_lock); atomic_set(&nsh->count, 1); for(i=0; i<conf->pool_size; i++) nsh->dev[i].page = osh->dev[i].page; for( ; i<newsize; i++) nsh->dev[i].page = NULL; kmem_cache_free(conf->slab_cache, osh); } kmem_cache_destroy(conf->slab_cache); /* Step 3. * At this point, we are holding all the stripes so the array * is completely stalled, so now is a good time to resize * conf->disks. */ ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO); if (ndisks) { for (i=0; i<conf->raid_disks; i++) ndisks[i] = conf->disks[i]; kfree(conf->disks); conf->disks = ndisks; } else err = -ENOMEM; /* Step 4, return new stripes to service */ while(!list_empty(&newstripes)) { nsh = list_entry(newstripes.next, struct stripe_head, lru); list_del_init(&nsh->lru); for (i=conf->raid_disks; i < newsize; i++) if (nsh->dev[i].page == NULL) { struct page *p = alloc_page(GFP_NOIO); nsh->dev[i].page = p; if (!p) err = -ENOMEM; } release_stripe(nsh); } /* critical section pass, GFP_NOIO no longer needed */ conf->slab_cache = sc; conf->active_name = 1-conf->active_name; conf->pool_size = newsize; return err;}#endifstatic int drop_one_stripe(raid5_conf_t *conf){ struct stripe_head *sh; spin_lock_irq(&conf->device_lock); sh = get_free_stripe(conf); spin_unlock_irq(&conf->device_lock); if (!sh) return 0; BUG_ON(atomic_read(&sh->count)); shrink_buffers(sh, conf->pool_size); kmem_cache_free(conf->slab_cache, sh); atomic_dec(&conf->active_stripes); return 1;}static void shrink_stripes(raid5_conf_t *conf){ while (drop_one_stripe(conf)) ; if (conf->slab_cache) kmem_cache_destroy(conf->slab_cache); conf->slab_cache = NULL;}static void raid5_end_read_request(struct bio * bi, int error){ struct stripe_head *sh = bi->bi_private; raid5_conf_t *conf = sh->raid_conf; int disks = sh->disks, i; int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags); char b[BDEVNAME_SIZE]; mdk_rdev_t *rdev; for (i=0 ; i<disks; i++) if (bi == &sh->dev[i].req) break; pr_debug("end_read_request %llu/%d, count: %d, uptodate %d.\n", (unsigned long long)sh->sector, i, atomic_read(&sh->count), uptodate); if (i == disks) { BUG(); return; } if (uptodate) { set_bit(R5_UPTODATE, &sh->dev[i].flags); if (test_bit(R5_ReadError, &sh->dev[i].flags)) { rdev = conf->disks[i].rdev; printk(KERN_INFO "raid5:%s: read error corrected (%lu sectors at %llu on %s)\n", mdname(conf->mddev), STRIPE_SECTORS, (unsigned long long)sh->sector + rdev->data_offset, bdevname(rdev->bdev, b)); clear_bit(R5_ReadError, &sh->dev[i].flags); clear_bit(R5_ReWrite, &sh->dev[i].flags); } if (atomic_read(&conf->disks[i].rdev->read_errors)) atomic_set(&conf->disks[i].rdev->read_errors, 0); } else { const char *bdn = bdevname(conf->disks[i].rdev->bdev, b); int retry = 0; rdev = conf->disks[i].rdev; clear_bit(R5_UPTODATE, &sh->dev[i].flags); atomic_inc(&rdev->read_errors); if (conf->mddev->degraded) printk(KERN_WARNING "raid5:%s: read error not correctable (sector %llu on %s).\n", mdname(conf->mddev), (unsigned long long)sh->sector + rdev->data_offset, bdn); else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) /* Oh, no!!! */ printk(KERN_WARNING "raid5:%s: read error NOT corrected!! (sector %llu on %s).\n", mdname(conf->mddev), (unsigned long long)sh->sector + rdev->data_offset, bdn); else if (atomic_read(&rdev->read_errors) > conf->max_nr_stripes) printk(KERN_WARNING "raid5:%s: Too many read errors, failing device %s.\n", mdname(conf->mddev), bdn); else retry = 1; if (retry) set_bit(R5_ReadError, &sh->dev[i].flags); else { clear_bit(R5_ReadError, &sh->dev[i].flags); clear_bit(R5_ReWrite, &sh->dev[i].flags); md_error(conf->mddev, rdev); } } rdev_dec_pending(conf->disks[i].rdev, conf->mddev); clear_bit(R5_LOCKED, &sh->dev[i].flags); set_bit(STRIPE_HANDLE, &sh->state); release_stripe(sh);}static void raid5_end_write_request (struct bio *bi, int error){ struct stripe_head *sh = bi->bi_private; raid5_conf_t *conf = sh->raid_conf; int disks = sh->disks, i; int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags); for (i=0 ; i<disks; i++) if (bi == &sh->dev[i].req) break; pr_debug("end_write_request %llu/%d, count %d, uptodate: %d.\n", (unsigned long long)sh->sector, i, atomic_read(&sh->count), uptodate); if (i == disks) { BUG(); return; } if (!uptodate) md_error(conf->mddev, conf->disks[i].rdev); rdev_dec_pending(conf->disks[i].rdev, conf->mddev); clear_bit(R5_LOCKED, &sh->dev[i].flags); set_bit(STRIPE_HANDLE, &sh->state); release_stripe(sh);}static sector_t compute_blocknr(struct stripe_head *sh, int i); static void raid5_build_block (struct stripe_head *sh, int i){ struct r5dev *dev = &sh->dev[i]; bio_init(&dev->req); dev->req.bi_io_vec = &dev->vec; dev->req.bi_vcnt++; dev->req.bi_max_vecs++; dev->vec.bv_page = dev->page; dev->vec.bv_len = STRIPE_SIZE; dev->vec.bv_offset = 0; dev->req.bi_sector = sh->sector; dev->req.bi_private = sh; dev->flags = 0; dev->sector = compute_blocknr(sh, i);}static void error(mddev_t *mddev, mdk_rdev_t *rdev){ char b[BDEVNAME_SIZE]; raid5_conf_t *conf = (raid5_conf_t *) mddev->private; pr_debug("raid5: error called\n"); if (!test_bit(Faulty, &rdev->flags)) { set_bit(MD_CHANGE_DEVS, &mddev->flags); if (test_and_clear_bit(In_sync, &rdev->flags)) { unsigned long flags; spin_lock_irqsave(&conf->device_lock, flags); mddev->degraded++; spin_unlock_irqrestore(&conf->device_lock, flags); /* * if recovery was running, make sure it aborts. */ set_bit(MD_RECOVERY_ERR, &mddev->recovery); } set_bit(Faulty, &rdev->flags); printk (KERN_ALERT "raid5: Disk failure on %s, disabling device." " Operation continuing on %d devices\n", bdevname(rdev->bdev,b), conf->raid_disks - mddev->degraded); }}/* * Input: a 'big' sector number, * Output: index of the data and parity disk, and the sector # in them. */static sector_t raid5_compute_sector(sector_t r_sector, unsigned int raid_disks, unsigned int data_disks, unsigned int * dd_idx, unsigned int * pd_idx, raid5_conf_t *conf){ long stripe; unsigned long chunk_number; unsigned int chunk_offset; sector_t new_sector; int sectors_per_chunk = conf->chunk_size >> 9; /* First compute the information on this sector */ /* * Compute the chunk number and the sector offset inside the chunk */ chunk_offset = sector_div(r_sector, sectors_per_chunk); chunk_number = r_sector; BUG_ON(r_sector != chunk_number); /* * Compute the stripe number */ stripe = chunk_number / data_disks; /* * Compute the data disk and parity disk indexes inside the stripe */ *dd_idx = chunk_number % data_disks; /* * Select the parity disk based on the user selected algorithm. */ switch(conf->level) { case 4: *pd_idx = data_disks; break; case 5: switch (conf->algorithm) { case ALGORITHM_LEFT_ASYMMETRIC: *pd_idx = data_disks - stripe % raid_disks; if (*dd_idx >= *pd_idx) (*dd_idx)++; break; case ALGORITHM_RIGHT_ASYMMETRIC: *pd_idx = stripe % raid_disks; if (*dd_idx >= *pd_idx) (*dd_idx)++; break; case ALGORITHM_LEFT_SYMMETRIC: *pd_idx = data_disks - stripe % raid_disks; *dd_idx = (*pd_idx + 1 + *dd_idx) % raid_disks; break; case ALGORITHM_RIGHT_SYMMETRIC: *pd_idx = stripe % raid_disks; *dd_idx = (*pd_idx + 1 + *dd_idx) % raid_disks; break; default: printk(KERN_ERR "raid5: unsupported algorithm %d\n", conf->algorithm); } break; case 6: /**** FIX THIS ****/ switch (conf->algorithm) { case ALGORITHM_LEFT_ASYMMETRIC: *pd_idx = raid_disks - 1 - (stripe % raid_disks); if (*pd_idx == raid_disks-1) (*dd_idx)++; /* Q D D D P */ else if (*dd_idx >= *pd_idx) (*dd_idx) += 2; /* D D P Q D */ break; case ALGORITHM_RIGHT_ASYMMETRIC: *pd_idx = stripe % raid_disks; if (*pd_idx == raid_disks-1) (*dd_idx)++; /* Q D D D P */ else if (*dd_idx >= *pd_idx) (*dd_idx) += 2; /* D D P Q D */ break; case ALGORITHM_LEFT_SYMMETRIC: *pd_idx = raid_disks - 1 - (stripe % raid_disks); *dd_idx = (*pd_idx + 2 + *dd_idx) % raid_disks; break; case ALGORITHM_RIGHT_SYMMETRIC: *pd_idx = stripe % raid_disks; *dd_idx = (*pd_idx + 2 + *dd_idx) % raid_disks; break; default: printk (KERN_CRIT "raid6: unsupported algorithm %d\n", conf->algorithm); } break; } /* * Finally, compute the new sector number */ new_sector = (sector_t)stripe * sectors_per_chunk + chunk_offset; return new_sector;}static sector_t compute_blocknr(struct stripe_head *sh, int i){ raid5_conf_t *conf = sh->raid_conf; int raid_disks = sh->disks; int data_disks = raid_disks - conf->max_degraded; sector_t new_sector = sh->sector, check; int sectors_per_chunk = conf->chunk_size >> 9; sector_t stripe; int chunk_offset; int chunk_number, dummy1, dummy2, dd_idx = i; sector_t r_sector; chunk_offset = sector_div(new_sector, sectors_per_chunk); stripe = new_sector; BUG_ON(new_sector != stripe); if (i == sh->pd_idx) return 0; switch(conf->level) { case 4: break;
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?