/* raid5.c */
				locked++;
			}
		}
		/* now if nothing is locked, and if we have enough data,
		 * we can start a write request
		 */
		if (locked == 0 && (rcw == 0 || rmw == 0)) {
			PRINTK("Computing parity...\n");
			compute_parity(sh, rcw == 0 ? RECONSTRUCT_WRITE : READ_MODIFY_WRITE);
			/* now every locked buffer is ready to be written */
			for (i=disks; i--;)
				if (buffer_locked(sh->bh_cache[i])) {
					PRINTK("Writing block %d\n", i);
					locked++;
					action[i] = WRITE+1;
					if (!conf->disks[i].operational
					    || (i == sh->pd_idx && failed == 0))
						set_bit(STRIPE_INSYNC, &sh->state);
				}
		}
	}

	/* maybe we need to check and possibly fix the parity for this stripe.
	 * Any reads will already have been scheduled, so we just see if enough
	 * data is available.
	 */
	if (syncing && locked == 0 &&
	    !test_bit(STRIPE_INSYNC, &sh->state) && failed <= 1) {
		set_bit(STRIPE_HANDLE, &sh->state);
		if (failed == 0) {
			if (uptodate != disks)
				BUG();
			compute_parity(sh, CHECK_PARITY);
			uptodate--;
			bh = sh->bh_cache[sh->pd_idx];
			if ((*(u32*)bh->b_data) == 0 &&
			    !memcmp(bh->b_data, bh->b_data + 4, bh->b_size - 4)) {
				/* parity is correct (on disc, not in buffer any more) */
				set_bit(STRIPE_INSYNC, &sh->state);
			}
		}
		if (!test_bit(STRIPE_INSYNC, &sh->state)) {
			if (failed == 0)
				failed_num = sh->pd_idx;
			/* should be able to compute the missing block and write it to spare */
			if (!buffer_uptodate(sh->bh_cache[failed_num])) {
				if (uptodate + 1 != disks)
					BUG();
				compute_block(sh, failed_num);
				uptodate++;
			}
			if (uptodate != disks)
				BUG();
			bh = sh->bh_cache[failed_num];
			set_bit(BH_Lock, &bh->b_state);
			action[failed_num] = WRITE+1;
			locked++;
			set_bit(STRIPE_INSYNC, &sh->state);
			if (conf->disks[failed_num].operational)
				md_sync_acct(conf->disks[failed_num].dev, bh->b_size>>9);
			else if (conf->spare)
				md_sync_acct(conf->spare->dev, bh->b_size>>9);
		}
	}
	if (syncing && locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
		md_done_sync(conf->mddev, (sh->size>>10) - sh->sync_redone, 1);
		clear_bit(STRIPE_SYNCING, &sh->state);
	}

	spin_unlock(&sh->lock);

	/* complete the requests on the success list, then the failure list */
	while ((bh = return_ok)) {
		return_ok = bh->b_reqnext;
		bh->b_reqnext = NULL;
		bh->b_end_io(bh, 1);
	}
	while ((bh = return_fail)) {
		return_fail = bh->b_reqnext;
		bh->b_reqnext = NULL;
		bh->b_end_io(bh, 0);
	}
	for (i=disks; i--;)
		if (action[i]) {
			struct buffer_head *bh = sh->bh_cache[i];
			int skip = 0;

			if (action[i] == READ+1)
				bh->b_end_io = raid5_end_read_request;
			else
				bh->b_end_io = raid5_end_write_request;
			if (conf->disks[i].operational)
				bh->b_dev = conf->disks[i].dev;
			else if (conf->spare && action[i] == WRITE+1)
				bh->b_dev = conf->spare->dev;
			else if (action[i] == READ+1)
				BUG();
			else
				skip = 1;
			if (!skip) {
				PRINTK("for %ld schedule op %d on disc %d\n",
				       sh->sector, action[i]-1, i);
				atomic_inc(&sh->count);
				bh->b_rdev = bh->b_dev;
				bh->b_rsector = bh->b_blocknr * (bh->b_size>>9);
				generic_make_request(action[i]-1, bh);
			} else {
				PRINTK("skip op %d on disc %d for sector %ld\n",
				       action[i]-1, i, sh->sector);
				clear_bit(BH_Lock, &bh->b_state);
				set_bit(STRIPE_HANDLE, &sh->state);
			}
		}
}
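/*
 * Illustrative sketch, not part of the driver: handle_stripe() above picks
 * between RECONSTRUCT_WRITE (rcw == 0: parity is recomputed by XORing every
 * data block) and READ_MODIFY_WRITE (rmw == 0: old data and old parity are
 * XORed out, new data XORed in).  The standalone userspace demo below, with
 * made-up block contents and sizes, checks that both routes yield the same
 * parity.  Compiled out with #if 0 so it cannot interfere with the driver.
 */
#if 0
#include <assert.h>
#include <string.h>

#define NDATA 3			/* data disks of a hypothetical 4-disk array */
#define BSIZE 16		/* tiny block size, demo only                */

static void xor_into(unsigned char *dst, const unsigned char *src)
{
	int i;
	for (i = 0; i < BSIZE; i++)
		dst[i] ^= src[i];
}

int main(void)
{
	unsigned char blk[NDATA][BSIZE], parity[BSIZE], rmw[BSIZE];
	unsigned char newblk[BSIZE];
	int i;

	for (i = 0; i < NDATA; i++)
		memset(blk[i], i + 1, BSIZE);

	/* initial parity: XOR of all data blocks (reconstruct-write) */
	memset(parity, 0, BSIZE);
	for (i = 0; i < NDATA; i++)
		xor_into(parity, blk[i]);

	/* update block 0 by read-modify-write: parity ^= old ^ new */
	memset(newblk, 0x5a, BSIZE);
	memcpy(rmw, parity, BSIZE);
	xor_into(rmw, blk[0]);		/* XOR out the old data */
	xor_into(rmw, newblk);		/* XOR in the new data  */
	memcpy(blk[0], newblk, BSIZE);

	/* recompute parity from scratch and compare with the rmw result */
	memset(parity, 0, BSIZE);
	for (i = 0; i < NDATA; i++)
		xor_into(parity, blk[i]);
	assert(memcmp(parity, rmw, BSIZE) == 0);
	return 0;
}
#endif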
static int raid5_make_request (mddev_t *mddev, int rw, struct buffer_head * bh)
{
	raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
	const unsigned int raid_disks = conf->raid_disks;
	const unsigned int data_disks = raid_disks - 1;
	unsigned int dd_idx, pd_idx;
	unsigned long new_sector;
	int read_ahead = 0;
	struct stripe_head *sh;

	if (rw == READA) {
		rw = READ;
		read_ahead = 1;
	}

	new_sector = raid5_compute_sector(bh->b_rsector, raid_disks, data_disks,
					  &dd_idx, &pd_idx, conf);

	PRINTK("raid5_make_request, sector %lu\n", new_sector);
	sh = get_active_stripe(conf, new_sector, bh->b_size, read_ahead);
	if (sh) {
		sh->pd_idx = pd_idx;
		add_stripe_bh(sh, bh, dd_idx, rw);
		handle_stripe(sh);
		release_stripe(sh);
	} else
		bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));
	return 0;
}

/*
 * Determine correct block size for this device.
 */
unsigned int device_bsize (kdev_t dev)
{
	unsigned int i, correct_size;

	correct_size = BLOCK_SIZE;
	if (blksize_size[MAJOR(dev)]) {
		i = blksize_size[MAJOR(dev)][MINOR(dev)];
		if (i)
			correct_size = i;
	}
	return correct_size;
}

static int raid5_sync_request (mddev_t *mddev, unsigned long block_nr)
{
	raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
	struct stripe_head *sh;
	int sectors_per_chunk = conf->chunk_size >> 9;
	unsigned long stripe = (block_nr<<1)/sectors_per_chunk;
	int chunk_offset = (block_nr<<1) % sectors_per_chunk;
	int dd_idx, pd_idx;
	unsigned long first_sector;
	int raid_disks = conf->raid_disks;
	int data_disks = raid_disks - 1;
	int redone = 0;
	int bufsize;

	sh = get_active_stripe(conf, block_nr<<1, 0, 0);
	bufsize = sh->size;
	redone = block_nr - (sh->sector>>1);
	/* called for the side effect of computing pd_idx for this stripe */
	first_sector = raid5_compute_sector(stripe*data_disks*sectors_per_chunk
					    + chunk_offset, raid_disks, data_disks,
					    &dd_idx, &pd_idx, conf);
	sh->pd_idx = pd_idx;
	spin_lock(&sh->lock);
	set_bit(STRIPE_SYNCING, &sh->state);
	clear_bit(STRIPE_INSYNC, &sh->state);
	sh->sync_redone = redone;
	spin_unlock(&sh->lock);

	handle_stripe(sh);
	release_stripe(sh);

	return (bufsize>>10) - redone;
}

/*
 * This is our raid5 kernel thread.
 *
 * We scan the hash table for stripes which can be handled now.
 * During the scan, completed stripes are saved for us by the interrupt
 * handler, so that they will not have to wait for our next wakeup.
 */
static void raid5d (void *data)
{
	struct stripe_head *sh;
	raid5_conf_t *conf = data;
	mddev_t *mddev = conf->mddev;
	int handled;

	PRINTK("+++ raid5d active\n");

	handled = 0;

	if (mddev->sb_dirty) {
		mddev->sb_dirty = 0;
		md_update_sb(mddev);
	}
	md_spin_lock_irq(&conf->device_lock);
	while (!list_empty(&conf->handle_list)) {
		struct list_head *first = conf->handle_list.next;

		sh = list_entry(first, struct stripe_head, lru);

		list_del_init(first);
		atomic_inc(&sh->count);
		if (atomic_read(&sh->count) != 1)
			BUG();
		md_spin_unlock_irq(&conf->device_lock);

		handled++;
		handle_stripe(sh);
		release_stripe(sh);

		md_spin_lock_irq(&conf->device_lock);
	}
	PRINTK("%d stripes handled\n", handled);
	md_spin_unlock_irq(&conf->device_lock);

	PRINTK("--- raid5d inactive\n");
}
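/*
 * Illustrative sketch, not part of the driver: the address arithmetic that
 * raid5_compute_sector() performs for raid5_make_request() and
 * raid5_sync_request(), shown here for a left-asymmetric layout only (an
 * assumption for the demo; the driver supports several layouts).  A logical
 * sector splits into a chunk number plus an offset; the chunk number picks
 * a stripe and a data-disk slot, the stripe picks the parity disk, and
 * slots at or past the parity disk shift up by one.  All values are made up.
 */
#if 0
#include <stdio.h>

int main(void)
{
	unsigned long sector = 1000;		/* logical sector (demo value) */
	int raid_disks = 4;			/* total disks, incl. parity   */
	int data_disks = raid_disks - 1;
	int sectors_per_chunk = 8;		/* i.e. chunk_size >> 9        */
	unsigned long chunk_number, stripe, new_sector;
	int chunk_offset, dd_idx, pd_idx;

	chunk_offset = sector % sectors_per_chunk;
	chunk_number = sector / sectors_per_chunk;
	dd_idx = chunk_number % data_disks;	/* data slot inside the stripe */
	stripe = chunk_number / data_disks;

	/* left-asymmetric: parity steps backwards one disk per stripe */
	pd_idx = data_disks - stripe % raid_disks;
	if (dd_idx >= pd_idx)			/* skip over the parity disk */
		dd_idx++;

	/* sector within each member device's slice of this stripe */
	new_sector = stripe * sectors_per_chunk + chunk_offset;

	printf("sector %lu -> stripe %lu, data disk %d, parity disk %d, device sector %lu\n",
	       sector, stripe, dd_idx, pd_idx, new_sector);
	return 0;
}
#endif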
/*
 * Private kernel thread for parity reconstruction after an unclean
 * shutdown. Reconstruction on spare drives in case of a failed drive
 * is done by the generic mdsyncd.
 */
static void raid5syncd (void *data)
{
	raid5_conf_t *conf = data;
	mddev_t *mddev = conf->mddev;

	if (!conf->resync_parity)
		return;
	if (conf->resync_parity == 2)
		return;
	down(&mddev->recovery_sem);
	if (md_do_sync(mddev, NULL)) {
		up(&mddev->recovery_sem);
		printk("raid5: resync aborted!\n");
		return;
	}
	conf->resync_parity = 0;
	up(&mddev->recovery_sem);
	printk("raid5: resync finished.\n");
}

static int __check_consistency (mddev_t *mddev, int row)
{
	raid5_conf_t *conf = mddev->private;
	kdev_t dev;
	struct buffer_head *bh[MD_SB_DISKS], *tmp = NULL;
	int i, ret = 0, nr = 0, count;
	struct buffer_head *bh_ptr[MAX_XOR_BLOCKS];

	if (conf->working_disks != conf->raid_disks)
		goto out;
	tmp = kmalloc(sizeof(*tmp), GFP_KERNEL);
	if (!tmp)
		goto out;
	tmp->b_size = 4096;
	tmp->b_page = alloc_page(GFP_KERNEL);
	tmp->b_data = page_address(tmp->b_page);
	if (!tmp->b_data)
		goto out;
	md_clear_page(tmp->b_data);
	memset(bh, 0, MD_SB_DISKS * sizeof(struct buffer_head *));
	for (i = 0; i < conf->raid_disks; i++) {
		dev = conf->disks[i].dev;
		set_blocksize(dev, 4096);
		bh[i] = bread(dev, row / 4, 4096);
		if (!bh[i])
			break;
		nr++;
	}
	if (nr == conf->raid_disks) {
		/* XOR every block but the first into the zeroed scratch page,
		 * then compare with block 0: equal means the parity is good */
		bh_ptr[0] = tmp;
		count = 1;
		for (i = 1; i < nr; i++) {
			bh_ptr[count++] = bh[i];
			if (count == MAX_XOR_BLOCKS) {
				xor_block(count, &bh_ptr[0]);
				count = 1;
			}
		}
		if (count != 1)
			xor_block(count, &bh_ptr[0]);

		if (memcmp(tmp->b_data, bh[0]->b_data, 4096))
			ret = 1;
	}
	for (i = 0; i < conf->raid_disks; i++) {
		dev = conf->disks[i].dev;
		if (bh[i]) {
			bforget(bh[i]);
			bh[i] = NULL;
		}
		fsync_dev(dev);
		invalidate_buffers(dev);
	}
	free_page((unsigned long) tmp->b_data);
out:
	if (tmp)
		kfree(tmp);
	return ret;
}

static int check_consistency (mddev_t *mddev)
{
	if (__check_consistency(mddev, 0))
		/*
		 * We are not checking this currently, as it's legitimate to
		 * have an inconsistent array, at creation time.
		 */
		return 0;
	return 0;
}
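/*
 * Illustrative sketch, not part of the driver: the invariant that
 * __check_consistency() tests above.  XORing the blocks of all disks but
 * one must reproduce the remaining block, i.e. the XOR across all
 * raid_disks blocks is zero; the same identity is what lets a single
 * failed disk be rebuilt from the survivors.  Standalone userspace demo
 * with made-up contents, compiled out with #if 0.
 */
#if 0
#include <assert.h>
#include <string.h>

#define NDISKS 4		/* hypothetical array size */
#define BSIZE 16		/* tiny block size, demo only */

int main(void)
{
	unsigned char blk[NDISKS][BSIZE], rebuilt[BSIZE];
	int d, i;

	for (d = 0; d < NDISKS - 1; d++)
		memset(blk[d], 0x11 * (d + 1), BSIZE);

	/* last disk holds parity: XOR of all the data blocks */
	memset(blk[NDISKS - 1], 0, BSIZE);
	for (d = 0; d < NDISKS - 1; d++)
		for (i = 0; i < BSIZE; i++)
			blk[NDISKS - 1][i] ^= blk[d][i];

	/* consistency check: XOR across every disk must be all zero */
	for (i = 0; i < BSIZE; i++) {
		unsigned char x = 0;
		for (d = 0; d < NDISKS; d++)
			x ^= blk[d][i];
		assert(x == 0);
	}

	/* rebuild a "failed" disk 1 from the other three */
	memset(rebuilt, 0, BSIZE);
	for (d = 0; d < NDISKS; d++) {
		if (d == 1)
			continue;
		for (i = 0; i < BSIZE; i++)
			rebuilt[i] ^= blk[d][i];
	}
	assert(memcmp(rebuilt, blk[1], BSIZE) == 0);
	return 0;
}
#endif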
static int raid5_run (mddev_t *mddev)
{
	raid5_conf_t *conf;
	int i, j, raid_disk, memory;
	mdp_super_t *sb = mddev->sb;
	mdp_disk_t *desc;
	mdk_rdev_t *rdev;
	struct disk_info *disk;
	struct md_list_head *tmp;
	int start_recovery = 0;

	MOD_INC_USE_COUNT;

	if (sb->level != 5 && sb->level != 4) {
		printk("raid5: md%d: raid level not set to 4/5 (%d)\n",
		       mdidx(mddev), sb->level);
		MOD_DEC_USE_COUNT;
		return -EIO;
	}

	mddev->private = kmalloc (sizeof (raid5_conf_t), GFP_KERNEL);
	if ((conf = mddev->private) == NULL)
		goto abort;
	memset (conf, 0, sizeof (*conf));
	conf->mddev = mddev;

	if ((conf->stripe_hashtbl = (struct stripe_head **)
	     md__get_free_pages(GFP_ATOMIC, HASH_PAGES_ORDER)) == NULL)
		goto abort;
	memset(conf->stripe_hashtbl, 0, HASH_PAGES * PAGE_SIZE);

	conf->device_lock = MD_SPIN_LOCK_UNLOCKED;
	md_init_waitqueue_head(&conf->wait_for_stripe);
	INIT_LIST_HEAD(&conf->handle_list);
	INIT_LIST_HEAD(&conf->inactive_list);
	atomic_set(&conf->active_stripes, 0);
	conf->buffer_size = PAGE_SIZE; /* good default for rebuild */

	PRINTK("raid5_run(md%d) called.\n", mdidx(mddev));

	ITERATE_RDEV(mddev,rdev,tmp) {
		/*
		 * This is important -- we are using the descriptor on
		 * the disk only to get a pointer to the descriptor on
		 * the main superblock, which might be more recent.
		 */
		desc = sb->disks + rdev->desc_nr;
		raid_disk = desc->raid_disk;
		disk = conf->disks + raid_disk;

		if (disk_faulty(desc)) {
			printk(KERN_ERR "raid5: disabled device %s (errors detected)\n",
			       partition_name(rdev->dev));
			if (!rdev->faulty) {
				MD_BUG();
				goto abort;
			}
			disk->number = desc->number;
			disk->raid_disk = raid_disk;
			disk->dev = rdev->dev;

			disk->operational = 0;
			disk->write_only = 0;
			disk->spare = 0;
			disk->used_slot = 1;
			continue;
		}
		if (disk_active(desc)) {
			if (!disk_sync(desc)) {
				printk(KERN_ERR "raid5: disabled device %s (not in sync)\n",
				       partition_name(rdev->dev));
				MD_BUG();
				goto abort;
			}
			if (raid_disk > sb->raid_disks) {
				printk(KERN_ERR "raid5: disabled device %s (inconsistent descriptor)\n",
				       partition_name(rdev->dev));
				continue;
			}
			if (disk->operational) {
				printk(KERN_ERR "raid5: disabled device %s (device %d already operational)\n",
				       partition_name(rdev->dev), raid_disk);
				continue;
			}
			printk(KERN_INFO "raid5: device %s operational as raid disk %d\n",
			       partition_name(rdev->dev), raid_disk);

			disk->number = desc->number;
			disk->raid_disk = raid_disk;
			disk->dev = rdev->dev;
			disk->operational = 1;
			disk->used_slot = 1;

			conf->working_disks++;
		} else {
			/*
			 * Must be a spare disk ..
			 */
			printk(KERN_INFO "raid5: spare disk %s\n",
			       partition_name(rdev->dev));
			disk->number = desc->number;
			disk->raid_disk = raid_disk;
			disk->dev = rdev->dev;

			disk->operational = 0;
			disk->write_only = 0;
			disk->spare = 1;
			disk->used_slot = 1;
		}
	}

	for (i = 0; i < MD_SB_DISKS; i++) {
		desc = sb->disks + i;
		raid_disk = desc->raid_disk;
		disk = conf->disks + raid_disk;

		if (disk_faulty(desc) && (raid_disk < sb->raid_disks) &&
		    !conf->disks[raid_disk].used_slot) {
			disk->number = desc->number;
			disk->raid_disk = raid_disk;
			disk->dev = MKDEV(0,0);

			disk->operational = 0;
			disk->write_only = 0;
			disk->spare = 0;
			disk->used_slot = 1;
		}
	}

	conf->raid_disks = sb->raid_disks;
	/*
	 * 0 for a fully functional array, 1 for a degraded array.
	 */
	conf->failed_disks = conf->raid_disks - conf->working_disks;
	conf->mddev = mddev;
	conf->chunk_size = sb->chunk_size;
	conf->level = sb->level;
	conf->algorithm = sb->layout;
	conf->max_nr_stripes = NR_STRIPES;

#if 0
	for (i = 0; i < conf->raid_disks; i++) {
		if (!conf->disks[i].used_slot) {
			MD_BUG();
			goto abort;
		}
	}
#endif
	if (!conf->chunk_size || conf->chunk_size % 4) {
		printk(KERN_ERR "raid5: invalid chunk size %d for md%d\n",
		       conf->chunk_size, mdidx(mddev));
		goto abort;
	}
	if (conf->algorithm > ALGORITHM_RIGHT_SYMMETRIC) {
		printk(KERN_ERR "raid5: unsupported parity algorithm %d for md%d\n",
		       conf->algorithm, mdidx(mddev));
		goto abort;
	}
	if (conf->failed_disks > 1) {
		printk(KERN_ERR "raid5: not enough operational devices for md%d (%d/%d failed)\n",
		       mdidx(mddev), conf->failed_disks, conf->raid_disks);
		goto abort;
	}

	if (conf->working_disks != sb->raid_disks) {
		printk(KERN_ALERT
		       "raid5: md%d, not all disks are operational -- trying to recover array\n",
		       mdidx(mddev));
		start_recovery = 1;
	}

	if (!start_recovery && (sb->state & (1 << MD_SB_CLEAN)) &&
	    check_consistency(mddev)) {
		printk(KERN_ERR "raid5: detected raid-5 superblock xor inconsistency -- running resync\n");
		sb->state &= ~(1 << MD_SB_CLEAN);
	}