xen-blkfront.c

来自「linux 内核源代码」· C语言 代码 · 共 985 行 · 第 1/2 页

C
985
字号
/* * blkfront.c * * XenLinux virtual block device driver. * * Copyright (c) 2003-2004, Keir Fraser & Steve Hand * Modifications by Mark A. Williamson are (c) Intel Research Cambridge * Copyright (c) 2004, Christian Limpach * Copyright (c) 2004, Andrew Warfield * Copyright (c) 2005, Christopher Clark * Copyright (c) 2005, XenSource Ltd * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License version 2 * as published by the Free Software Foundation; or, when distributed * separately from the Linux kernel or incorporated into other * software packages, subject to the following license: * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this source file (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, modify, * merge, publish, distribute, sublicense, and/or sell copies of the Software, * and to permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. 
*/
#include <linux/interrupt.h>
#include <linux/blkdev.h>
#include <linux/module.h>

#include <xen/xenbus.h>
#include <xen/grant_table.h>
#include <xen/events.h>
#include <xen/page.h>

#include <xen/interface/grant_table.h>
#include <xen/interface/io/blkif.h>

#include <asm/xen/hypervisor.h>

/* Lifecycle state of the frontend's connection to the blkback driver. */
enum blkif_state {
	BLKIF_STATE_DISCONNECTED,
	BLKIF_STATE_CONNECTED,
	BLKIF_STATE_SUSPENDED,
};

/*
 * Per-slot bookkeeping kept alongside the shared ring: a private copy of
 * the request we handed to the backend (so it can be reissued after a
 * suspend/resume), the originating struct request pointer, and the pfn of
 * each granted page so the grants can be torn down on completion.
 */
struct blk_shadow {
	struct blkif_request req;
	unsigned long request;		/* (struct request *) cast to long */
	unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
};

static struct block_device_operations xlvbd_block_fops;

/* Number of request slots in one page-sized shared ring. */
#define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE)

/*
 * We have one of these per vbd, whether ide, scsi or 'other'.  They
 * hang in private_data off the gendisk structure. We may end up
 * putting all kinds of interesting stuff here :-)
 */
struct blkfront_info
{
	struct xenbus_device *xbdev;
	dev_t dev;
	struct gendisk *gd;
	int vdevice;			/* virtual device id from xenstore */
	blkif_vdev_t handle;
	enum blkif_state connected;
	int ring_ref;			/* grant ref of the shared ring page */
	struct blkif_front_ring ring;
	unsigned int evtchn, irq;
	struct request_queue *rq;
	struct work_struct work;	/* deferred queue restart */
	struct gnttab_free_callback callback;
	struct blk_shadow shadow[BLK_RING_SIZE];
	unsigned long shadow_free;	/* head of free list threaded through shadow[] */
	int feature_barrier;		/* backend supports WRITE_BARRIER */
	/**
	 * The number of people holding this device open.  We won't allow a
	 * hot-unplug unless this is 0.
	 */
	int users;
};

/* Single lock protecting all ring/shadow state and the request queue. */
static DEFINE_SPINLOCK(blkif_io_lock);

#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
	(BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
#define GRANT_INVALID_REF	0

#define PARTS_PER_DISK		16

#define BLKIF_MAJOR(dev) ((dev)>>8)
#define BLKIF_MINOR(dev) ((dev) & 0xff)

#define DEV_NAME	"xvd"	/* name in /dev */

/* Information about our VBDs.
 */
#define MAX_VBDS 64
static LIST_HEAD(vbds_list);

/*
 * Pop a free shadow-ring slot index.  The free list is threaded through
 * the otherwise-unused req.id field of each free shadow entry; caller
 * must hold blkif_io_lock.
 */
static int get_id_from_freelist(struct blkfront_info *info)
{
	unsigned long free = info->shadow_free;
	BUG_ON(free > BLK_RING_SIZE);
	info->shadow_free = info->shadow[free].req.id;
	info->shadow[free].req.id = 0x0fffffee; /* debug */
	return free;
}

/* Return a completed slot to the head of the shadow free list. */
static void add_id_to_freelist(struct blkfront_info *info,
			       unsigned long id)
{
	info->shadow[id].req.id  = info->shadow_free;
	info->shadow[id].request = 0;
	info->shadow_free = id;
}

/*
 * Grant-table free callback: fires once enough grant references become
 * available again; defers the actual queue restart to process context.
 */
static void blkif_restart_queue_callback(void *arg)
{
	struct blkfront_info *info = (struct blkfront_info *)arg;
	schedule_work(&info->work);
}

/*
 * blkif_queue_request
 *
 * request block io
 *
 * id: for guest use only.
 * operation: BLKIF_OP_{READ,WRITE,PROBE}
 * buffer: buffer to read/write into. this should be a
 *   virtual address in the guest os.
 *
 * Returns 0 on success, 1 if the request could not be queued (not
 * connected, or out of grant references) — caller requeues it.
 */
static int blkif_queue_request(struct request *req)
{
	struct blkfront_info *info = req->rq_disk->private_data;
	unsigned long buffer_mfn;
	struct blkif_request *ring_req;
	struct req_iterator iter;
	struct bio_vec *bvec;
	unsigned long id;
	unsigned int fsect, lsect;
	int ref;
	grant_ref_t gref_head;

	if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
		return 1;

	/*
	 * Reserve a full request's worth of grant refs up front; if none
	 * are available, arrange to be called back when they are.
	 */
	if (gnttab_alloc_grant_references(
		BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
		gnttab_request_free_callback(
			&info->callback,
			blkif_restart_queue_callback,
			info,
			BLKIF_MAX_SEGMENTS_PER_REQUEST);
		return 1;
	}

	/* Fill out a communications ring structure. */
	ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
	id = get_id_from_freelist(info);
	info->shadow[id].request = (unsigned long)req;

	ring_req->id = id;
	ring_req->sector_number = (blkif_sector_t)req->sector;
	ring_req->handle = info->handle;

	ring_req->operation = rq_data_dir(req) ?
		BLKIF_OP_WRITE : BLKIF_OP_READ;
	if (blk_barrier_rq(req))
		ring_req->operation = BLKIF_OP_WRITE_BARRIER;

	/* Grant the backend access to each segment's page. */
	ring_req->nr_segments = 0;
	rq_for_each_segment(bvec, req, iter) {
		BUG_ON(ring_req->nr_segments == BLKIF_MAX_SEGMENTS_PER_REQUEST);
		buffer_mfn = pfn_to_mfn(page_to_pfn(bvec->bv_page));
		fsect = bvec->bv_offset >> 9;
		lsect = fsect + (bvec->bv_len >> 9) - 1;
		/* install a grant reference. */
		ref = gnttab_claim_grant_reference(&gref_head);
		BUG_ON(ref == -ENOSPC);	/* refs were reserved above */

		/* Writes grant the backend read-only access, reads writable. */
		gnttab_grant_foreign_access_ref(
				ref,
				info->xbdev->otherend_id,
				buffer_mfn,
				rq_data_dir(req) );

		info->shadow[id].frame[ring_req->nr_segments] =
				mfn_to_pfn(buffer_mfn);

		ring_req->seg[ring_req->nr_segments] =
				(struct blkif_request_segment) {
					.gref       = ref,
					.first_sect = fsect,
					.last_sect  = lsect };

		ring_req->nr_segments++;
	}

	info->ring.req_prod_pvt++;

	/* Keep a private copy so we can reissue requests when recovering. */
	info->shadow[id].req = *ring_req;

	/* Release the grant refs we reserved but did not claim. */
	gnttab_free_grant_references(gref_head);

	return 0;
}

/*
 * Publish all privately-produced requests to the shared ring and notify
 * the backend via its event channel if it asked to be notified.
 */
static inline void flush_requests(struct blkfront_info *info)
{
	int notify;

	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);

	if (notify)
		notify_remote_via_irq(info->irq);
}

/*
 * do_blkif_request
 *  read a block; request is in a request queue
 *
 * request_fn for our queue: drains the elevator into the shared ring,
 * stopping the queue when the ring is full.  Called with blkif_io_lock
 * held (it is the queue lock passed to blk_init_queue()).
 */
static void do_blkif_request(struct request_queue *rq)
{
	struct blkfront_info *info = NULL;
	struct request *req;
	int queued;

	pr_debug("Entered do_blkif_request\n");

	queued = 0;

	while ((req = elv_next_request(rq)) != NULL) {
		info = req->rq_disk->private_data;
		/* Only filesystem requests are forwarded; others fail. */
		if (!blk_fs_request(req)) {
			end_request(req, 0);
			continue;
		}

		if (RING_FULL(&info->ring))
			goto wait;

		pr_debug("do_blk_req %p: cmd %p, sec %lx, "
			 "(%u/%li) buffer:%p [%s]\n",
			 req, req->cmd, (unsigned long)req->sector,
			 req->current_nr_sectors,
			 req->nr_sectors, req->buffer,
			 rq_data_dir(req) ? "write" : "read");

		blkdev_dequeue_request(req);
		if (blkif_queue_request(req)) {
			blk_requeue_request(rq, req);
wait:
			/* Avoid pointless unplugs.
 */
			blk_stop_queue(rq);
			break;
		}

		queued++;
	}

	/* Push everything we queued in one batch. */
	if (queued != 0)
		flush_requests(info);
}

/*
 * Create and configure the block-layer request queue for one gendisk so
 * that any merged request still fits in a single I/O ring slot.
 * Returns 0 on success, -1 if the queue could not be allocated.
 */
static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
{
	struct request_queue *rq;

	rq = blk_init_queue(do_blkif_request, &blkif_io_lock);
	if (rq == NULL)
		return -1;

	/* The backend does its own scheduling; noop is sufficient here. */
	elevator_init(rq, "noop");

	/* Hard sector size and max sectors impersonate the equiv. hardware. */
	blk_queue_hardsect_size(rq, sector_size);
	blk_queue_max_sectors(rq, 512);

	/* Each segment in a request is up to an aligned page in size. */
	blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
	blk_queue_max_segment_size(rq, PAGE_SIZE);

	/* Ensure a merged request will fit in a single I/O ring slot. */
	blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
	blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);

	/* Make sure buffer addresses are sector-aligned. */
	blk_queue_dma_alignment(rq, 511);

	gd->queue = rq;

	return 0;
}

/*
 * Enable or disable ordered (barrier) writes on the queue according to
 * info->feature_barrier, and log the result.  Returns 0 on success or
 * the error from blk_queue_ordered().
 */
static int xlvbd_barrier(struct blkfront_info *info)
{
	int err;

	err = blk_queue_ordered(info->rq,
				info->feature_barrier ? QUEUE_ORDERED_DRAIN : QUEUE_ORDERED_NONE,
				NULL);

	if (err)
		return err;

	printk(KERN_INFO "blkfront: %s: barriers %s\n",
	       info->gd->disk_name,
	       info->feature_barrier ?
	       "enabled" : "disabled");

	return 0;
}

/*
 * Allocate and register the gendisk for one virtual block device,
 * naming it xvd[a-...] (whole disk) or xvd[a-...]N (partition) and
 * applying the read-only/removable/CD flags the backend advertised.
 * Returns 0 on success, -ENODEV on any failure.
 */
static int xlvbd_alloc_gendisk(int minor, blkif_sector_t capacity,
			       int vdevice, u16 vdisk_info, u16 sector_size,
			       struct blkfront_info *info)
{
	struct gendisk *gd;
	int nr_minors = 1;
	int err = -ENODEV;

	BUG_ON(info->gd != NULL);
	BUG_ON(info->rq != NULL);

	/* A minor that is a multiple of PARTS_PER_DISK is a whole disk. */
	if ((minor % PARTS_PER_DISK) == 0)
		nr_minors = PARTS_PER_DISK;

	gd = alloc_disk(nr_minors);
	if (gd == NULL)
		goto out;

	if (nr_minors > 1)
		sprintf(gd->disk_name, "%s%c", DEV_NAME,
			'a' + minor / PARTS_PER_DISK);
	else
		sprintf(gd->disk_name, "%s%c%d", DEV_NAME,
			'a' + minor / PARTS_PER_DISK,
			minor % PARTS_PER_DISK);

	gd->major = XENVBD_MAJOR;
	gd->first_minor = minor;
	gd->fops = &xlvbd_block_fops;
	gd->private_data = info;
	gd->driverfs_dev = &(info->xbdev->dev);
	set_capacity(gd, capacity);

	if (xlvbd_init_blk_queue(gd, sector_size)) {
		del_gendisk(gd);
		goto out;
	}

	info->rq = gd->queue;
	info->gd = gd;

	if (info->feature_barrier)
		xlvbd_barrier(info);

	if (vdisk_info & VDISK_READONLY)
		set_disk_ro(gd, 1);

	if (vdisk_info & VDISK_REMOVABLE)
		gd->flags |= GENHD_FL_REMOVABLE;

	if (vdisk_info & VDISK_CDROM)
		gd->flags |= GENHD_FL_CD;

	return 0;

 out:
	return err;
}

/*
 * Restart the stopped request queue if the ring has room again.
 * Caller must hold blkif_io_lock.
 */
static void kick_pending_request_queues(struct blkfront_info *info)
{
	if (!RING_FULL(&info->ring)) {
		/* Re-enable calldowns. */
		blk_start_queue(info->rq);

		/* Kick things off immediately. */
		do_blkif_request(info->rq);
	}
}

/*
 * Work-queue handler scheduled by blkif_restart_queue_callback(): retry
 * queue processing once grant references have been freed up.
 */
static void blkif_restart_queue(struct work_struct *work)
{
	struct blkfront_info *info = container_of(work, struct blkfront_info, work);

	spin_lock_irq(&blkif_io_lock);
	if (info->connected == BLKIF_STATE_CONNECTED)
		kick_pending_request_queues(info);
	spin_unlock_irq(&blkif_io_lock);
}

/*
 * Tear down the device channel (ring grant, event channel/irq), leaving
 * the state as SUSPENDED or DISCONNECTED depending on @suspend.
 */
static void blkif_free(struct blkfront_info *info, int suspend)
{
	/* Prevent new requests being issued until we fix things up. */
	spin_lock_irq(&blkif_io_lock);
	info->connected = suspend ?
		BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
	/* No more blkif_request().
 */
	if (info->rq)
		blk_stop_queue(info->rq);

	/* No more gnttab callback work. */
	gnttab_cancel_free_callback(&info->callback);
	spin_unlock_irq(&blkif_io_lock);

	/* Flush gnttab callback work. Must be done with no locks held. */
	flush_scheduled_work();

	/* Free resources associated with old device channel. */
	if (info->ring_ref != GRANT_INVALID_REF) {
		gnttab_end_foreign_access(info->ring_ref, 0,
					  (unsigned long)info->ring.sring);
		info->ring_ref = GRANT_INVALID_REF;
		info->ring.sring = NULL;
	}
	if (info->irq)
		unbind_from_irqhandler(info->irq, info);
	info->evtchn = info->irq = 0;

}

/* Revoke the backend's access to every page granted for this request. */
static void blkif_completion(struct blk_shadow *s)
{
	int i;
	for (i = 0; i < s->req.nr_segments; i++)
		gnttab_end_foreign_access(s->req.seg[i].gref, 0, 0UL);
}

/*
 * Event-channel interrupt handler: consume responses from the shared
 * ring, end the grants, recycle shadow slots, and complete the
 * corresponding block-layer requests.
 */
static irqreturn_t blkif_interrupt(int irq, void *dev_id)
{
	struct request *req;
	struct blkif_response *bret;
	RING_IDX i, rp;
	unsigned long flags;
	struct blkfront_info *info = (struct blkfront_info *)dev_id;
	int uptodate;

	spin_lock_irqsave(&blkif_io_lock, flags);

	/* Stale interrupt during teardown/suspend: nothing to do. */
	if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
		spin_unlock_irqrestore(&blkif_io_lock, flags);
		return IRQ_HANDLED;
	}

 again:
	rp = info->ring.sring->rsp_prod;
	rmb(); /* Ensure we see queued responses up to 'rp'. */

	for (i = info->ring.rsp_cons; i != rp; i++) {
		unsigned long id;
		int ret;

		bret = RING_GET_RESPONSE(&info->ring, i);
		id   = bret->id;
		req  = (struct request *)info->shadow[id].request;

		blkif_completion(&info->shadow[id]);

		add_id_to_freelist(info, id);

		uptodate = (bret->status == BLKIF_RSP_OKAY);
		switch (bret->operation) {
		case BLKIF_OP_WRITE_BARRIER:
			/* Backend rejected the barrier: stop issuing them. */
			if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
				printk(KERN_WARNING "blkfront: %s: write barrier op failed\n",
				       info->gd->disk_name);
				uptodate = -EOPNOTSUPP;
				info->feature_barrier = 0;
				xlvbd_barrier(info);
			}
			/* fall through */
		case BLKIF_OP_READ:
		case BLKIF_OP_WRITE:
			if (unlikely(bret->status != BLKIF_RSP_OKAY))
			/* NOTE(review): source chunk ends here (page 1 of 2);
			 * the remainder of blkif_interrupt() continues on the
			 * next page of the original file. */

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?