xen-blkfront.c
来自「linux 内核源代码」· C语言 代码 · 共 985 行 · 第 1/2 页
C
985 行
/*
 * blkfront.c
 *
 * XenLinux virtual block device driver.
 *
 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
 * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
 * Copyright (c) 2004, Christian Limpach
 * Copyright (c) 2004, Andrew Warfield
 * Copyright (c) 2005, Christopher Clark
 * Copyright (c) 2005, XenSource Ltd
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/interrupt.h>
#include <linux/blkdev.h>
#include <linux/module.h>

#include <xen/xenbus.h>
#include <xen/grant_table.h>
#include <xen/events.h>
#include <xen/page.h>

#include <xen/interface/grant_table.h>
#include <xen/interface/io/blkif.h>

#include <asm/xen/hypervisor.h>

/* Connection state of a frontend device, stored in blkfront_info.connected. */
enum blkif_state {
	BLKIF_STATE_DISCONNECTED,
	BLKIF_STATE_CONNECTED,
	BLKIF_STATE_SUSPENDED,
};

/*
 * Private copy of an in-flight ring request, indexed by request id.
 * While an entry is unused, req.id links it into the free list (see
 * get_id_from_freelist() / add_id_to_freelist()).
 */
struct blk_shadow {
	struct blkif_request req;	/* copy of the request placed on the ring */
	unsigned long request;		/* the struct request *, stored as an integer */
	unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST]; /* pfn per segment */
};

static struct block_device_operations xlvbd_block_fops;

#define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE)

/*
 * We have one of these per vbd, whether ide, scsi or 'other'.  They
 * hang in private_data off the gendisk structure. We may end up
 * putting all kinds of interesting stuff here :-)
 */
struct blkfront_info
{
	struct xenbus_device *xbdev;
	dev_t dev;
	struct gendisk *gd;
	int vdevice;
	blkif_vdev_t handle;
	enum blkif_state connected;
	int ring_ref;
	struct blkif_front_ring ring;
	unsigned int evtchn, irq;
	struct request_queue *rq;
	struct work_struct work;
	struct gnttab_free_callback callback;
	struct blk_shadow shadow[BLK_RING_SIZE];
	unsigned long shadow_free;	/* index of the first free shadow[] entry */
	int feature_barrier;

	/**
	 * The number of people holding this device open.  We won't allow a
	 * hot-unplug unless this is 0.
	 */
	int users;
};

static DEFINE_SPINLOCK(blkif_io_lock);

#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
	(BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
#define GRANT_INVALID_REF	0

#define PARTS_PER_DISK		16

#define BLKIF_MAJOR(dev) ((dev)>>8)
#define BLKIF_MINOR(dev) ((dev) & 0xff)

#define DEV_NAME	"xvd"	/* name in /dev */

/* Information about our VBDs. */
#define MAX_VBDS 64
static LIST_HEAD(vbds_list);

/*
 * Pop the head of the shadow free list and return its index.
 *
 * The free list is threaded through shadow[].req.id: each free entry
 * stores the index of the next free entry.  The popped entry's id is
 * overwritten with a recognisable debug value.
 */
static int get_id_from_freelist(struct blkfront_info *info)
{
	unsigned long free = info->shadow_free;

	/*
	 * shadow[] has exactly BLK_RING_SIZE entries, so an index equal to
	 * BLK_RING_SIZE is already out of bounds and must trip the check.
	 */
	BUG_ON(free >= BLK_RING_SIZE);
	info->shadow_free = info->shadow[free].req.id;
	info->shadow[free].req.id = 0x0fffffee; /* debug */
	return free;
}

/*
 * Push shadow entry @id back onto the head of the free list and clear
 * its saved request pointer.
 */
static void add_id_to_freelist(struct blkfront_info *info,
			       unsigned long id)
{
	info->shadow[id].req.id  = info->shadow_free;
	info->shadow[id].request = 0;
	info->shadow_free = id;
}

/*
 * Grant-table free callback (registered from blkif_queue_request()):
 * defers the queue restart to info->work.
 */
static void blkif_restart_queue_callback(void *arg)
{
	struct blkfront_info *info = (struct blkfront_info *)arg;

	schedule_work(&info->work);
}

/*
 * blkif_queue_request
 *
 * Translate a block-layer request into a blkif ring request and place it
 * in the next private producer slot.  One grant reference is installed
 * per segment so the other end can access the data pages.
 *
 * Returns 0 on success.  Returns 1 if the device is not connected or if
 * no grant references could be allocated; in the latter case a free
 * callback is registered so the queue is restarted later.
 */
static int blkif_queue_request(struct request *req)
{
	struct blkfront_info *info = req->rq_disk->private_data;
	unsigned long buffer_mfn;
	struct blkif_request *ring_req;
	struct req_iterator iter;
	struct bio_vec *bvec;
	unsigned long id;
	unsigned int fsect, lsect;
	int ref;
	grant_ref_t gref_head;

	if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
		return 1;

	if (gnttab_alloc_grant_references(
		BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
		gnttab_request_free_callback(
			&info->callback,
			blkif_restart_queue_callback,
			info,
			BLKIF_MAX_SEGMENTS_PER_REQUEST);
		return 1;
	}

	/* Fill out a communications ring structure. */
	ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
	id = get_id_from_freelist(info);
	info->shadow[id].request = (unsigned long)req;

	ring_req->id = id;
	ring_req->sector_number = (blkif_sector_t)req->sector;
	ring_req->handle = info->handle;

	ring_req->operation = rq_data_dir(req) ?
		BLKIF_OP_WRITE : BLKIF_OP_READ;
	if (blk_barrier_rq(req))
		ring_req->operation = BLKIF_OP_WRITE_BARRIER;

	ring_req->nr_segments = 0;
	rq_for_each_segment(bvec, req, iter) {
		BUG_ON(ring_req->nr_segments == BLKIF_MAX_SEGMENTS_PER_REQUEST);
		buffer_mfn = pfn_to_mfn(page_to_pfn(bvec->bv_page));
		/* Segment bounds expressed in 512-byte sectors within the page. */
		fsect = bvec->bv_offset >> 9;
		lsect = fsect + (bvec->bv_len >> 9) - 1;
		/* install a grant reference. */
		ref = gnttab_claim_grant_reference(&gref_head);
		BUG_ON(ref == -ENOSPC);

		gnttab_grant_foreign_access_ref(
				ref,
				info->xbdev->otherend_id,
				buffer_mfn,
				rq_data_dir(req) );

		info->shadow[id].frame[ring_req->nr_segments] =
				mfn_to_pfn(buffer_mfn);

		ring_req->seg[ring_req->nr_segments] =
				(struct blkif_request_segment) {
					.gref       = ref,
					.first_sect = fsect,
					.last_sect  = lsect };

		ring_req->nr_segments++;
	}

	info->ring.req_prod_pvt++;

	/* Keep a private copy so we can reissue requests when recovering. */
	info->shadow[id].req = *ring_req;

	/* Return the grant references that were allocated but not claimed. */
	gnttab_free_grant_references(gref_head);

	return 0;
}

/*
 * Publish the privately queued requests on the shared ring and notify
 * the other end through the event-channel irq if the ring macros report
 * that a notification is needed.
 */
static inline void flush_requests(struct blkfront_info *info)
{
	int notify;

	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);

	if (notify)
		notify_remote_via_irq(info->irq);
}

/*
 * do_blkif_request
 *  read a block; request is in a request queue
 *
 * Request function for the queue (installed by xlvbd_init_blk_queue()).
 * Drains the queue onto the ring until the ring is full or a request
 * cannot be queued, in which case the queue is stopped.  Non-filesystem
 * requests are failed immediately.
 */
static void do_blkif_request(struct request_queue *rq)
{
	struct blkfront_info *info = NULL;
	struct request *req;
	int queued;

	pr_debug("Entered do_blkif_request\n");

	queued = 0;

	while ((req = elv_next_request(rq)) != NULL) {
		info = req->rq_disk->private_data;
		if (!blk_fs_request(req)) {
			end_request(req, 0);
			continue;
		}

		if (RING_FULL(&info->ring))
			goto wait;

		pr_debug("do_blk_req %p: cmd %p, sec %lx, "
			 "(%u/%li) buffer:%p [%s]\n",
			 req, req->cmd, (unsigned long)req->sector,
			 req->current_nr_sectors,
			 req->nr_sectors, req->buffer,
			 rq_data_dir(req) ? "write" : "read");

		blkdev_dequeue_request(req);
		if (blkif_queue_request(req)) {
			/* Could not queue it: hand the request back. */
			blk_requeue_request(rq, req);
wait:
			/* Avoid pointless unplugs. */
			blk_stop_queue(rq);
			break;
		}

		queued++;
	}

	/* info is non-NULL here: queued != 0 implies the loop body ran. */
	if (queued != 0)
		flush_requests(info);
}

/*
 * Create the request queue for @gd and apply the limits a blkif ring
 * slot imposes (page-sized segments, at most
 * BLKIF_MAX_SEGMENTS_PER_REQUEST segments per request).
 *
 * Returns 0 on success, -1 if the queue could not be allocated.
 */
static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
{
	struct request_queue *rq;

	rq = blk_init_queue(do_blkif_request, &blkif_io_lock);
	if (rq == NULL)
		return -1;

	elevator_init(rq, "noop");

	/* Hard sector size and max sectors impersonate the equiv. hardware. */
	blk_queue_hardsect_size(rq, sector_size);
	blk_queue_max_sectors(rq, 512);

	/* Each segment in a request is up to an aligned page in size. */
	blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
	blk_queue_max_segment_size(rq, PAGE_SIZE);

	/* Ensure a merged request will fit in a single I/O ring slot. */
	blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
	blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);

	/* Make sure buffer addresses are sector-aligned. */
	blk_queue_dma_alignment(rq, 511);

	gd->queue = rq;

	return 0;
}

/*
 * Set the queue's ordered mode from info->feature_barrier and log the
 * result.  Returns 0 on success or the error from blk_queue_ordered().
 */
static int xlvbd_barrier(struct blkfront_info *info)
{
	int err;

	err = blk_queue_ordered(info->rq,
				info->feature_barrier ? QUEUE_ORDERED_DRAIN : QUEUE_ORDERED_NONE,
				NULL);

	if (err)
		return err;

	printk(KERN_INFO "blkfront: %s: barriers %s\n",
	       info->gd->disk_name,
	       info->feature_barrier ? "enabled" : "disabled");
	return 0;
}

/*
 * Allocate and set up the gendisk and request queue for a vbd, storing
 * them in info->gd and info->rq.  The disk is not registered here.
 *
 * A minor that is a multiple of PARTS_PER_DISK names a whole disk
 * ("xvda") and gets PARTS_PER_DISK minors; any other minor names a
 * single partition ("xvda1") and gets one.
 *
 * Returns 0 on success, -ENODEV if the disk or its queue could not be
 * allocated.
 */
static int xlvbd_alloc_gendisk(int minor, blkif_sector_t capacity,
			       int vdevice, u16 vdisk_info, u16 sector_size,
			       struct blkfront_info *info)
{
	struct gendisk *gd;
	int nr_minors = 1;
	int err = -ENODEV;

	BUG_ON(info->gd != NULL);
	BUG_ON(info->rq != NULL);

	if ((minor % PARTS_PER_DISK) == 0)
		nr_minors = PARTS_PER_DISK;

	gd = alloc_disk(nr_minors);
	if (gd == NULL)
		goto out;

	if (nr_minors > 1)
		sprintf(gd->disk_name, "%s%c", DEV_NAME,
			'a' + minor / PARTS_PER_DISK);
	else
		sprintf(gd->disk_name, "%s%c%d", DEV_NAME,
			'a' + minor / PARTS_PER_DISK,
			minor % PARTS_PER_DISK);

	gd->major = XENVBD_MAJOR;
	gd->first_minor = minor;
	gd->fops = &xlvbd_block_fops;
	gd->private_data = info;
	gd->driverfs_dev = &(info->xbdev->dev);
	set_capacity(gd, capacity);

	if (xlvbd_init_blk_queue(gd, sector_size)) {
		/*
		 * The disk was only allocated, never registered with
		 * add_disk(), so release the alloc_disk() reference with
		 * put_disk() instead of tearing it down via del_gendisk().
		 */
		put_disk(gd);
		goto out;
	}

	info->rq = gd->queue;
	info->gd = gd;

	if (info->feature_barrier)
		xlvbd_barrier(info);

	if (vdisk_info & VDISK_READONLY)
		set_disk_ro(gd, 1);

	if (vdisk_info & VDISK_REMOVABLE)
		gd->flags |= GENHD_FL_REMOVABLE;

	if (vdisk_info & VDISK_CDROM)
		gd->flags |= GENHD_FL_CD;

	return 0;

 out:
	return err;
}

/*
 * If the ring has room, restart the request queue and immediately try
 * to push pending requests onto the ring.
 */
static void kick_pending_request_queues(struct blkfront_info *info)
{
	if (!RING_FULL(&info->ring)) {
		/* Re-enable calldowns. */
		blk_start_queue(info->rq);
		/* Kick things off immediately. */
		do_blkif_request(info->rq);
	}
}

/*
 * Work handler for info->work: restarts the request queue under
 * blkif_io_lock, but only while the device is connected.
 */
static void blkif_restart_queue(struct work_struct *work)
{
	struct blkfront_info *info = container_of(work, struct blkfront_info, work);

	spin_lock_irq(&blkif_io_lock);
	if (info->connected == BLKIF_STATE_CONNECTED)
		kick_pending_request_queues(info);
	spin_unlock_irq(&blkif_io_lock);
}

/*
 * Stop I/O on the device and release the shared ring grant and the irq
 * binding.  @suspend selects the resulting state: BLKIF_STATE_SUSPENDED
 * when non-zero, BLKIF_STATE_DISCONNECTED otherwise.
 */
static void blkif_free(struct blkfront_info *info, int suspend)
{
	/* Prevent new requests being issued until we fix things up. */
	spin_lock_irq(&blkif_io_lock);
	info->connected = suspend ?
		BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
	/* No more blkif_request(). */
	if (info->rq)
		blk_stop_queue(info->rq);
	/* No more gnttab callback work. */
	gnttab_cancel_free_callback(&info->callback);
	spin_unlock_irq(&blkif_io_lock);

	/* Flush gnttab callback work. Must be done with no locks held. */
	flush_scheduled_work();

	/* Free resources associated with old device channel. */
	if (info->ring_ref != GRANT_INVALID_REF) {
		gnttab_end_foreign_access(info->ring_ref, 0,
					  (unsigned long)info->ring.sring);
		info->ring_ref = GRANT_INVALID_REF;
		info->ring.sring = NULL;
	}
	if (info->irq)
		unbind_from_irqhandler(info->irq, info);
	info->evtchn = info->irq = 0;

}

/*
 * End foreign access on every segment grant recorded in the shadow copy
 * of a completed request.
 */
static void blkif_completion(struct blk_shadow *s)
{
	int i;

	for (i = 0; i < s->req.nr_segments; i++)
		gnttab_end_foreign_access(s->req.seg[i].gref, 0, 0UL);
}

/*
 * Interrupt handler for the device's event channel; @dev_id is the
 * struct blkfront_info.  Under blkif_io_lock it walks the responses
 * between ring.rsp_cons and the shared rsp_prod, releasing each
 * request's grants and shadow slot before handling the response by
 * operation type.  Returns IRQ_HANDLED without touching the ring when
 * the device is not connected.
 */
static irqreturn_t blkif_interrupt(int irq, void *dev_id)
{
	struct request *req;
	struct blkif_response *bret;
	RING_IDX i, rp;
	unsigned long flags;
	struct blkfront_info *info = (struct blkfront_info *)dev_id;
	int uptodate;

	spin_lock_irqsave(&blkif_io_lock, flags);

	if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
		spin_unlock_irqrestore(&blkif_io_lock, flags);
		return IRQ_HANDLED;
	}

 again:
	rp = info->ring.sring->rsp_prod;
	rmb(); /* Ensure we see queued responses up to 'rp'. */

	for (i = info->ring.rsp_cons; i != rp; i++) {
		unsigned long id;
		int ret;

		bret = RING_GET_RESPONSE(&info->ring, i);
		id   = bret->id;
		req  = (struct request *)info->shadow[id].request;

		blkif_completion(&info->shadow[id]);

		add_id_to_freelist(info, id);

		uptodate = (bret->status == BLKIF_RSP_OKAY);
		switch (bret->operation) {
		case BLKIF_OP_WRITE_BARRIER:
			if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
				printk(KERN_WARNING "blkfront: %s: write barrier op failed\n",
				       info->gd->disk_name);
				uptodate = -EOPNOTSUPP;
				info->feature_barrier = 0;
				xlvbd_barrier(info);
			}
			/* fall through */
		case BLKIF_OP_READ:
		case BLKIF_OP_WRITE:
			if (unlikely(bret->status != BLKIF_RSP_OKAY))
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?