⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 xen_blktap.c

📁 xen虚拟机源代码安装包
💻 C
字号:
/* xen_blktap.c * * Interface to blktapctrl to allow use of qemu block drivers with blktap. * This file is based on tools/blktap/drivers/tapdisk.c *  * Copyright (c) 2005 Julian Chesterfield and Andrew Warfield. * Copyright (c) 2008 Kevin Wolf *//* * There are several communication channels which are used by this interface: * *   - A pair of pipes for receiving and sending general control messages *     (qemu-read-N and qemu-writeN in /var/run/tap, where N is the domain ID). *     These control messages are handled by handle_blktap_ctrlmsg(). * *   - One file descriptor per attached disk (/dev/xen/blktapN) for disk *     specific control messages. A callback is triggered on this fd if there *     is a new IO request. The callback function is handle_blktap_iomsg(). * *   - A shared ring for each attached disk containing the actual IO requests  *     and responses. Whenever handle_blktap_iomsg() is triggered it processes *     the requests on this ring. */#include <sys/stat.h>#include <sys/types.h>#include <sys/mman.h>#include <sys/ioctl.h>#include <fcntl.h>#include <stdio.h>#include <errno.h>#include <stdlib.h>#include "vl.h"#include "blktaplib.h"#include "xen_blktap.h"#include "block_int.h"#define MSG_SIZE 4096#define BLKTAP_CTRL_DIR "/var/run/tap"/* If enabled, print debug messages to stderr */#if 1#define DPRINTF(_f, _a...) fprintf(stderr, __FILE__ ":%d: " _f, __LINE__, ##_a)#else#define DPRINTF(_f, _a...) ((void)0)#endif#if 1                                                                        #define ASSERT(_p) \    if ( !(_p) ) { DPRINTF("Assertion '%s' failed, line %d, file %s\n", #_p , \        __LINE__, __FILE__); *(int*)0=0; }#else#define ASSERT(_p) ((void)0)#endif extern int domid;int read_fd;int write_fd;static pid_t process;fd_list_entry_t *fd_start = NULL;static void handle_blktap_iomsg(void* private);struct aiocb_info {	struct td_state	*s;	uint64_t sector;	int nr_secs;	int idx;	long i;};static void unmap_disk(struct td_state *s){	tapdev_info_t *info = s->ring_info;	fd_list_entry_t *entry;		bdrv_close(s->bs);	if (info != NULL && info->mem > 0)	        munmap(info->mem, getpagesize() * BLKTAP_MMAP_REGION_SIZE);	entry = s->fd_entry;	*entry->pprev = entry->next;	if (entry->next)		entry->next->pprev = entry->pprev;	qemu_set_fd_handler2(info->fd, NULL, NULL, NULL, NULL);	close(info->fd);	free(s->fd_entry);	free(s->blkif);	free(s->ring_info);	free(s);	return;}static inline fd_list_entry_t *add_fd_entry(int tap_fd, struct td_state *s){	fd_list_entry_t **pprev, *entry;	DPRINTF("Adding fd_list_entry\n");	/*Add to linked list*/	s->fd_entry   = entry = malloc(sizeof(fd_list_entry_t));	entry->tap_fd = tap_fd;	entry->s      = s;	entry->next   = NULL;	pprev = &fd_start;	while (*pprev != NULL)		pprev = &(*pprev)->next;	*pprev = entry;	entry->pprev = pprev;	return entry;}static inline struct td_state *get_state(int cookie){	fd_list_entry_t *ptr;	ptr = fd_start;	while (ptr != NULL) {		if (ptr->cookie == cookie) return ptr->s;		ptr = ptr->next;	}	return NULL;}static struct td_state *state_init(void){	int i;	struct td_state *s;	blkif_t *blkif;	s = malloc(sizeof(struct td_state));	blkif = s->blkif = malloc(sizeof(blkif_t));	s->ring_info = calloc(1, sizeof(tapdev_info_t));	for (i = 0; i < MAX_REQUESTS; i++) {		blkif->pending_list[i].secs_pending = 0;		blkif->pending_list[i].submitting = 0;	}	return s;}static int map_new_dev(struct td_state *s, int minor){	int tap_fd;	tapdev_info_t *info = s->ring_info;	char *devname;	fd_list_entry_t *ptr;	int page_size;	if (asprintf(&devname,"%s/%s%d", BLKTAP_DEV_DIR, BLKTAP_DEV_NAME, minor) == -1)		return -1;	tap_fd = open(devname, O_RDWR);	if (tap_fd == -1) 	{		DPRINTF("open failed on dev %s!\n",devname);		goto fail;	} 	info->fd = tap_fd;	/*Map the shared memory*/	page_size = getpagesize();	info->mem = mmap(0, page_size * BLKTAP_MMAP_REGION_SIZE, 			  PROT_READ | PROT_WRITE, MAP_SHARED, info->fd, 0);	if ((long int)info->mem == -1) 	{		DPRINTF("mmap failed on dev %s!\n",devname);		goto fail;	}	/* assign the rings to the mapped memory */ 	info->sring = (blkif_sring_t *)((unsigned long)info->mem);	BACK_RING_INIT(&info->fe_ring, info->sring, page_size);		info->vstart = 	        (unsigned long)info->mem + (BLKTAP_RING_PAGES * page_size);	ioctl(info->fd, BLKTAP_IOCTL_SENDPID, process );	ioctl(info->fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_INTERPOSE );	free(devname);	/*Update the fd entry*/	ptr = fd_start;	while (ptr != NULL) {		if (s == ptr->s) {			ptr->tap_fd = tap_fd;			/* Setup fd_handler for qemu main loop */			DPRINTF("set tap_fd = %d\n", tap_fd);			qemu_set_fd_handler2(tap_fd, NULL, &handle_blktap_iomsg, NULL, s);			break;		}		ptr = ptr->next;	}		DPRINTF("map_new_dev = %d\n", minor);	return minor; fail:	free(devname);	return -1;}static int open_disk(struct td_state *s, char *path, int readonly){	struct disk_id id;	BlockDriverState* bs;	DPRINTF("Opening %s\n", path);	bs = calloc(1, sizeof(*bs));	memset(&id, 0, sizeof(struct disk_id));	if (bdrv_open(bs, path, 0) != 0) {		fprintf(stderr, "Could not open image file %s\n", path);		return -ENOMEM;	}	s->bs = bs;	s->flags = readonly ? TD_RDONLY : 0;	s->size = bs->total_sectors;	s->sector_size = 512;	s->info = ((s->flags & TD_RDONLY) ? VDISK_READONLY : 0);	return 0;}static inline void write_rsp_to_ring(struct td_state *s, blkif_response_t *rsp){	tapdev_info_t *info = s->ring_info;	blkif_response_t *rsp_d;		rsp_d = RING_GET_RESPONSE(&info->fe_ring, info->fe_ring.rsp_prod_pvt);	memcpy(rsp_d, rsp, sizeof(blkif_response_t));	info->fe_ring.rsp_prod_pvt++;}static inline void kick_responses(struct td_state *s){	tapdev_info_t *info = s->ring_info;	if (info->fe_ring.rsp_prod_pvt != info->fe_ring.sring->rsp_prod) 	{		RING_PUSH_RESPONSES(&info->fe_ring);		ioctl(info->fd, BLKTAP_IOCTL_KICK_FE);	}}static int send_responses(struct td_state *s, int res, 		   uint64_t sector, int nr_secs, int idx, void *private){	pending_req_t   *preq;	blkif_request_t *req;	int responses_queued = 0;	blkif_t *blkif = s->blkif;	int secs_done = nr_secs;	if ( (idx > MAX_REQUESTS-1) )	{		DPRINTF("invalid index returned(%u)!\n", idx);		return 0;	}	preq = &blkif->pending_list[idx];	req  = &preq->req;	preq->secs_pending -= secs_done;	if (res == -EBUSY && preq->submitting) 		return -EBUSY;  /* propagate -EBUSY back to higher layers */	if (res) 		preq->status = BLKIF_RSP_ERROR;		if (!preq->submitting && preq->secs_pending == 0) 	{		blkif_request_t tmp;		blkif_response_t *rsp;		tmp = preq->req;		rsp = (blkif_response_t *)req;				rsp->id = tmp.id;		rsp->operation = tmp.operation;		rsp->status = preq->status;				write_rsp_to_ring(s, rsp);		responses_queued++;		kick_responses(s);	}		return responses_queued;}static void qemu_send_responses(void* opaque, int ret){	struct aiocb_info* info = opaque;	if (ret != 0) {		DPRINTF("ERROR: ret = %d (%s)\n", ret, strerror(-ret));	}	send_responses(info->s, ret, info->sector, info->nr_secs, 		info->idx, (void*) info->i);	free(info);}/** * Callback function for the IO message pipe. Reads requests from the ring * and processes them (call qemu read/write functions). * * The private parameter points to the struct td_state representing the * disk the request is targeted at. */static void handle_blktap_iomsg(void* private){	struct td_state* s = private;	RING_IDX          rp, j, i;	blkif_request_t  *req;	int idx, nsects, ret;	uint64_t sector_nr;	uint8_t *page;	blkif_t *blkif = s->blkif;	tapdev_info_t *info = s->ring_info;	int page_size = getpagesize();	struct aiocb_info *aiocb_info;	if (info->fe_ring.sring == NULL) {		DPRINTF("  sring == NULL, ignoring IO request\n");		return;	}	rp = info->fe_ring.sring->req_prod; 	xen_rmb();	for (j = info->fe_ring.req_cons; j != rp; j++)	{		int start_seg = 0; 		req = NULL;		req = RING_GET_REQUEST(&info->fe_ring, j);		++info->fe_ring.req_cons;				if (req == NULL)			continue;		idx = req->id;		ASSERT(blkif->pending_list[idx].secs_pending == 0);		memcpy(&blkif->pending_list[idx].req, req, sizeof(*req));		blkif->pending_list[idx].status = BLKIF_RSP_OKAY;		blkif->pending_list[idx].submitting = 1;		sector_nr = req->sector_number;		/* Don't allow writes on readonly devices */		if ((s->flags & TD_RDONLY) && 		    (req->operation == BLKIF_OP_WRITE)) {			blkif->pending_list[idx].status = BLKIF_RSP_ERROR;			goto send_response;		}		for (i = start_seg; i < req->nr_segments; i++) {			nsects = req->seg[i].last_sect - 				 req->seg[i].first_sect + 1;				if ((req->seg[i].last_sect >= page_size >> 9) ||					(nsects <= 0))				continue;			page  = (uint8_t*) MMAP_VADDR(info->vstart, 						   (unsigned long)req->id, i);			page += (req->seg[i].first_sect << SECTOR_SHIFT);			if (sector_nr >= s->size) {				DPRINTF("Sector request failed:\n");				DPRINTF("%s request, idx [%d,%d] size [%llu], "					"sector [%llu,%llu]\n",					(req->operation == BLKIF_OP_WRITE ? 					 "WRITE" : "READ"),					idx,i,					(long long unsigned) 						nsects<<SECTOR_SHIFT,					(long long unsigned) 						sector_nr<<SECTOR_SHIFT,					(long long unsigned) sector_nr);				continue;			}			blkif->pending_list[idx].secs_pending += nsects;			switch (req->operation) 			{			case BLKIF_OP_WRITE:				aiocb_info = malloc(sizeof(*aiocb_info));				aiocb_info->s = s;				aiocb_info->sector = sector_nr;				aiocb_info->nr_secs = nsects;				aiocb_info->idx = idx;				aiocb_info->i = i;				ret = (NULL == bdrv_aio_write(s->bs, sector_nr,							  page, nsects,							  qemu_send_responses,							  aiocb_info));				if (ret) {					blkif->pending_list[idx].status = BLKIF_RSP_ERROR;					DPRINTF("ERROR: bdrv_write() == NULL\n");					goto send_response;				}				break;			case BLKIF_OP_READ:				aiocb_info = malloc(sizeof(*aiocb_info));				aiocb_info->s = s;				aiocb_info->sector = sector_nr;				aiocb_info->nr_secs = nsects;				aiocb_info->idx = idx;				aiocb_info->i = i;				ret = (NULL == bdrv_aio_read(s->bs, sector_nr,							 page, nsects,							 qemu_send_responses,							 aiocb_info));				if (ret) {					blkif->pending_list[idx].status = BLKIF_RSP_ERROR;					DPRINTF("ERROR: bdrv_read() == NULL\n");					goto send_response;				}				break;			default:				DPRINTF("Unknown block operation\n");				break;			}			sector_nr += nsects;		}	send_response:		blkif->pending_list[idx].submitting = 0;		/* force write_rsp_to_ring for synchronous case */		if (blkif->pending_list[idx].secs_pending == 0)			send_responses(s, 0, 0, 0, idx, (void *)(long)0);	}}/** * Callback function for the qemu-read pipe. Reads and processes control  * message from the pipe. * * The parameter private is unused. */static void handle_blktap_ctrlmsg(void* private){	int length, len, msglen;	char *ptr, *path;	image_t *img;	msg_hdr_t *msg;	msg_newdev_t *msg_dev;	msg_pid_t *msg_pid;	int ret = -1;	struct td_state *s = NULL;	fd_list_entry_t *entry;	char buf[MSG_SIZE];	length = read(read_fd, buf, MSG_SIZE);	if (length > 0 && length >= sizeof(msg_hdr_t)) 	{		msg = (msg_hdr_t *)buf;		DPRINTF("blktap: Received msg, len %d, type %d, UID %d\n",			length,msg->type,msg->cookie);		switch (msg->type) {		case CTLMSG_PARAMS: 						ptr = buf + sizeof(msg_hdr_t);			len = (length - sizeof(msg_hdr_t));			path = calloc(1, len + 1);						memcpy(path, ptr, len); 			DPRINTF("Received CTLMSG_PARAMS: [%s]\n", path);			/* Allocate the disk structs */			s = state_init();			/*Open file*/			if (s == NULL || open_disk(s, path, msg->readonly)) {				msglen = sizeof(msg_hdr_t);				msg->type = CTLMSG_IMG_FAIL;				msg->len = msglen;			} else {				entry = add_fd_entry(0, s);				entry->cookie = msg->cookie;				DPRINTF("Entered cookie %d\n", entry->cookie);								memset(buf, 0x00, MSG_SIZE); 							msglen = sizeof(msg_hdr_t) + sizeof(image_t);				msg->type = CTLMSG_IMG;				img = (image_t *)(buf + sizeof(msg_hdr_t));				img->size = s->size;				img->secsize = s->sector_size;				img->info = s->info;				DPRINTF("Writing (size, secsize, info) = "					"(%#" PRIx64 ", %#" PRIx64 ", %d)\n",					s->size, s->sector_size, s->info);			}			len = write(write_fd, buf, msglen);			free(path);			break;					case CTLMSG_NEWDEV:			msg_dev = (msg_newdev_t *)(buf + sizeof(msg_hdr_t));			s = get_state(msg->cookie);			DPRINTF("Retrieving state, cookie %d.....[%s]\n",				msg->cookie, (s == NULL ? "FAIL":"OK"));			if (s != NULL) {				ret = ((map_new_dev(s, msg_dev->devnum) 					== msg_dev->devnum ? 0: -1));			}				memset(buf, 0x00, MSG_SIZE); 			msglen = sizeof(msg_hdr_t);			msg->type = (ret == 0 ? CTLMSG_NEWDEV_RSP 				              : CTLMSG_NEWDEV_FAIL);			msg->len = msglen;			len = write(write_fd, buf, msglen);			break;		case CTLMSG_CLOSE:			s = get_state(msg->cookie);			if (s) unmap_disk(s);			break;					case CTLMSG_PID:			memset(buf, 0x00, MSG_SIZE);			msglen = sizeof(msg_hdr_t) + sizeof(msg_pid_t);			msg->type = CTLMSG_PID_RSP;			msg->len = msglen;			msg_pid = (msg_pid_t *)(buf + sizeof(msg_hdr_t));			process = getpid();			msg_pid->pid = process;			len = write(write_fd, buf, msglen);			break;		default:			break;		}	}}/** * Opens a control socket, i.e. a pipe to communicate with blktapctrl. * * Returns the file descriptor number for the pipe; -1 in error case */static int open_ctrl_socket(char *devname){	int ipc_fd;	if (mkdir(BLKTAP_CTRL_DIR, 0755) == 0)		DPRINTF("Created %s directory\n", BLKTAP_CTRL_DIR);	if (access(devname, R_OK | W_OK))		return -1;	ipc_fd = open(devname,O_RDWR|O_NONBLOCK);	if (ipc_fd < 0) {		DPRINTF("FD open failed\n");		return -1;	}	return ipc_fd;}/** * Initialize the blktap interface, i.e. open a pair of pipes in /var/run/tap * and register a fd handler. * * Returns 0 on success. */int init_blktap(void){	char* devname;		DPRINTF("Init blktap pipes\n");	/* Open the read pipe */	if (asprintf(&devname, BLKTAP_CTRL_DIR "/qemu-read-%d", domid) >= 0) {			read_fd = open_ctrl_socket(devname);				free(devname);				if (read_fd == -1) {			fprintf(stderr, "Could not open %s/qemu-read-%d\n",				BLKTAP_CTRL_DIR, domid);			return -1;		}	}		/* Open the write pipe */	if (asprintf(&devname, BLKTAP_CTRL_DIR "/qemu-write-%d", domid) >= 0) {		write_fd = open_ctrl_socket(devname);		free(devname);				if (write_fd == -1) {			fprintf(stderr, "Could not open %s/qemu-write-%d\n",				BLKTAP_CTRL_DIR, domid);			close(read_fd);			return -1;		}	}	/* Attach a handler to the read pipe (called from qemu main loop) */	qemu_set_fd_handler2(read_fd, NULL, &handle_blktap_ctrlmsg, NULL, NULL);	return 0;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -