⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 lguest.c

📁 linux 内核源代码
💻 C
📖 第 1 页 / 共 4 页
字号:
/* Our block (disk) device should be really simple: the Guest asks for a block * number and we read or write that position in the file.  Unfortunately, that * was amazingly slow: the Guest waits until the read is finished before * running anything else, even if it could have been doing useful work. * * We could use async I/O, except it's reputed to suck so hard that characters * actually go missing from your code when you try to use it. * * So we farm the I/O out to thread, and communicate with it via a pipe. *//* This hangs off device->priv. */struct vblk_info{	/* The size of the file. */	off64_t len;	/* The file descriptor for the file. */	int fd;	/* IO thread listens on this file descriptor [0]. */	int workpipe[2];	/* IO thread writes to this file descriptor to mark it done, then	 * Launcher triggers interrupt to Guest. */	int done_fd;};/*:*//*L:210 * The Disk * * Remember that the block device is handled by a separate I/O thread.  We head * straight into the core of that thread here: */static bool service_io(struct device *dev){	struct vblk_info *vblk = dev->priv;	unsigned int head, out_num, in_num, wlen;	int ret;	struct virtio_blk_inhdr *in;	struct virtio_blk_outhdr *out;	struct iovec iov[dev->vq->vring.num];	off64_t off;	/* See if there's a request waiting.  If not, nothing to do. */	head = get_vq_desc(dev->vq, iov, &out_num, &in_num);	if (head == dev->vq->vring.num)		return false;	/* Every block request should contain at least one output buffer	 * (detailing the location on disk and the type of request) and one	 * input buffer (to hold the result). */	if (out_num == 0 || in_num == 0)		errx(1, "Bad virtblk cmd %u out=%u in=%u",		     head, out_num, in_num);	out = convert(&iov[0], struct virtio_blk_outhdr);	in = convert(&iov[out_num+in_num-1], struct virtio_blk_inhdr);	off = out->sector * 512;	/* The block device implements "barriers", where the Guest indicates	 * that it wants all previous writes to occur before this write.  We	 * don't have a way of asking our kernel to do a barrier, so we just	 * synchronize all the data in the file.  Pretty poor, no? */	if (out->type & VIRTIO_BLK_T_BARRIER)		fdatasync(vblk->fd);	/* In general the virtio block driver is allowed to try SCSI commands.	 * It'd be nice if we supported eject, for example, but we don't. */	if (out->type & VIRTIO_BLK_T_SCSI_CMD) {		fprintf(stderr, "Scsi commands unsupported\n");		in->status = VIRTIO_BLK_S_UNSUPP;		wlen = sizeof(*in);	} else if (out->type & VIRTIO_BLK_T_OUT) {		/* Write */		/* Move to the right location in the block file.  This can fail		 * if they try to write past end. */		if (lseek64(vblk->fd, off, SEEK_SET) != off)			err(1, "Bad seek to sector %llu", out->sector);		ret = writev(vblk->fd, iov+1, out_num-1);		verbose("WRITE to sector %llu: %i\n", out->sector, ret);		/* Grr... Now we know how long the descriptor they sent was, we		 * make sure they didn't try to write over the end of the block		 * file (possibly extending it). */		if (ret > 0 && off + ret > vblk->len) {			/* Trim it back to the correct length */			ftruncate64(vblk->fd, vblk->len);			/* Die, bad Guest, die. */			errx(1, "Write past end %llu+%u", off, ret);		}		wlen = sizeof(*in);		in->status = (ret >= 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR);	} else {		/* Read */		/* Move to the right location in the block file.  This can fail		 * if they try to read past end. */		if (lseek64(vblk->fd, off, SEEK_SET) != off)			err(1, "Bad seek to sector %llu", out->sector);		ret = readv(vblk->fd, iov+1, in_num-1);		verbose("READ from sector %llu: %i\n", out->sector, ret);		if (ret >= 0) {			wlen = sizeof(*in) + ret;			in->status = VIRTIO_BLK_S_OK;		} else {			wlen = sizeof(*in);			in->status = VIRTIO_BLK_S_IOERR;		}	}	/* We can't trigger an IRQ, because we're not the Launcher.  It does	 * that when we tell it we're done. */	add_used(dev->vq, head, wlen);	return true;}/* This is the thread which actually services the I/O. */static int io_thread(void *_dev){	struct device *dev = _dev;	struct vblk_info *vblk = dev->priv;	char c;	/* Close other side of workpipe so we get 0 read when main dies. */	close(vblk->workpipe[1]);	/* Close the other side of the done_fd pipe. */	close(dev->fd);	/* When this read fails, it means Launcher died, so we follow. */	while (read(vblk->workpipe[0], &c, 1) == 1) {		/* We acknowledge each request immediately to reduce latency,		 * rather than waiting until we've done them all.  I haven't		 * measured to see if it makes any difference. */		while (service_io(dev))			write(vblk->done_fd, &c, 1);	}	return 0;}/* Now we've seen the I/O thread, we return to the Launcher to see what happens * when the thread tells us it's completed some I/O. */static bool handle_io_finish(int fd, struct device *dev){	char c;	/* If the I/O thread died, presumably it printed the error, so we	 * simply exit. */	if (read(dev->fd, &c, 1) != 1)		exit(1);	/* It did some work, so trigger the irq. */	trigger_irq(fd, dev->vq);	return true;}/* When the Guest submits some I/O, we just need to wake the I/O thread. */static void handle_virtblk_output(int fd, struct virtqueue *vq){	struct vblk_info *vblk = vq->dev->priv;	char c = 0;	/* Wake up I/O thread and tell it to go to work! */	if (write(vblk->workpipe[1], &c, 1) != 1)		/* Presumably it indicated why it died. */		exit(1);}/*L:198 This actually sets up a virtual block device. */static void setup_block_file(const char *filename){	int p[2];	struct device *dev;	struct vblk_info *vblk;	void *stack;	u64 cap;	unsigned int val;	/* This is the pipe the I/O thread will use to tell us I/O is done. */	pipe(p);	/* The device responds to return from I/O thread. */	dev = new_device("block", VIRTIO_ID_BLOCK, p[0], handle_io_finish);	/* The device has one virtqueue, where the Guest places requests. */	add_virtqueue(dev, VIRTQUEUE_NUM, handle_virtblk_output);	/* Allocate the room for our own bookkeeping */	vblk = dev->priv = malloc(sizeof(*vblk));	/* First we open the file and store the length. */	vblk->fd = open_or_die(filename, O_RDWR|O_LARGEFILE);	vblk->len = lseek64(vblk->fd, 0, SEEK_END);	/* Tell Guest how many sectors this device has. */	cap = cpu_to_le64(vblk->len / 512);	add_desc_field(dev, VIRTIO_CONFIG_BLK_F_CAPACITY, sizeof(cap), &cap);	/* Tell Guest not to put in too many descriptors at once: two are used	 * for the in and out elements. */	val = cpu_to_le32(VIRTQUEUE_NUM - 2);	add_desc_field(dev, VIRTIO_CONFIG_BLK_F_SEG_MAX, sizeof(val), &val);	/* The I/O thread writes to this end of the pipe when done. */	vblk->done_fd = p[1];	/* This is the second pipe, which is how we tell the I/O thread about	 * more work. */	pipe(vblk->workpipe);	/* Create stack for thread and run it */	stack = malloc(32768);	if (clone(io_thread, stack + 32768, CLONE_VM, dev) == -1)		err(1, "Creating clone");	/* We don't need to keep the I/O thread's end of the pipes open. */	close(vblk->done_fd);	close(vblk->workpipe[0]);	verbose("device %u: virtblock %llu sectors\n",		devices.device_num, cap);}/* That's the end of device setup. *//*L:220 Finally we reach the core of the Launcher, which runs the Guest, serves * its input and output, and finally, lays it to rest. */static void __attribute__((noreturn)) run_guest(int lguest_fd){	for (;;) {		unsigned long args[] = { LHREQ_BREAK, 0 };		unsigned long notify_addr;		int readval;		/* We read from the /dev/lguest device to run the Guest. */		readval = read(lguest_fd, &notify_addr, sizeof(notify_addr));		/* One unsigned long means the Guest did HCALL_NOTIFY */		if (readval == sizeof(notify_addr)) {			verbose("Notify on address %#lx\n", notify_addr);			handle_output(lguest_fd, notify_addr);			continue;		/* ENOENT means the Guest died.  Reading tells us why. */		} else if (errno == ENOENT) {			char reason[1024] = { 0 };			read(lguest_fd, reason, sizeof(reason)-1);			errx(1, "%s", reason);		/* EAGAIN means the Waker wanted us to look at some input.		 * Anything else means a bug or incompatible change. */		} else if (errno != EAGAIN)			err(1, "Running guest failed");		/* Service input, then unset the BREAK to release the Waker. */		handle_input(lguest_fd);		if (write(lguest_fd, args, sizeof(args)) < 0)			err(1, "Resetting break");	}}/* * This is the end of the Launcher.  The good news: we are over halfway * through!  The bad news: the most fiendish part of the code still lies ahead * of us. * * Are you ready?  Take a deep breath and join me in the core of the Host, in * "make Host". :*/static struct option opts[] = {	{ "verbose", 0, NULL, 'v' },	{ "tunnet", 1, NULL, 't' },	{ "block", 1, NULL, 'b' },	{ "initrd", 1, NULL, 'i' },	{ NULL },};static void usage(void){	errx(1, "Usage: lguest [--verbose] "	     "[--tunnet=(<ipaddr>|bridge:<bridgename>)\n"	     "|--block=<filename>|--initrd=<filename>]...\n"	     "<mem-in-mb> vmlinux [args...]");}/*L:105 The main routine is where the real work begins: */int main(int argc, char *argv[]){	/* Memory, top-level pagetable, code startpoint and size of the	 * (optional) initrd. */	unsigned long mem = 0, pgdir, start, initrd_size = 0;	/* Two temporaries and the /dev/lguest file descriptor. */	int i, c, lguest_fd;	/* The boot information for the Guest. */	struct boot_params *boot;	/* If they specify an initrd file to load. */	const char *initrd_name = NULL;	/* First we initialize the device list.  Since console and network	 * device receive input from a file descriptor, we keep an fdset	 * (infds) and the maximum fd number (max_infd) with the head of the	 * list.  We also keep a pointer to the last device, for easy appending	 * to the list.  Finally, we keep the next interrupt number to hand out	 * (1: remember that 0 is used by the timer). */	FD_ZERO(&devices.infds);	devices.max_infd = -1;	devices.lastdev = &devices.dev;	devices.next_irq = 1;	/* We need to know how much memory so we can set up the device	 * descriptor and memory pages for the devices as we parse the command	 * line.  So we quickly look through the arguments to find the amount	 * of memory now. */	for (i = 1; i < argc; i++) {		if (argv[i][0] != '-') {			mem = atoi(argv[i]) * 1024 * 1024;			/* We start by mapping anonymous pages over all of			 * guest-physical memory range.  This fills it with 0,			 * and ensures that the Guest won't be killed when it			 * tries to access it. */			guest_base = map_zeroed_pages(mem / getpagesize()						      + DEVICE_PAGES);			guest_limit = mem;			guest_max = mem + DEVICE_PAGES*getpagesize();			devices.descpage = get_pages(1);			break;		}	}	/* The options are fairly straight-forward */	while ((c = getopt_long(argc, argv, "v", opts, NULL)) != EOF) {		switch (c) {		case 'v':			verbose = true;			break;		case 't':			setup_tun_net(optarg);			break;		case 'b':			setup_block_file(optarg);			break;		case 'i':			initrd_name = optarg;			break;		default:			warnx("Unknown argument %s", argv[optind]);			usage();		}	}	/* After the other arguments we expect memory and kernel image name,	 * followed by command line arguments for the kernel. */	if (optind + 2 > argc)		usage();	verbose("Guest base is at %p\n", guest_base);	/* We always have a console device */	setup_console();	/* Now we load the kernel */	start = load_kernel(open_or_die(argv[optind+1], O_RDONLY));	/* Boot information is stashed at physical address 0 */	boot = from_guest_phys(0);	/* Map the initrd image if requested (at top of physical memory) */	if (initrd_name) {		initrd_size = load_initrd(initrd_name, mem);		/* These are the location in the Linux boot header where the		 * start and size of the initrd are expected to be found. */		boot->hdr.ramdisk_image = mem - initrd_size;		boot->hdr.ramdisk_size = initrd_size;		/* The bootloader type 0xFF means "unknown"; that's OK. */		boot->hdr.type_of_loader = 0xFF;	}	/* Set up the initial linear pagetables, starting below the initrd. */	pgdir = setup_pagetables(mem, initrd_size);	/* The Linux boot header contains an "E820" memory map: ours is a	 * simple, single region. */	boot->e820_entries = 1;	boot->e820_map[0] = ((struct e820entry) { 0, mem, E820_RAM });	/* The boot header contains a command line pointer: we put the command	 * line after the boot header. */	boot->hdr.cmd_line_ptr = to_guest_phys(boot + 1);	/* We use a simple helper to copy the arguments separated by spaces. */	concat((char *)(boot + 1), argv+optind+2);	/* Boot protocol version: 2.07 supports the fields for lguest. */	boot->hdr.version = 0x207;	/* The hardware_subarch value of "1" tells the Guest it's an lguest. */	boot->hdr.hardware_subarch = 1;	/* Tell the entry path not to try to reload segment registers. */	boot->hdr.loadflags |= KEEP_SEGMENTS;	/* We tell the kernel to initialize the Guest: this returns the open	 * /dev/lguest file descriptor. */	lguest_fd = tell_kernel(pgdir, start);	/* We fork off a child process, which wakes the Launcher whenever one	 * of the input file descriptors needs attention.  Otherwise we would	 * run the Guest until it tries to output something. */	waker_fd = setup_waker(lguest_fd);	/* Finally, run the Guest.  This doesn't return. */	run_guest(lguest_fd);}/*:*//*M:999 * Mastery is done: you now know everything I do. * * But surely you have seen code, features and bugs in your wanderings which * you now yearn to attack?  That is the real game, and I look forward to you * patching and forking lguest into the Your-Name-Here-visor. * * Farewell, and good coding! * Rusty Russell. */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -