⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 lguest.c

📁 linux 内核源代码
💻 C
📖 第 1 页 / 共 4 页
字号:
 *
 * We lay them out of the way, just below the initrd (which is why we need to
 * know its size). */
static unsigned long setup_pagetables(unsigned long mem,
				      unsigned long initrd_size)
{
	/* The page size is used everywhere below, so fetch it once. */
	const unsigned int page_size = getpagesize();
	/* One PTE is pointer-sized, so this many fit in a single page. */
	const unsigned int ptes_per_page = page_size/sizeof(void *);
	/* Number of pages covered by "mem" bytes. */
	const unsigned int mapped_pages = mem/page_size;
	/* PTE pages needed to map them all, rounded up. */
	const unsigned int linear_pages
		= (mapped_pages + ptes_per_page-1)/ptes_per_page;
	unsigned long *pgdir, *linear;
	unsigned int page;

	/* The top-level page directory sits one page below the initrd, which
	 * itself ends at the top of memory. */
	pgdir = from_guest_phys(mem) - initrd_size - page_size;

	/* The PTE pages go directly beneath the page directory. */
	linear = (void *)pgdir - linear_pages*page_size;

	/* Identity ("linear") mapping: entry N holds the address of page N,
	 * or'ed with the PAGE_PRESENT flag bits. */
	for (page = 0; page < mapped_pages; page++)
		linear[page] = ((page * page_size) | PAGE_PRESENT);

	/* One top-level entry per PTE page: step through the mapped pages a
	 * PTE-page's worth at a time and point the entry at the matching
	 * slice of "linear". */
	for (page = 0; page < mapped_pages; page += ptes_per_page) {
		pgdir[page/ptes_per_page]
			= ((to_guest_phys(linear) + page*sizeof(void *))
			   | PAGE_PRESENT);
	}

	verbose("Linear mapping of %u pages in %u pte pages at %#lx\n",
		mapped_pages, linear_pages, to_guest_phys(linear));

	/* Hand back the guest-physical address of the top level so the
	 * caller can pass it on. */
	return to_guest_phys(pgdir);
}
/*:*/

/* Join every string in the NULL-terminated array "args" into "dst", each one
 * followed by a single space (so a non-empty result ends with a space).
 * "dst" must be large enough: no bounds checking is done here. */
static void concat(char *dst, char *args[])
{
	char *out = dst;
	char **arg;

	for (arg = args; *arg; arg++) {
		strcpy(out, *arg);
		out += strlen(*arg);
		*out++ = ' ';
	}
	/* Terminate the result; this also covers an empty argument list. */
	*out = '\0';
}

/*L:185 This is where we actually tell the kernel to initialize the Guest.
We * saw the arguments it expects when we looked at initialize() in lguest_user.c: * the base of Guest "physical" memory, the top physical page to allow, the * top level pagetable and the entry point for the Guest. */static int tell_kernel(unsigned long pgdir, unsigned long start){	unsigned long args[] = { LHREQ_INITIALIZE,				 (unsigned long)guest_base,				 guest_limit / getpagesize(), pgdir, start };	int fd;	verbose("Guest: %p - %p (%#lx)\n",		guest_base, guest_base + guest_limit, guest_limit);	fd = open_or_die("/dev/lguest", O_RDWR);	if (write(fd, args, sizeof(args)) < 0)		err(1, "Writing to /dev/lguest");	/* We return the /dev/lguest file descriptor to control this Guest */	return fd;}/*:*/static void add_device_fd(int fd){	FD_SET(fd, &devices.infds);	if (fd > devices.max_infd)		devices.max_infd = fd;}/*L:200 * The Waker. * * With console, block and network devices, we can have lots of input which we * need to process.  We could try to tell the kernel what file descriptors to * watch, but handing a file descriptor mask through to the kernel is fairly * icky. * * Instead, we fork off a process which watches the file descriptors and writes * the LHREQ_BREAK command to the /dev/lguest file descriptor to tell the Host * stop running the Guest.  This causes the Launcher to return from the * /dev/lguest read with -EAGAIN, where it will write to /dev/lguest to reset * the LHREQ_BREAK and wake us up again. * * This, of course, is merely a different *kind* of icky. */static void wake_parent(int pipefd, int lguest_fd){	/* Add the pipe from the Launcher to the fdset in the device_list, so	 * we watch it, too. */	add_device_fd(pipefd);	for (;;) {		fd_set rfds = devices.infds;		unsigned long args[] = { LHREQ_BREAK, 1 };		/* Wait until input is ready from one of the devices. */		select(devices.max_infd+1, &rfds, NULL, NULL, NULL);		/* Is it a message from the Launcher? 
*/		if (FD_ISSET(pipefd, &rfds)) {			int fd;			/* If read() returns 0, it means the Launcher has			 * exited.  We silently follow. */			if (read(pipefd, &fd, sizeof(fd)) == 0)				exit(0);			/* Otherwise it's telling us to change what file			 * descriptors we're to listen to.  Positive means			 * listen to a new one, negative means stop			 * listening. */			if (fd >= 0)				FD_SET(fd, &devices.infds);			else				FD_CLR(-fd - 1, &devices.infds);		} else /* Send LHREQ_BREAK command. */			write(lguest_fd, args, sizeof(args));	}}/* This routine just sets up a pipe to the Waker process. */static int setup_waker(int lguest_fd){	int pipefd[2], child;	/* We create a pipe to talk to the Waker, and also so it knows when the	 * Launcher dies (and closes pipe). */	pipe(pipefd);	child = fork();	if (child == -1)		err(1, "forking");	if (child == 0) {		/* We are the Waker: close the "writing" end of our copy of the		 * pipe and start waiting for input. */		close(pipefd[1]);		wake_parent(pipefd[0], lguest_fd);	}	/* Close the reading end of our copy of the pipe. */	close(pipefd[0]);	/* Here is the fd used to talk to the waker. */	return pipefd[1];}/* * Device Handling. * * When the Guest gives us a buffer, it sends an array of addresses and sizes. * We need to make sure it's not trying to reach into the Launcher itself, so * we have a convenient routine which checks it and exits with an error message * if something funny is going on: */static void *_check_pointer(unsigned long addr, unsigned int size,			    unsigned int line){	/* We have to separately check addr and addr+size, because size could	 * be huge and addr + size might wrap around. */	if (addr >= guest_limit || addr + size >= guest_limit)		errx(1, "%s:%i: Invalid address %#lx", __FILE__, line, addr);	/* We return a pointer for the caller's convenience, now we know it's	 * safe to use. */	return from_guest_phys(addr);}/* A macro which transparently hands the line number to the real function. 
*/#define check_pointer(addr,size) _check_pointer(addr, size, __LINE__)/* Each buffer in the virtqueues is actually a chain of descriptors.  This * function returns the next descriptor in the chain, or vq->vring.num if we're * at the end. */static unsigned next_desc(struct virtqueue *vq, unsigned int i){	unsigned int next;	/* If this descriptor says it doesn't chain, we're done. */	if (!(vq->vring.desc[i].flags & VRING_DESC_F_NEXT))		return vq->vring.num;	/* Check they're not leading us off end of descriptors. */	next = vq->vring.desc[i].next;	/* Make sure compiler knows to grab that: we don't want it changing! */	wmb();	if (next >= vq->vring.num)		errx(1, "Desc next is %u", next);	return next;}/* This looks in the virtqueue and for the first available buffer, and converts * it to an iovec for convenient access.  Since descriptors consist of some * number of output then some number of input descriptors, it's actually two * iovecs, but we pack them into one and note how many of each there were. * * This function returns the descriptor number found, or vq->vring.num (which * is never a valid descriptor number) if none was found. */static unsigned get_vq_desc(struct virtqueue *vq,			    struct iovec iov[],			    unsigned int *out_num, unsigned int *in_num){	unsigned int i, head;	/* Check it isn't doing very strange things with descriptor numbers. */	if ((u16)(vq->vring.avail->idx - vq->last_avail_idx) > vq->vring.num)		errx(1, "Guest moved used index from %u to %u",		     vq->last_avail_idx, vq->vring.avail->idx);	/* If there's nothing new since last we looked, return invalid. */	if (vq->vring.avail->idx == vq->last_avail_idx)		return vq->vring.num;	/* Grab the next descriptor number they're advertising, and increment	 * the index we've seen. */	head = vq->vring.avail->ring[vq->last_avail_idx++ % vq->vring.num];	/* If their number is silly, that's a fatal mistake. 
*/	if (head >= vq->vring.num)		errx(1, "Guest says index %u is available", head);	/* When we start there are none of either input nor output. */	*out_num = *in_num = 0;	i = head;	do {		/* Grab the first descriptor, and check it's OK. */		iov[*out_num + *in_num].iov_len = vq->vring.desc[i].len;		iov[*out_num + *in_num].iov_base			= check_pointer(vq->vring.desc[i].addr,					vq->vring.desc[i].len);		/* If this is an input descriptor, increment that count. */		if (vq->vring.desc[i].flags & VRING_DESC_F_WRITE)			(*in_num)++;		else {			/* If it's an output descriptor, they're all supposed			 * to come before any input descriptors. */			if (*in_num)				errx(1, "Descriptor has out after in");			(*out_num)++;		}		/* If we've got too many, that implies a descriptor loop. */		if (*out_num + *in_num > vq->vring.num)			errx(1, "Looped descriptor");	} while ((i = next_desc(vq, i)) != vq->vring.num);	return head;}/* After we've used one of their buffers, we tell them about it.  We'll then * want to send them an interrupt, using trigger_irq(). */static void add_used(struct virtqueue *vq, unsigned int head, int len){	struct vring_used_elem *used;	/* The virtqueue contains a ring of used buffers.  Get a pointer to the	 * next entry in that used ring. */	used = &vq->vring.used->ring[vq->vring.used->idx % vq->vring.num];	used->id = head;	used->len = len;	/* Make sure buffer is written before we update index. */	wmb();	vq->vring.used->idx++;}/* This actually sends the interrupt for this virtqueue */static void trigger_irq(int fd, struct virtqueue *vq){	unsigned long buf[] = { LHREQ_IRQ, vq->config.irq };	/* If they don't want an interrupt, don't send one. */	if (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)		return;	/* Send the Guest an interrupt tell them we used something up. */	if (write(fd, buf, sizeof(buf)) != 0)		err(1, "Triggering irq %i", vq->config.irq);}/* And here's the combo meal deal.  Supersize me! 
*/static void add_used_and_trigger(int fd, struct virtqueue *vq,				 unsigned int head, int len){	add_used(vq, head, len);	trigger_irq(fd, vq);}/* * The Console * * Here is the input terminal setting we save, and the routine to restore them * on exit so the user gets their terminal back. */static struct termios orig_term;static void restore_term(void){	tcsetattr(STDIN_FILENO, TCSANOW, &orig_term);}/* We associate some data with the console for our exit hack. */struct console_abort{	/* How many times have they hit ^C? */	int count;	/* When did they start? */	struct timeval start;};/* This is the routine which handles console input (ie. stdin). */static bool handle_console_input(int fd, struct device *dev){	int len;	unsigned int head, in_num, out_num;	struct iovec iov[dev->vq->vring.num];	struct console_abort *abort = dev->priv;	/* First we need a console buffer from the Guests's input virtqueue. */	head = get_vq_desc(dev->vq, iov, &out_num, &in_num);	/* If they're not ready for input, stop listening to this file	 * descriptor.  We'll start again once they add an input buffer. */	if (head == dev->vq->vring.num)		return false;	if (out_num)		errx(1, "Output buffers in console in queue?");	/* This is why we convert to iovecs: the readv() call uses them, and so	 * it reads straight into the Guest's buffer. */	len = readv(dev->fd, iov, in_num);	if (len <= 0) {		/* This implies that the console is closed, is /dev/null, or		 * something went terribly wrong. */		warnx("Failed to get console input, ignoring console.");		/* Put the input terminal back. */		restore_term();		/* Remove callback from input vq, so it doesn't restart us. */		dev->vq->handle_output = NULL;		/* Stop listening to this fd: don't call us again. */		return false;	}	/* Tell the Guest about the new input. */	add_used_and_trigger(fd, dev->vq, head, len);	/* Three ^C within one second?  Exit.	 *	 * This is such a hack, but works surprisingly well.  
Each ^C has to be	 * in a buffer by itself, so they can't be too fast.  But we check that	 * we get three within about a second, so they can't be too slow. */	if (len == 1 && ((char *)iov[0].iov_base)[0] == 3) {		if (!abort->count++)			gettimeofday(&abort->start, NULL);		else if (abort->count == 3) {			struct timeval now;			gettimeofday(&now, NULL);			if (now.tv_sec <= abort->start.tv_sec+1) {				unsigned long args[] = { LHREQ_BREAK, 0 };				/* Close the fd so Waker will know it has to				 * exit. */				close(waker_fd);				/* Just in case waker is blocked in BREAK, send				 * unbreak now. */				write(fd, args, sizeof(args));				exit(2);			}			abort->count = 0;		}	} else		/* Any other key resets the abort counter. */		abort->count = 0;	/* Everything went OK! */	return true;}/* Handling output for console is simple: we just get all the output buffers * and write them to stdout. */static void handle_console_output(int fd, struct virtqueue *vq){	unsigned int head, out, in;	int len;	struct iovec iov[vq->vring.num];	/* Keep getting output buffers from the Guest until we run out. */	while ((head = get_vq_desc(vq, iov, &out, &in)) != vq->vring.num) {		if (in)			errx(1, "Input buffers in output queue?");		len = writev(STDOUT_FILENO, iov, out);		add_used_and_trigger(fd, vq, head, len);	}}/* * The Network * * Handling output for network is also simple: we get all the output buffers * and write them (ignoring the first element) to this device's file descriptor * (stdout). */static void handle_net_output(int fd, struct virtqueue *vq){	unsigned int head, out, in;	int len;	struct iovec iov[vq->vring.num];	/* Keep getting output buffers from the Guest until we run out. */	while ((head = get_vq_desc(vq, iov, &out, &in)) != vq->vring.num) {		if (in)			errx(1, "Input buffers in output queue?");		/* Check header, but otherwise ignore it (we told the Guest we		 * supported no features, so it shouldn't have anything		 * interesting). */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -