📄 lguest.c
字号:
(void)convert(&iov[0], struct virtio_net_hdr); len = writev(vq->dev->fd, iov+1, out-1); add_used_and_trigger(fd, vq, head, len); }}/* This is where we handle a packet coming in from the tun device to our * Guest. */static bool handle_tun_input(int fd, struct device *dev){ unsigned int head, in_num, out_num; int len; struct iovec iov[dev->vq->vring.num]; struct virtio_net_hdr *hdr; /* First we need a network buffer from the Guests's recv virtqueue. */ head = get_vq_desc(dev->vq, iov, &out_num, &in_num); if (head == dev->vq->vring.num) { /* Now, it's expected that if we try to send a packet too * early, the Guest won't be ready yet. Wait until the device * status says it's ready. */ /* FIXME: Actually want DRIVER_ACTIVE here. */ if (dev->desc->status & VIRTIO_CONFIG_S_DRIVER_OK) warn("network: no dma buffer!"); /* We'll turn this back on if input buffers are registered. */ return false; } else if (out_num) errx(1, "Output buffers in network recv queue?"); /* First element is the header: we set it to 0 (no features). */ hdr = convert(&iov[0], struct virtio_net_hdr); hdr->flags = 0; hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE; /* Read the packet from the device directly into the Guest's buffer. */ len = readv(dev->fd, iov+1, in_num-1); if (len <= 0) err(1, "reading network"); /* Tell the Guest about the new packet. */ add_used_and_trigger(fd, dev->vq, head, sizeof(*hdr) + len); verbose("tun input packet len %i [%02x %02x] (%s)\n", len, ((u8 *)iov[1].iov_base)[0], ((u8 *)iov[1].iov_base)[1], head != dev->vq->vring.num ? "sent" : "discarded"); /* All good. */ return true;}/*L:215 This is the callback attached to the network and console input * virtqueues: it ensures we try again, in case we stopped console or net * delivery because Guest didn't have any buffers. */static void enable_fd(int fd, struct virtqueue *vq){ add_device_fd(vq->dev->fd); /* Tell waker to listen to it again */ write(waker_fd, &vq->dev->fd, sizeof(vq->dev->fd));}/* This is the generic routine we call when the Guest uses LHCALL_NOTIFY. */static void handle_output(int fd, unsigned long addr){ struct device *i; struct virtqueue *vq; /* Check each virtqueue. */ for (i = devices.dev; i; i = i->next) { for (vq = i->vq; vq; vq = vq->next) { if (vq->config.pfn == addr/getpagesize() && vq->handle_output) { verbose("Output to %s\n", vq->dev->name); vq->handle_output(fd, vq); return; } } } /* Early console write is done using notify on a nul-terminated string * in Guest memory. */ if (addr >= guest_limit) errx(1, "Bad NOTIFY %#lx", addr); write(STDOUT_FILENO, from_guest_phys(addr), strnlen(from_guest_phys(addr), guest_limit - addr));}/* This is called when the Waker wakes us up: check for incoming file * descriptors. */static void handle_input(int fd){ /* select() wants a zeroed timeval to mean "don't wait". */ struct timeval poll = { .tv_sec = 0, .tv_usec = 0 }; for (;;) { struct device *i; fd_set fds = devices.infds; /* If nothing is ready, we're done. */ if (select(devices.max_infd+1, &fds, NULL, NULL, &poll) == 0) break; /* Otherwise, call the device(s) which have readable * file descriptors and a method of handling them. */ for (i = devices.dev; i; i = i->next) { if (i->handle_input && FD_ISSET(i->fd, &fds)) { int dev_fd; if (i->handle_input(fd, i)) continue; /* If handle_input() returns false, it means we * should no longer service it. Networking and * console do this when there's no input * buffers to deliver into. Console also uses * it when it discovers that stdin is * closed. */ FD_CLR(i->fd, &devices.infds); /* Tell waker to ignore it too, by sending a * negative fd number (-1, since 0 is a valid * FD number). */ dev_fd = -i->fd - 1; write(waker_fd, &dev_fd, sizeof(dev_fd)); } } }}/*L:190 * Device Setup * * All devices need a descriptor so the Guest knows it exists, and a "struct * device" so the Launcher can keep track of it. We have common helper * routines to allocate them. * * This routine allocates a new "struct lguest_device_desc" from descriptor * table just above the Guest's normal memory. It returns a pointer to that * descriptor. */static struct lguest_device_desc *new_dev_desc(u16 type){ struct lguest_device_desc *d; /* We only have one page for all the descriptors. */ if (devices.desc_used + sizeof(*d) > getpagesize()) errx(1, "Too many devices"); /* We don't need to set config_len or status: page is 0 already. */ d = (void *)devices.descpage + devices.desc_used; d->type = type; devices.desc_used += sizeof(*d); return d;}/* Each device descriptor is followed by some configuration information. * Each configuration field looks like: u8 type, u8 len, [... len bytes...]. * * This routine adds a new field to an existing device's descriptor. It only * works for the last device, but that's OK because that's how we use it. */static void add_desc_field(struct device *dev, u8 type, u8 len, const void *c){ /* This is the last descriptor, right? */ assert(devices.descpage + devices.desc_used == (u8 *)(dev->desc + 1) + dev->desc->config_len); /* We only have one page of device descriptions. */ if (devices.desc_used + 2 + len > getpagesize()) errx(1, "Too many devices"); /* Copy in the new config header: type then length. */ devices.descpage[devices.desc_used++] = type; devices.descpage[devices.desc_used++] = len; memcpy(devices.descpage + devices.desc_used, c, len); devices.desc_used += len; /* Update the device descriptor length: two byte head then data. */ dev->desc->config_len += 2 + len;}/* This routine adds a virtqueue to a device. We specify how many descriptors * the virtqueue is to have. */static void add_virtqueue(struct device *dev, unsigned int num_descs, void (*handle_output)(int fd, struct virtqueue *me)){ unsigned int pages; struct virtqueue **i, *vq = malloc(sizeof(*vq)); void *p; /* First we need some pages for this virtqueue. */ pages = (vring_size(num_descs, getpagesize()) + getpagesize() - 1) / getpagesize(); p = get_pages(pages); /* Initialize the virtqueue */ vq->next = NULL; vq->last_avail_idx = 0; vq->dev = dev; /* Initialize the configuration. */ vq->config.num = num_descs; vq->config.irq = devices.next_irq++; vq->config.pfn = to_guest_phys(p) / getpagesize(); /* Initialize the vring. */ vring_init(&vq->vring, num_descs, p, getpagesize()); /* Add the configuration information to this device's descriptor. */ add_desc_field(dev, VIRTIO_CONFIG_F_VIRTQUEUE, sizeof(vq->config), &vq->config); /* Add to tail of list, so dev->vq is first vq, dev->vq->next is * second. */ for (i = &dev->vq; *i; i = &(*i)->next); *i = vq; /* Set the routine to call when the Guest does something to this * virtqueue. */ vq->handle_output = handle_output; /* Set the "Don't Notify Me" flag if we don't have a handler */ if (!handle_output) vq->vring.used->flags = VRING_USED_F_NO_NOTIFY;}/* This routine does all the creation and setup of a new device, including * calling new_dev_desc() to allocate the descriptor and device memory. */static struct device *new_device(const char *name, u16 type, int fd, bool (*handle_input)(int, struct device *)){ struct device *dev = malloc(sizeof(*dev)); /* Append to device list. Prepending to a single-linked list is * easier, but the user expects the devices to be arranged on the bus * in command-line order. The first network device on the command line * is eth0, the first block device /dev/vda, etc. */ *devices.lastdev = dev; dev->next = NULL; devices.lastdev = &dev->next; /* Now we populate the fields one at a time. */ dev->fd = fd; /* If we have an input handler for this file descriptor, then we add it * to the device_list's fdset and maxfd. */ if (handle_input) add_device_fd(dev->fd); dev->desc = new_dev_desc(type); dev->handle_input = handle_input; dev->name = name; dev->vq = NULL; return dev;}/* Our first setup routine is the console. It's a fairly simple device, but * UNIX tty handling makes it uglier than it could be. */static void setup_console(void){ struct device *dev; /* If we can save the initial standard input settings... */ if (tcgetattr(STDIN_FILENO, &orig_term) == 0) { struct termios term = orig_term; /* Then we turn off echo, line buffering and ^C etc. We want a * raw input stream to the Guest. */ term.c_lflag &= ~(ISIG|ICANON|ECHO); tcsetattr(STDIN_FILENO, TCSANOW, &term); /* If we exit gracefully, the original settings will be * restored so the user can see what they're typing. */ atexit(restore_term); } dev = new_device("console", VIRTIO_ID_CONSOLE, STDIN_FILENO, handle_console_input); /* We store the console state in dev->priv, and initialize it. */ dev->priv = malloc(sizeof(struct console_abort)); ((struct console_abort *)dev->priv)->count = 0; /* The console needs two virtqueues: the input then the output. When * they put something the input queue, we make sure we're listening to * stdin. When they put something in the output queue, we write it to * stdout. */ add_virtqueue(dev, VIRTQUEUE_NUM, enable_fd); add_virtqueue(dev, VIRTQUEUE_NUM, handle_console_output); verbose("device %u: console\n", devices.device_num++);}/*:*//*M:010 Inter-guest networking is an interesting area. Simplest is to have a * --sharenet=<name> option which opens or creates a named pipe. This can be * used to send packets to another guest in a 1:1 manner. * * More sopisticated is to use one of the tools developed for project like UML * to do networking. * * Faster is to do virtio bonding in kernel. Doing this 1:1 would be * completely generic ("here's my vring, attach to your vring") and would work * for any traffic. Of course, namespace and permissions issues need to be * dealt with. A more sophisticated "multi-channel" virtio_net.c could hide * multiple inter-guest channels behind one interface, although it would * require some manner of hotplugging new virtio channels. * * Finally, we could implement a virtio network switch in the kernel. :*/static u32 str2ip(const char *ipaddr){ unsigned int byte[4]; sscanf(ipaddr, "%u.%u.%u.%u", &byte[0], &byte[1], &byte[2], &byte[3]); return (byte[0] << 24) | (byte[1] << 16) | (byte[2] << 8) | byte[3];}/* This code is "adapted" from libbridge: it attaches the Host end of the * network device to the bridge device specified by the command line. * * This is yet another James Morris contribution (I'm an IP-level guy, so I * dislike bridging), and I just try not to break it. */static void add_to_bridge(int fd, const char *if_name, const char *br_name){ int ifidx; struct ifreq ifr; if (!*br_name) errx(1, "must specify bridge name"); ifidx = if_nametoindex(if_name); if (!ifidx) errx(1, "interface %s does not exist!", if_name); strncpy(ifr.ifr_name, br_name, IFNAMSIZ); ifr.ifr_ifindex = ifidx; if (ioctl(fd, SIOCBRADDIF, &ifr) < 0) err(1, "can't add %s to bridge %s", if_name, br_name);}/* This sets up the Host end of the network device with an IP address, brings * it up so packets will flow, the copies the MAC address into the hwaddr * pointer. */static void configure_device(int fd, const char *devname, u32 ipaddr, unsigned char hwaddr[6]){ struct ifreq ifr; struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr; /* Don't read these incantations. Just cut & paste them like I did! */ memset(&ifr, 0, sizeof(ifr)); strcpy(ifr.ifr_name, devname); sin->sin_family = AF_INET; sin->sin_addr.s_addr = htonl(ipaddr); if (ioctl(fd, SIOCSIFADDR, &ifr) != 0) err(1, "Setting %s interface address", devname); ifr.ifr_flags = IFF_UP; if (ioctl(fd, SIOCSIFFLAGS, &ifr) != 0) err(1, "Bringing interface %s up", devname); /* SIOC stands for Socket I/O Control. G means Get (vs S for Set * above). IF means Interface, and HWADDR is hardware address. * Simple! */ if (ioctl(fd, SIOCGIFHWADDR, &ifr) != 0) err(1, "getting hw address for %s", devname); memcpy(hwaddr, ifr.ifr_hwaddr.sa_data, 6);}/*L:195 Our network is a Host<->Guest network. This can either use bridging or * routing, but the principle is the same: it uses the "tun" device to inject * packets into the Host as if they came in from a normal network card. We * just shunt packets between the Guest and the tun device. */static void setup_tun_net(const char *arg){ struct device *dev; struct ifreq ifr; int netfd, ipfd; u32 ip; const char *br_name = NULL; u8 hwaddr[6]; /* We open the /dev/net/tun device and tell it we want a tap device. A * tap device is like a tun device, only somehow different. To tell * the truth, I completely blundered my way through this code, but it * works now! */ netfd = open_or_die("/dev/net/tun", O_RDWR); memset(&ifr, 0, sizeof(ifr)); ifr.ifr_flags = IFF_TAP | IFF_NO_PI; strcpy(ifr.ifr_name, "tap%d"); if (ioctl(netfd, TUNSETIFF, &ifr) != 0) err(1, "configuring /dev/net/tun"); /* We don't need checksums calculated for packets coming in this * device: trust us! */ ioctl(netfd, TUNSETNOCSUM, 1); /* First we create a new network device. */ dev = new_device("net", VIRTIO_ID_NET, netfd, handle_tun_input); /* Network devices need a receive and a send queue, just like * console. */ add_virtqueue(dev, VIRTQUEUE_NUM, enable_fd); add_virtqueue(dev, VIRTQUEUE_NUM, handle_net_output); /* We need a socket to perform the magic network ioctls to bring up the * tap interface, connect to the bridge etc. Any socket will do! */ ipfd = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP); if (ipfd < 0) err(1, "opening IP socket"); /* If the command line was --tunnet=bridge:<name> do bridging. */ if (!strncmp(BRIDGE_PFX, arg, strlen(BRIDGE_PFX))) { ip = INADDR_ANY; br_name = arg + strlen(BRIDGE_PFX); add_to_bridge(ipfd, ifr.ifr_name, br_name); } else /* It is an IP address to set up the device with */ ip = str2ip(arg); /* Set up the tun device, and get the mac address for the interface. */ configure_device(ipfd, ifr.ifr_name, ip, hwaddr); /* Tell Guest what MAC address to use. */ add_desc_field(dev, VIRTIO_CONFIG_NET_MAC_F, sizeof(hwaddr), hwaddr); /* We don't seed the socket any more; setup is done. */ close(ipfd); verbose("device %u: tun net %u.%u.%u.%u\n", devices.device_num++, (u8)(ip>>24),(u8)(ip>>16),(u8)(ip>>8),(u8)ip); if (br_name) verbose("attached to bridge: %s\n", br_name);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -