kvm_main.c

来自「linux 内核源代码」· C语言 代码 · 共 2,825 行 · 第 1/5 页

C
2,825
字号
	/*
	 * NOTE(review): tail of kvm_emulate_cpuid -- the function header and
	 * local declarations (function, best, e, i) are on a previous page of
	 * this listing.  Reads the requested CPUID leaf from guest RAX, scans
	 * the vcpu's cached cpuid entries for the best match, and returns the
	 * answer in guest RAX..RDX.
	 */
	kvm_x86_ops->cache_regs(vcpu);
	function = vcpu->regs[VCPU_REGS_RAX];
	/* Default answer for an unknown leaf is all zeroes. */
	vcpu->regs[VCPU_REGS_RAX] = 0;
	vcpu->regs[VCPU_REGS_RBX] = 0;
	vcpu->regs[VCPU_REGS_RCX] = 0;
	vcpu->regs[VCPU_REGS_RDX] = 0;
	best = NULL;
	for (i = 0; i < vcpu->cpuid_nent; ++i) {
		e = &vcpu->cpuid_entries[i];
		if (e->function == function) {
			/* Exact leaf match wins outright. */
			best = e;
			break;
		}
		/*
		 * Both basic or both extended?
		 */
		if (((e->function ^ function) & 0x80000000) == 0)
			/*
			 * Same family (bit 31 agrees): remember the highest
			 * leaf seen so far as the fallback answer.
			 */
			if (!best || e->function > best->function)
				best = e;
	}
	if (best) {
		vcpu->regs[VCPU_REGS_RAX] = best->eax;
		vcpu->regs[VCPU_REGS_RBX] = best->ebx;
		vcpu->regs[VCPU_REGS_RCX] = best->ecx;
		vcpu->regs[VCPU_REGS_RDX] = best->edx;
	}
	kvm_x86_ops->decache_regs(vcpu);
	kvm_x86_ops->skip_emulated_instruction(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);

/*
 * Copy string-PIO data between the vcpu's pio_data bounce buffer and the
 * pinned guest pages (vcpu->pio.guest_pages), via a temporary kernel
 * mapping.  Direction depends on vcpu->pio.in: IN copies toward the guest
 * pages, OUT copies from them.  On mapping failure the pinned pages are
 * released and -ENOMEM is returned; on success the pages are also released
 * before returning 0.
 */
static int pio_copy_data(struct kvm_vcpu *vcpu)
{
	void *p = vcpu->pio_data;
	void *q;
	unsigned bytes;
	/* guest_pages[1] is only set when the transfer straddles a page. */
	int nr_pages = vcpu->pio.guest_pages[1] ? 2 : 1;

	q = vmap(vcpu->pio.guest_pages, nr_pages, VM_READ|VM_WRITE,
		 PAGE_KERNEL);
	if (!q) {
		free_pio_guest_pages(vcpu);
		return -ENOMEM;
	}
	/* Start at the guest buffer's offset within its first page. */
	q += vcpu->pio.guest_page_offset;
	bytes = vcpu->pio.size * vcpu->pio.cur_count;
	if (vcpu->pio.in)
		memcpy(q, p, bytes);
	else
		memcpy(p, q, bytes);
	/* vunmap() needs the original page-aligned mapping address back. */
	q -= vcpu->pio.guest_page_offset;
	vunmap(q);
	free_pio_guest_pages(vcpu);
	return 0;
}

/*
 * Finish the data phase of a PIO operation.  For a simple (non-string) IN,
 * the result is copied into guest RAX.  For string PIO, IN data is copied
 * to guest memory and the string registers are advanced: RDI for IN (INS),
 * RSI for OUT (OUTS), and RCX is decremented for REP.  Finally the pending
 * count is reduced by the amount just transferred.
 */
static int complete_pio(struct kvm_vcpu *vcpu)
{
	struct kvm_pio_request *io = &vcpu->pio;
	long delta;
	int r;

	kvm_x86_ops->cache_regs(vcpu);

	if (!io->string) {
		if (io->in)
			memcpy(&vcpu->regs[VCPU_REGS_RAX], vcpu->pio_data,
			       io->size);
	} else {
		if (io->in) {
			r = pio_copy_data(vcpu);
			if (r) {
				kvm_x86_ops->cache_regs(vcpu);
				return r;
			}
		}

		delta = 1;
		if (io->rep) {
			delta *= io->cur_count;
			/*
			 * The size of the register should really depend on
			 * current address size.
			 */
			vcpu->regs[VCPU_REGS_RCX] -= delta;
		}
		/* "down" means DF was set: the string walks backwards. */
		if (io->down)
			delta = -delta;
		delta *= io->size;
		if (io->in)
			vcpu->regs[VCPU_REGS_RDI] += delta;
		else
			vcpu->regs[VCPU_REGS_RSI] += delta;
	}

	kvm_x86_ops->decache_regs(vcpu);

	io->count -= io->cur_count;
	io->cur_count = 0;

	return 0;
}

/*
 * Dispatch a single non-string PIO transaction to an in-kernel I/O device,
 * reading or writing vcpu->pio.size bytes at vcpu->pio.port.  Serialized
 * under the vm-wide lock.
 */
static void kernel_pio(struct kvm_io_device *pio_dev,
		       struct kvm_vcpu *vcpu,
		       void *pd)
{
	/* TODO: String I/O for in kernel device */

	mutex_lock(&vcpu->kvm->lock);
	if (vcpu->pio.in)
		kvm_iodevice_read(pio_dev, vcpu->pio.port,
				  vcpu->pio.size,
				  pd);
	else
		kvm_iodevice_write(pio_dev, vcpu->pio.port,
				   vcpu->pio.size,
				   pd);
	mutex_unlock(&vcpu->kvm->lock);
}

/*
 * Write cur_count elements of size io->size from the pio_data buffer to an
 * in-kernel device, one element per kvm_iodevice_write() call, under the
 * vm-wide lock.  Write-only: string reads to in-kernel devices are not
 * supported (see kvm_emulate_pio_string below).
 */
static void pio_string_write(struct kvm_io_device *pio_dev,
			     struct kvm_vcpu *vcpu)
{
	struct kvm_pio_request *io = &vcpu->pio;
	void *pd = vcpu->pio_data;
	int i;

	mutex_lock(&vcpu->kvm->lock);
	for (i = 0; i < io->cur_count; i++) {
		kvm_iodevice_write(pio_dev, io->port,
				   io->size,
				   pd);
		pd += io->size;
	}
	mutex_unlock(&vcpu->kvm->lock);
}

/*
 * Emulate a single IN/OUT instruction.  Fills in both vcpu->run->io (for a
 * potential userspace exit) and vcpu->pio (kernel-side bookkeeping), then
 * tries an in-kernel device first.  Returns 1 when handled in kernel,
 * 0 when the request must be completed by userspace.
 *
 * NOTE(review): the "run" parameter is unused -- the code works through
 * vcpu->run instead; presumably kept for signature symmetry with callers.
 */
int kvm_emulate_pio (struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
		  int size, unsigned port)
{
	struct kvm_io_device *pio_dev;

	vcpu->run->exit_reason = KVM_EXIT_IO;
	vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
	vcpu->run->io.size = vcpu->pio.size = size;
	vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
	vcpu->run->io.count = vcpu->pio.count = vcpu->pio.cur_count = 1;
	vcpu->run->io.port = vcpu->pio.port = port;
	vcpu->pio.in = in;
	vcpu->pio.string = 0;
	vcpu->pio.down = 0;
	vcpu->pio.guest_page_offset = 0;
	vcpu->pio.rep = 0;

	/*
	 * Stage the (low 32 bits of) RAX as OUT data; assumes size <= 4
	 * here -- TODO(review) confirm callers never pass a larger size.
	 */
	kvm_x86_ops->cache_regs(vcpu);
	memcpy(vcpu->pio_data, &vcpu->regs[VCPU_REGS_RAX], 4);
	kvm_x86_ops->decache_regs(vcpu);

	kvm_x86_ops->skip_emulated_instruction(vcpu);

	pio_dev = vcpu_find_pio_dev(vcpu, port);
	if (pio_dev) {
		kernel_pio(pio_dev, vcpu, vcpu->pio_data);
		complete_pio(vcpu);
		return 1;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_emulate_pio);

/*
 * Emulate a string PIO instruction (INS/OUTS, optionally REP).  Sets up
 * vcpu->run->io / vcpu->pio, clips the transfer so it does not cross a page
 * boundary (or pins two pages and does a single element if it must), pins
 * the guest buffer pages, and either completes the write via an in-kernel
 * device or hands the request to userspace.  Return value: 1 = handled /
 * guest killed, 0 = exit to userspace, negative = error from data copy.
 *
 * Backwards (DF=1) string I/O is unimplemented and injects #GP.
 */
int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
		  int size, unsigned long count, int down,
		  gva_t address, int rep, unsigned port)
{
	unsigned now, in_page;
	int i, ret = 0;
	int nr_pages = 1;
	struct page *page;
	struct kvm_io_device *pio_dev;

	vcpu->run->exit_reason = KVM_EXIT_IO;
	vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
	vcpu->run->io.size = vcpu->pio.size = size;
	vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
	vcpu->run->io.count = vcpu->pio.count = vcpu->pio.cur_count = count;
	vcpu->run->io.port = vcpu->pio.port = port;
	vcpu->pio.in = in;
	vcpu->pio.string = 1;
	vcpu->pio.down = down;
	vcpu->pio.guest_page_offset = offset_in_page(address);
	vcpu->pio.rep = rep;

	/* REP with RCX == 0 is a no-op: just step past the instruction. */
	if (!count) {
		kvm_x86_ops->skip_emulated_instruction(vcpu);
		return 1;
	}

	/* Bytes available before the guest buffer hits a page boundary. */
	if (!down)
		in_page = PAGE_SIZE - offset_in_page(address);
	else
		in_page = offset_in_page(address) + size;
	now = min(count, (unsigned long)in_page / size);
	if (!now) {
		/*
		 * String I/O straddles page boundary.  Pin two guest pages
		 * so that we satisfy atomicity constraints.  Do just one
		 * transaction to avoid complexity.
		 */
		nr_pages = 2;
		now = 1;
	}
	if (down) {
		/*
		 * String I/O in reverse.  Yuck.  Kill the guest, fix later.
		 */
		pr_unimpl(vcpu, "guest string pio down\n");
		inject_gp(vcpu);
		return 1;
	}
	vcpu->run->io.count = now;
	vcpu->pio.cur_count = now;

	/*
	 * Only skip the instruction now if this chunk finishes the whole
	 * transfer; otherwise the instruction re-executes for the rest.
	 */
	if (vcpu->pio.cur_count == vcpu->pio.count)
		kvm_x86_ops->skip_emulated_instruction(vcpu);

	for (i = 0; i < nr_pages; ++i) {
		mutex_lock(&vcpu->kvm->lock);
		page = gva_to_page(vcpu, address + i * PAGE_SIZE);
		if (page)
			get_page(page);
		vcpu->pio.guest_pages[i] = page;
		mutex_unlock(&vcpu->kvm->lock);
		/* Unmapped guest address: inject #GP and drop any pins. */
		if (!page) {
			inject_gp(vcpu);
			free_pio_guest_pages(vcpu);
			return 1;
		}
	}

	pio_dev = vcpu_find_pio_dev(vcpu, port);
	if (!vcpu->pio.in) {
		/* string PIO write */
		ret = pio_copy_data(vcpu);
		if (ret >= 0 && pio_dev) {
			pio_string_write(pio_dev, vcpu);
			complete_pio(vcpu);
			if (vcpu->pio.count == 0)
				ret = 1;
		}
	} else if (pio_dev)
		pr_unimpl(vcpu, "no string pio read support yet, "
		       "port %x size %d count %ld\n",
			port, size, count);

	return ret;
}
EXPORT_SYMBOL_GPL(kvm_emulate_pio_string);

/*
 * Check if userspace requested an interrupt window, and that the
 * interrupt window is open.
 *
 * No need to exit to userspace if we already have an interrupt queued.
 */
static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
					  struct kvm_run *kvm_run)
{
	return (!vcpu->irq_summary &&
		kvm_run->request_interrupt_window &&
		vcpu->interrupt_window_open &&
		(kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF));
}

/*
 * Mirror interrupt-related vcpu state (IF flag, CR8, APIC base, injection
 * readiness) into the shared kvm_run structure before returning to
 * userspace.
 */
static void post_kvm_run_save(struct kvm_vcpu *vcpu,
			      struct kvm_run *kvm_run)
{
	kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
	kvm_run->cr8 = get_cr8(vcpu);
	kvm_run->apic_base = kvm_get_apic_base(vcpu);
	/*
	 * With an in-kernel irqchip the kernel injects on its own, so
	 * userspace injection is always "ready"; otherwise readiness depends
	 * on the window being open with nothing pending.
	 */
	if (irqchip_in_kernel(vcpu->kvm))
		kvm_run->ready_for_interrupt_injection = 1;
	else
		kvm_run->ready_for_interrupt_injection =
					(vcpu->interrupt_window_open &&
					 vcpu->irq_summary == 0);
}

/*
 * Core vcpu execution loop: reload the MMU, disable preemption and
 * interrupts, inject pending events, enter the guest via kvm_x86_ops->run,
 * then handle the exit.  Loops via "again" for lightweight exits and via
 * "preempted" after rescheduling; leaves through "out" on signals,
 * userspace-requested interrupt windows, or errors.
 */
static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int r;

	if (unlikely(vcpu->mp_state == VCPU_MP_STATE_SIPI_RECEIVED)) {
		/* NOTE(review): printk lacks a KERN_* level prefix. */
		printk("vcpu %d received sipi with vector # %x\n",
		       vcpu->vcpu_id, vcpu->sipi_vector);
		kvm_lapic_reset(vcpu);
		kvm_x86_ops->vcpu_reset(vcpu);
		vcpu->mp_state = VCPU_MP_STATE_RUNNABLE;
	}

preempted:
	if (vcpu->guest_debug.enabled)
		kvm_x86_ops->guest_debug_pre(vcpu);

again:
	r = kvm_mmu_reload(vcpu);
	if (unlikely(r))
		goto out;

	preempt_disable();

	kvm_x86_ops->prepare_guest_switch(vcpu);
	kvm_load_guest_fpu(vcpu);

	/* Interrupts stay off from here until after guest exit. */
	local_irq_disable();

	/* A pending signal takes priority over entering the guest. */
	if (signal_pending(current)) {
		local_irq_enable();
		preempt_enable();
		r = -EINTR;
		kvm_run->exit_reason = KVM_EXIT_INTR;
		++vcpu->stat.signal_exits;
		goto out;
	}

	if (irqchip_in_kernel(vcpu->kvm))
		kvm_x86_ops->inject_pending_irq(vcpu);
	else if (!vcpu->mmio_read_completed)
		kvm_x86_ops->inject_pending_vectors(vcpu, kvm_run);

	vcpu->guest_mode = 1;
	kvm_guest_enter();

	if (vcpu->requests)
		if (test_and_clear_bit(KVM_TLB_FLUSH, &vcpu->requests))
			kvm_x86_ops->tlb_flush(vcpu);

	kvm_x86_ops->run(vcpu, kvm_run);

	vcpu->guest_mode = 0;
	local_irq_enable();

	++vcpu->stat.exits;

	/*
	 * We must have an instruction between local_irq_enable() and
	 * kvm_guest_exit(), so the timer interrupt isn't delayed by
	 * the interrupt shadow.  The stat.exits increment will do nicely.
	 * But we need to prevent reordering, hence this barrier():
	 */
	barrier();

	kvm_guest_exit();

	preempt_enable();

	/*
	 * Profile KVM exit RIPs:
	 */
	if (unlikely(prof_on == KVM_PROFILING)) {
		kvm_x86_ops->cache_regs(vcpu);
		profile_hit(KVM_PROFILING, (void *)vcpu->rip);
	}

	r = kvm_x86_ops->handle_exit(kvm_run, vcpu);

	if (r > 0) {
		if (dm_request_for_irq_injection(vcpu, kvm_run)) {
			r = -EINTR;
			kvm_run->exit_reason = KVM_EXIT_INTR;
			++vcpu->stat.request_irq_exits;
			goto out;
		}
		/* Re-enter the guest directly unless we should reschedule. */
		if (!need_resched()) {
			++vcpu->stat.light_exits;
			goto again;
		}
	}

out:
	if (r > 0) {
		kvm_resched(vcpu);
		goto preempted;
	}

	post_kvm_run_save(vcpu, kvm_run);

	return r;
}

/*
 * KVM_RUN ioctl handler.  Applies the vcpu's signal mask, finishes any PIO
 * or MMIO transaction that userspace just completed, propagates a
 * hypercall return value into guest RAX, then enters __vcpu_run().
 */
static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int r;
	sigset_t sigsaved;

	vcpu_load(vcpu);

	/* An AP that has not received SIPI yet just blocks. */
	if (unlikely(vcpu->mp_state == VCPU_MP_STATE_UNINITIALIZED)) {
		kvm_vcpu_block(vcpu);
		vcpu_put(vcpu);
		return -EAGAIN;
	}

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

	/* re-sync apic's tpr */
	if (!irqchip_in_kernel(vcpu->kvm))
		set_cr8(vcpu, kvm_run->cr8);

	/* Finish a PIO transaction userspace completed on our behalf. */
	if (vcpu->pio.cur_count) {
		r = complete_pio(vcpu);
		if (r)
			goto out;
	}

	if (vcpu->mmio_needed) {
		memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
		vcpu->mmio_read_completed = 1;
		vcpu->mmio_needed = 0;
		r = emulate_instruction(vcpu, kvm_run,
					vcpu->mmio_fault_cr2, 0);
		if (r == EMULATE_DO_MMIO) {
			/*
			 * Read-modify-write.  Back to userspace.
			 */
			r = 0;
			goto out;
		}
	}

	if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) {
		kvm_x86_ops->cache_regs(vcpu);
		vcpu->regs[VCPU_REGS_RAX] = kvm_run->hypercall.ret;
		kvm_x86_ops->decache_regs(vcpu);
	}

	r = __vcpu_run(vcpu, kvm_run);

out:
	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);

	vcpu_put(vcpu);
	return r;
}

/*
 * KVM_GET_REGS ioctl handler: copy the guest's general-purpose registers,
 * RIP and RFLAGS into the userspace kvm_regs structure.
 */
static int kvm_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu,
				   struct kvm_regs *regs)
{
	vcpu_load(vcpu);

	kvm_x86_ops->cache_regs(vcpu);

	regs->rax = vcpu->regs[VCPU_REGS_RAX];
	regs->rbx = vcpu->regs[VCPU_REGS_RBX];
	regs->rcx = vcpu->regs[VCPU_REGS_RCX];
	regs->rdx = vcpu->regs[VCPU_REGS_RDX];
	regs->rsi = vcpu->regs[VCPU_REGS_RSI];
	regs->rdi = vcpu->regs[VCPU_REGS_RDI];
	regs->rsp = vcpu->regs[VCPU_REGS_RSP];
	regs->rbp = vcpu->regs[VCPU_REGS_RBP];
#ifdef CONFIG_X86_64
	regs->r8 = vcpu->regs[VCPU_REGS_R8];
	regs->r9 = vcpu->regs[VCPU_REGS_R9];
	regs->r10 = vcpu->regs[VCPU_REGS_R10];
	regs->r11 = vcpu->regs[VCPU_REGS_R11];
	regs->r12 = vcpu->regs[VCPU_REGS_R12];
	regs->r13 = vcpu->regs[VCPU_REGS_R13];
	regs->r14 = vcpu->regs[VCPU_REGS_R14];
	regs->r15 = vcpu->regs[VCPU_REGS_R15];
#endif

	regs->rip = vcpu->rip;
	regs->rflags = kvm_x86_ops->get_rflags(vcpu);

	/*
	 * Don't leak debug flags in case they were set for guest debugging
	 */
	if (vcpu->guest_debug.enabled && vcpu->guest_debug.singlestep)
		regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);

	vcpu_put(vcpu);

	return 0;
}

/*
 * KVM_SET_REGS ioctl handler: load the guest's general-purpose registers,
 * RIP and RFLAGS from the userspace kvm_regs structure.
 */
static int kvm_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu,
				   struct kvm_regs *regs)
{
	vcpu_load(vcpu);

	vcpu->regs[VCPU_REGS_RAX] = regs->rax;
	vcpu->regs[VCPU_REGS_RBX] = regs->rbx;
	vcpu->regs[VCPU_REGS_RCX] = regs->rcx;
	vcpu->regs[VCPU_REGS_RDX] = regs->rdx;
	vcpu->regs[VCPU_REGS_RSI] = regs->rsi;
	vcpu->regs[VCPU_REGS_RDI] = regs->rdi;
	vcpu->regs[VCPU_REGS_RSP] = regs->rsp;
	vcpu->regs[VCPU_REGS_RBP] = regs->rbp;
#ifdef CONFIG_X86_64
	vcpu->regs[VCPU_REGS_R8] = regs->r8;
	vcpu->regs[VCPU_REGS_R9] = regs->r9;
	vcpu->regs[VCPU_REGS_R10] = regs->r10;
	vcpu->regs[VCPU_REGS_R11] = regs->r11;
	vcpu->regs[VCPU_REGS_R12] = regs->r12;
	vcpu->regs[VCPU_REGS_R13] = regs->r13;
	vcpu->regs[VCPU_REGS_R14] = regs->r14;
	vcpu->regs[VCPU_REGS_R15] = regs->r15;
#endif

	vcpu->rip = regs->rip;
	kvm_x86_ops->set_rflags(vcpu, regs->rflags);

	kvm_x86_ops->decache_regs(vcpu);

	vcpu_put(vcpu);

	return 0;
}

/*
 * Thin wrapper over the vendor-specific segment accessor.
 * NOTE(review): "return" of a void expression from a void function is a
 * GCC-ism; kept as in the original.
 */
static void get_segment(struct kvm_vcpu *vcpu,
			struct kvm_segment *var, int seg)
{
	return kvm_x86_ops->get_segment(vcpu, var, seg);
}

/*
 * KVM_GET_SREGS ioctl handler: copy segment registers, descriptor tables,
 * control registers, EFER and APIC base into the userspace kvm_sregs
 * structure.
 * NOTE(review): this listing is truncated mid-function -- the body
 * continues on the next page of the source.
 */
static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				    struct kvm_sregs *sregs)
{
	struct descriptor_table dt;
	int pending_vec;

	vcpu_load(vcpu);

	get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
	get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
	get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
	get_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
	get_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
	get_segment(vcpu, &sregs->ss, VCPU_SREG_SS);

	get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
	get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);

	kvm_x86_ops->get_idt(vcpu, &dt);
	sregs->idt.limit = dt.limit;
	sregs->idt.base = dt.base;
	kvm_x86_ops->get_gdt(vcpu, &dt);
	sregs->gdt.limit = dt.limit;
	sregs->gdt.base = dt.base;

	kvm_x86_ops->decache_cr4_guest_bits(vcpu);
	sregs->cr0 = vcpu->cr0;
	sregs->cr2 = vcpu->cr2;
	sregs->cr3 = vcpu->cr3;
	sregs->cr4 = vcpu->cr4;
	sregs->cr8 = get_cr8(vcpu);
	sregs->efer = vcpu->shadow_efer;
	sregs->apic_base = kvm_get_apic_base(vcpu);

	if (irqchip_in_kernel(vcpu->kvm)) {
		memset(sregs->interrupt_bitmap, 0,

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?