vmx.c

来自「linux 内核源代码」· C语言 代码 · 共 2,567 行 · 第 1/5 页

C
2,567
字号
/*
 * NOTE(review): this chunk is page 1/5 of a larger file; the brace below
 * closes a function whose body begins on the previous (unseen) page.
 */
}

/*
 * VMCALL exit handler: advance the guest past the VMCALL instruction and
 * hand the request to the generic KVM hypercall dispatcher.
 */
static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	skip_emulated_instruction(vcpu);
	return kvm_hypercall(vcpu, kvm_run);
}

/*
 * The exit handlers return 1 if the exit was handled fully and guest execution
 * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
 * to be done to userspace and return 0.
 */
static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
				      struct kvm_run *kvm_run) = {
	[EXIT_REASON_EXCEPTION_NMI]           = handle_exception,
	[EXIT_REASON_EXTERNAL_INTERRUPT]      = handle_external_interrupt,
	[EXIT_REASON_TRIPLE_FAULT]            = handle_triple_fault,
	[EXIT_REASON_IO_INSTRUCTION]          = handle_io,
	[EXIT_REASON_CR_ACCESS]               = handle_cr,
	[EXIT_REASON_DR_ACCESS]               = handle_dr,
	[EXIT_REASON_CPUID]                   = handle_cpuid,
	[EXIT_REASON_MSR_READ]                = handle_rdmsr,
	[EXIT_REASON_MSR_WRITE]               = handle_wrmsr,
	[EXIT_REASON_PENDING_INTERRUPT]       = handle_interrupt_window,
	[EXIT_REASON_HLT]                     = handle_halt,
	[EXIT_REASON_VMCALL]                  = handle_vmcall,
	[EXIT_REASON_TPR_BELOW_THRESHOLD]     = handle_tpr_below_threshold
};

/* Number of slots in the (sparse) exit-handler table above. */
static const int kvm_vmx_max_exit_handlers =
	ARRAY_SIZE(kvm_vmx_exit_handlers);

/*
 * The guest has exited.  See if we can fix it or if we need userspace
 * assistance.
 */
static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
{
	u32 vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
	u32 exit_reason = vmcs_read32(VM_EXIT_REASON);
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	/* The VM entry itself failed; report the hardware error code. */
	if (unlikely(vmx->fail)) {
		kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
		kvm_run->fail_entry.hardware_entry_failure_reason
			= vmcs_read32(VM_INSTRUCTION_ERROR);
		return 0;
	}

	/*
	 * An exit while an event was being delivered through the IDT is only
	 * expected for the exception/NMI exit reason; warn otherwise.
	 */
	if ( (vectoring_info & VECTORING_INFO_VALID_MASK) &&
				exit_reason != EXIT_REASON_EXCEPTION_NMI )
		printk(KERN_WARNING "%s: unexpected, valid vectoring info and "
		       "exit reason is 0x%x\n", __FUNCTION__, exit_reason);
	if (exit_reason < kvm_vmx_max_exit_handlers
	    && kvm_vmx_exit_handlers[exit_reason])
		return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run);
	else {
		/* No handler registered for this reason; punt to userspace. */
		kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
		kvm_run->hw.hardware_exit_reason = exit_reason;
	}
	return 0;
}

/*
 * Intentionally empty: no explicit TLB flush is issued here; presumably the
 * flush happens as a side effect elsewhere (e.g. CR3 loads) -- confirm
 * against callers of the .tlb_flush op.
 */
static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
{
}

/*
 * Program the VMCS TPR threshold from local APIC state so that a guest TPR
 * write below the highest pending interrupt priority causes an exit.
 */
static void update_tpr_threshold(struct kvm_vcpu *vcpu)
{
	int max_irr, tpr;

	if (!vm_need_tpr_shadow(vcpu->kvm))
		return;
	/* No enabled LAPIC or nothing pending: never exit on TPR writes. */
	if (!kvm_lapic_enabled(vcpu) ||
	    ((max_irr = kvm_lapic_find_highest_irr(vcpu)) == -1)) {
		vmcs_write32(TPR_THRESHOLD, 0);
		return;
	}
	tpr = (kvm_lapic_get_cr8(vcpu) & 0x0f) << 4;
	/* Threshold is the lower of the current TPR and the highest IRR. */
	vmcs_write32(TPR_THRESHOLD, (max_irr > tpr) ? tpr >> 4 : max_irr >> 4);
}

/*
 * Request an "interrupt window" exit so we regain control as soon as the
 * guest is able to accept an external interrupt.
 */
static void enable_irq_window(struct kvm_vcpu *vcpu)
{
	u32 cpu_based_vm_exec_control;

	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
}

/*
 * Decide what to inject before the next VM entry: re-deliver an event that
 * was interrupted by the exit, inject a pending external interrupt if the
 * guest can take one, or else ask for an interrupt-window exit.
 */
static void vmx_intr_assist(struct kvm_vcpu *vcpu)
{
	u32 idtv_info_field, intr_info_field;
	int has_ext_irq, interrupt_window_open;
	int vector;

	kvm_inject_pending_timer_irqs(vcpu);
	update_tpr_threshold(vcpu);
	has_ext_irq = kvm_cpu_has_interrupt(vcpu);
	intr_info_field = vmcs_read32(VM_ENTRY_INTR_INFO_FIELD);
	idtv_info_field = vmcs_read32(IDT_VECTORING_INFO_FIELD);
	if (intr_info_field & INTR_INFO_VALID_MASK) {
		/* An event is already queued for injection on entry. */
		if (idtv_info_field & INTR_INFO_VALID_MASK) {
			/* TODO: fault when IDT_Vectoring */
			printk(KERN_ERR "Fault when IDT_Vectoring\n");
		}
		if (has_ext_irq)
			enable_irq_window(vcpu);
		return;
	}
	if (unlikely(idtv_info_field & INTR_INFO_VALID_MASK)) {
		/* Re-inject the event whose delivery the exit interrupted. */
		vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
				vmcs_read32(VM_EXIT_INSTRUCTION_LEN));

		/* Carry the error code along when the event pushes one. */
		if (unlikely(idtv_info_field & INTR_INFO_DELIEVER_CODE_MASK))
			vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
				vmcs_read32(IDT_VECTORING_ERROR_CODE));
		if (unlikely(has_ext_irq))
			enable_irq_window(vcpu);
		return;
	}
	if (!has_ext_irq)
		return;
	/* IF set and no interruptibility blocking (low two state bits). */
	interrupt_window_open =
		((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
		 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0);
	if (interrupt_window_open) {
		vector = kvm_cpu_get_interrupt(vcpu);
		vmx_inject_irq(vcpu, vector);
		kvm_timer_intr_post(vcpu, vector);
	} else
		enable_irq_window(vcpu);
}

/*
 * Enter guest mode: save host registers on the stack, load guest registers
 * from the vcpu, execute VMLAUNCH (first entry) or VMRESUME, then save guest
 * registers back and restore the host's on exit.  %0 is vmx->fail, %1 is
 * vmx->launched, %2 (rdx/edx) carries the HOST_RSP VMCS field encoding for
 * ASM_VMX_VMWRITE_RSP_RDX, %3 (rcx/ecx) points at the vcpu.
 */
static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	u32 intr_info;

	/*
	 * Loading guest fpu may have cleared host cr0.ts
	 */
	vmcs_writel(HOST_CR0, read_cr0());

	asm (
		/* Store host registers */
#ifdef CONFIG_X86_64
		"push %%rax; push %%rbx; push %%rdx;"
		"push %%rsi; push %%rdi; push %%rbp;"
		"push %%r8;  push %%r9;  push %%r10; push %%r11;"
		"push %%r12; push %%r13; push %%r14; push %%r15;"
		"push %%rcx \n\t"
		ASM_VMX_VMWRITE_RSP_RDX "\n\t"
#else
		"pusha; push %%ecx \n\t"
		ASM_VMX_VMWRITE_RSP_RDX "\n\t"
#endif
		/* Check if vmlaunch or vmresume is needed */
		"cmp $0, %1 \n\t"
		/* Load guest registers.  Don't clobber flags. */
#ifdef CONFIG_X86_64
		"mov %c[cr2](%3), %%rax \n\t"
		"mov %%rax, %%cr2 \n\t"
		"mov %c[rax](%3), %%rax \n\t"
		"mov %c[rbx](%3), %%rbx \n\t"
		"mov %c[rdx](%3), %%rdx \n\t"
		"mov %c[rsi](%3), %%rsi \n\t"
		"mov %c[rdi](%3), %%rdi \n\t"
		"mov %c[rbp](%3), %%rbp \n\t"
		"mov %c[r8](%3),  %%r8  \n\t"
		"mov %c[r9](%3),  %%r9  \n\t"
		"mov %c[r10](%3), %%r10 \n\t"
		"mov %c[r11](%3), %%r11 \n\t"
		"mov %c[r12](%3), %%r12 \n\t"
		"mov %c[r13](%3), %%r13 \n\t"
		"mov %c[r14](%3), %%r14 \n\t"
		"mov %c[r15](%3), %%r15 \n\t"
		"mov %c[rcx](%3), %%rcx \n\t" /* kills %3 (rcx) */
#else
		"mov %c[cr2](%3), %%eax \n\t"
		"mov %%eax,   %%cr2 \n\t"
		"mov %c[rax](%3), %%eax \n\t"
		"mov %c[rbx](%3), %%ebx \n\t"
		"mov %c[rdx](%3), %%edx \n\t"
		"mov %c[rsi](%3), %%esi \n\t"
		"mov %c[rdi](%3), %%edi \n\t"
		"mov %c[rbp](%3), %%ebp \n\t"
		"mov %c[rcx](%3), %%ecx \n\t" /* kills %3 (ecx) */
#endif
		/* Enter guest mode */
		"jne .Llaunched \n\t"
		ASM_VMX_VMLAUNCH "\n\t"
		"jmp .Lkvm_vmx_return \n\t"
		".Llaunched: " ASM_VMX_VMRESUME "\n\t"
		".Lkvm_vmx_return: "
		/* Save guest registers, load host registers, keep flags */
#ifdef CONFIG_X86_64
		"xchg %3,     (%%rsp) \n\t"
		"mov %%rax, %c[rax](%3) \n\t"
		"mov %%rbx, %c[rbx](%3) \n\t"
		"pushq (%%rsp); popq %c[rcx](%3) \n\t"
		"mov %%rdx, %c[rdx](%3) \n\t"
		"mov %%rsi, %c[rsi](%3) \n\t"
		"mov %%rdi, %c[rdi](%3) \n\t"
		"mov %%rbp, %c[rbp](%3) \n\t"
		"mov %%r8,  %c[r8](%3) \n\t"
		"mov %%r9,  %c[r9](%3) \n\t"
		"mov %%r10, %c[r10](%3) \n\t"
		"mov %%r11, %c[r11](%3) \n\t"
		"mov %%r12, %c[r12](%3) \n\t"
		"mov %%r13, %c[r13](%3) \n\t"
		"mov %%r14, %c[r14](%3) \n\t"
		"mov %%r15, %c[r15](%3) \n\t"
		"mov %%cr2, %%rax   \n\t"
		"mov %%rax, %c[cr2](%3) \n\t"
		"mov (%%rsp), %3 \n\t"

		"pop  %%rcx; pop  %%r15; pop  %%r14; pop  %%r13; pop  %%r12;"
		"pop  %%r11; pop  %%r10; pop  %%r9;  pop  %%r8;"
		"pop  %%rbp; pop  %%rdi; pop  %%rsi;"
		"pop  %%rdx; pop  %%rbx; pop  %%rax \n\t"
#else
		"xchg %3, (%%esp) \n\t"
		"mov %%eax, %c[rax](%3) \n\t"
		"mov %%ebx, %c[rbx](%3) \n\t"
		"pushl (%%esp); popl %c[rcx](%3) \n\t"
		"mov %%edx, %c[rdx](%3) \n\t"
		"mov %%esi, %c[rsi](%3) \n\t"
		"mov %%edi, %c[rdi](%3) \n\t"
		"mov %%ebp, %c[rbp](%3) \n\t"
		"mov %%cr2, %%eax  \n\t"
		"mov %%eax, %c[cr2](%3) \n\t"
		"mov (%%esp), %3 \n\t"
		"pop %%ecx; popa \n\t"
#endif
		/* CF or ZF set after vmlaunch/vmresume means entry failed. */
		"setbe %0 \n\t"
	      : "=q" (vmx->fail)
	      : "r"(vmx->launched), "d"((unsigned long)HOST_RSP),
		"c"(vcpu),
		[rax]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RAX])),
		[rbx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RBX])),
		[rcx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RCX])),
		[rdx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RDX])),
		[rsi]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RSI])),
		[rdi]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RDI])),
		[rbp]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RBP])),
#ifdef CONFIG_X86_64
		[r8 ]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R8 ])),
		[r9 ]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R9 ])),
		[r10]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R10])),
		[r11]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R11])),
		[r12]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R12])),
		[r13]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R13])),
		[r14]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R14])),
		[r15]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R15])),
#endif
		[cr2]"i"(offsetof(struct kvm_vcpu, cr2))
	      : "cc", "memory" );

	vcpu->interrupt_window_open = (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0;

	/* Reload host data segments (guest values may still be loaded). */
	asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
	vmx->launched = 1;

	intr_info = vmcs_read32(VM_EXIT_INTR_INFO);

	/* We need to handle NMIs before interrupts are enabled */
	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) /* nmi */
		asm("int $2");
}

/*
 * Queue a page fault for injection into the guest on the next entry; if a
 * page fault was itself being delivered when we exited, escalate to a
 * double fault instead.
 */
static void vmx_inject_page_fault(struct kvm_vcpu *vcpu,
				  unsigned long addr,
				  u32 err_code)
{
	u32 vect_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);

	++vcpu->stat.pf_guest;

	if (is_page_fault(vect_info)) {
		/* #PF while delivering #PF: inject #DF with error code 0. */
		printk(KERN_DEBUG "inject_page_fault: "
		       "double fault 0x%lx @ 0x%lx\n",
		       addr, vmcs_readl(GUEST_RIP));
		vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, 0);
		vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
			     DF_VECTOR |
			     INTR_TYPE_EXCEPTION |
			     INTR_INFO_DELIEVER_CODE_MASK |
			     INTR_INFO_VALID_MASK);
		return;
	}
	vcpu->cr2 = addr;
	vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, err_code);
	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
		     PF_VECTOR |
		     INTR_TYPE_EXCEPTION |
		     INTR_INFO_DELIEVER_CODE_MASK |
		     INTR_INFO_VALID_MASK);
}

/*
 * Free the vcpu's VMCS, first clearing it on every CPU so no processor
 * keeps a cached reference to it.
 */
static void vmx_free_vmcs(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	if (vmx->vmcs) {
		on_each_cpu(__vcpu_clear, vmx, 0, 1);
		free_vmcs(vmx->vmcs);
		vmx->vmcs = NULL;
	}
}

/* Tear down a vcpu: VMCS, MSR save areas, then the generic vcpu state. */
static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	vmx_free_vmcs(vcpu);
	kfree(vmx->host_msrs);
	kfree(vmx->guest_msrs);
	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vmx);
}

/*
 * Allocate and initialize a new VMX vcpu: generic vcpu state, an in-kernel
 * local APIC when the irqchip lives in the kernel, guest/host MSR save
 * pages, and the VMCS itself.  Unwinds via gotos on failure and returns
 * either the embedded kvm_vcpu or an ERR_PTR.
 */
static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
{
	int err;
	struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	int cpu;

	if (!vmx)
		return ERR_PTR(-ENOMEM);

	err = kvm_vcpu_init(&vmx->vcpu, kvm, id);
	if (err)
		goto free_vcpu;

	if (irqchip_in_kernel(kvm)) {
		err = kvm_create_lapic(&vmx->vcpu);
		if (err < 0)
			goto free_vcpu;
	}

	vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!vmx->guest_msrs) {
		err = -ENOMEM;
		goto uninit_vcpu;
	}

	vmx->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!vmx->host_msrs)
		goto free_guest_msrs;

	vmx->vmcs = alloc_vmcs();
	if (!vmx->vmcs)
		goto free_msrs;

	vmcs_clear(vmx->vmcs);

	/* Set up the VMCS on the current CPU with preemption disabled. */
	cpu = get_cpu();
	vmx_vcpu_load(&vmx->vcpu, cpu);
	err = vmx_vcpu_setup(vmx);
	vmx_vcpu_put(&vmx->vcpu);
	put_cpu();
	if (err)
		goto free_vmcs;

	return &vmx->vcpu;

free_vmcs:
	free_vmcs(vmx->vmcs);
free_msrs:
	kfree(vmx->host_msrs);
free_guest_msrs:
	kfree(vmx->guest_msrs);
uninit_vcpu:
	kvm_vcpu_uninit(&vmx->vcpu);
free_vcpu:
	kmem_cache_free(kvm_vcpu_cache, vmx);
	return ERR_PTR(err);
}

/*
 * Verify this CPU's VMX capabilities match the configuration computed on
 * the boot CPU; stores 0 or -EIO through the int pointed to by rtn.
 */
static void __init vmx_check_processor_compat(void *rtn)
{
	struct vmcs_config vmcs_conf;

	*(int *)rtn = 0;
	if (setup_vmcs_config(&vmcs_conf) < 0)
		*(int *)rtn = -EIO;
	if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) {
		printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n",
				smp_processor_id());
		*(int *)rtn = -EIO;
	}
}

/* VMX implementation of the arch-independent kvm_x86_ops vector. */
static struct kvm_x86_ops vmx_x86_ops = {
	.cpu_has_kvm_support = cpu_has_kvm_support,
	.disabled_by_bios = vmx_disabled_by_bios,
	.hardware_setup = hardware_setup,
	.hardware_unsetup = hardware_unsetup,
	.check_processor_compatibility = vmx_check_processor_compat,
	.hardware_enable = hardware_enable,
	.hardware_disable = hardware_disable,
	.vcpu_create = vmx_create_vcpu,
	.vcpu_free = vmx_free_vcpu,
	.vcpu_reset = vmx_vcpu_reset,

	.prepare_guest_switch = vmx_save_host_state,
	.vcpu_load = vmx_vcpu_load,
	.vcpu_put = vmx_vcpu_put,
	.vcpu_decache = vmx_vcpu_decache,

	.set_guest_debug = set_guest_debug,
	.guest_debug_pre = kvm_guest_debug_pre,
	.get_msr = vmx_get_msr,
	.set_msr = vmx_set_msr,
	.get_segment_base = vmx_get_segment_base,
	.get_segment = vmx_get_segment,
	.set_segment = vmx_set_segment,
	.get_cs_db_l_bits = vmx_get_cs_db_l_bits,
	.decache_cr4_guest_bits = vmx_decache_cr4_guest_bits,
	.set_cr0 = vmx_set_cr0,
	.set_cr3 = vmx_set_cr3,
	.set_cr4 = vmx_set_cr4,
#ifdef CONFIG_X86_64
	.set_efer = vmx_set_efer,
#endif
	.get_idt = vmx_get_idt,
	.set_idt = vmx_set_idt,
	.get_gdt = vmx_get_gdt,
	.set_gdt = vmx_set_gdt,
	.cache_regs = vcpu_load_rsp_rip,
	.decache_regs = vcpu_put_rsp_rip,
	.get_rflags = vmx_get_rflags,
	.set_rflags = vmx_set_rflags,

	.tlb_flush = vmx_flush_tlb,
	.inject_page_fault = vmx_inject_page_fault,

	.inject_gp = vmx_inject_gp,

	.run = vmx_vcpu_run,
	.handle_exit = kvm_handle_exit,
	.skip_emulated_instruction = skip_emulated_instruction,
	.patch_hypercall = vmx_patch_hypercall,
	.get_irq = vmx_get_irq,
	.set_irq = vmx_inject_irq,
	.inject_pending_irq = vmx_intr_assist,
	.inject_pending_vectors = do_interrupt_requests,
};

/*
 * Module init: allocate the two I/O permission bitmap pages (all ports
 * intercepted except the PC debug port) and register this backend with the
 * generic KVM layer.  Returns 0 or a negative errno.
 */
static int __init vmx_init(void)
{
	void *iova;
	int r;

	vmx_io_bitmap_a = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
	if (!vmx_io_bitmap_a)
		return -ENOMEM;

	vmx_io_bitmap_b = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
	if (!vmx_io_bitmap_b) {
		r = -ENOMEM;
		goto out;
	}

	/*
	 * Allow direct access to the PC debug port (it is often used for I/O
	 * delays, but the vmexits simply slow things down).
	 */
	iova = kmap(vmx_io_bitmap_a);
	memset(iova, 0xff, PAGE_SIZE);
	clear_bit(0x80, iova);
	kunmap(vmx_io_bitmap_a);

	iova = kmap(vmx_io_bitmap_b);
	memset(iova, 0xff, PAGE_SIZE);
	kunmap(vmx_io_bitmap_b);

	r = kvm_init_x86(&vmx_x86_ops, sizeof(struct vcpu_vmx), THIS_MODULE);
	if (r)
		goto out1;

	return 0;

out1:
	__free_page(vmx_io_bitmap_b);
out:
	__free_page(vmx_io_bitmap_a);
	return r;
}

/* Module exit: release the I/O bitmap pages and unregister from KVM. */
static void __exit vmx_exit(void)
{
	__free_page(vmx_io_bitmap_b);
	__free_page(vmx_io_bitmap_a);

	kvm_exit_x86();
}

module_init(vmx_init)
module_exit(vmx_exit)

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?