vmx.c

From "Linux kernel source code" · C code · 2,567 lines total · page 1 of 5

C
2,567
字号
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This module enables machines with Intel VT-x extensions to run virtual
 * machines without emulation or binary translation.
 *
 * Copyright (C) 2006 Qumranet, Inc.
 *
 * Authors:
 *   Avi Kivity   <avi@qumranet.com>
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "kvm.h"
#include "x86_emulate.h"
#include "irq.h"
#include "vmx.h"
#include "segment_descriptor.h"

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/sched.h>

#include <asm/io.h>
#include <asm/desc.h>

MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");

/* In-memory layout of a VMCS region, as defined by the VT-x architecture. */
struct vmcs {
	u32 revision_id;
	u32 abort;
	char data[0];	/* implementation-specific VMCS data follows */
};

/* Per-vcpu VMX state, wrapping the generic kvm_vcpu. */
struct vcpu_vmx {
	struct kvm_vcpu       vcpu;
	int                   launched;	/* VMLAUNCH already done; use VMRESUME */
	u8                    fail;
	struct kvm_msr_entry *guest_msrs;
	struct kvm_msr_entry *host_msrs;
	int                   nmsrs;
	int                   save_nmsrs;
	int                   msr_offset_efer;
#ifdef CONFIG_X86_64
	int                   msr_offset_kernel_gs_base;
#endif
	struct vmcs          *vmcs;
	/* Host segment/MSR state stashed across guest entry/exit. */
	struct {
		int           loaded;
		u16           fs_sel, gs_sel, ldt_sel;
		int           gs_ldt_reload_needed;
		int           fs_reload_needed;
	}host_state;
};

static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
{
	return container_of(vcpu, struct vcpu_vmx, vcpu);
}

static int init_rmode_tss(struct kvm *kvm);

static DEFINE_PER_CPU(struct vmcs *, vmxarea);
static DEFINE_PER_CPU(struct vmcs *, current_vmcs);

static struct page *vmx_io_bitmap_a;
static struct page *vmx_io_bitmap_b;

/* EFER bits that may differ between host and guest and need swapping. */
#define EFER_SAVE_RESTORE_BITS ((u64)EFER_SCE)

/* VMX capabilities discovered from the capability MSRs at setup time. */
static struct vmcs_config {
	int size;
	int order;
	u32 revision_id;
	u32 pin_based_exec_ctrl;
	u32 cpu_based_exec_ctrl;
	u32 vmexit_ctrl;
	u32 vmentry_ctrl;
} vmcs_config;

#define VMX_SEGMENT_FIELD(seg)					\
	[VCPU_SREG_##seg] = {                                   \
		.selector = GUEST_##seg##_SELECTOR,		\
		.base = GUEST_##seg##_BASE,		   	\
		.limit = GUEST_##seg##_LIMIT,		   	\
		.ar_bytes = GUEST_##seg##_AR_BYTES,	   	\
	}

/* Maps VCPU_SREG_* to the four VMCS field encodings of each guest segment. */
static struct kvm_vmx_segment_field {
	unsigned selector;
	unsigned base;
	unsigned limit;
	unsigned ar_bytes;
} kvm_vmx_segment_fields[] = {
	VMX_SEGMENT_FIELD(CS),
	VMX_SEGMENT_FIELD(DS),
	VMX_SEGMENT_FIELD(ES),
	VMX_SEGMENT_FIELD(FS),
	VMX_SEGMENT_FIELD(GS),
	VMX_SEGMENT_FIELD(SS),
	VMX_SEGMENT_FIELD(TR),
	VMX_SEGMENT_FIELD(LDTR),
};

/*
 * Keep MSR_K6_STAR at the end, as setup_msrs() will try to optimize it
 * away by decrementing the array size.
 */
static const u32 vmx_msr_index[] = {
#ifdef CONFIG_X86_64
	MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, MSR_KERNEL_GS_BASE,
#endif
	MSR_EFER, MSR_K6_STAR,
};
#define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index)

/* Write n MSR values from e[] to the hardware MSRs. */
static void load_msrs(struct kvm_msr_entry *e, int n)
{
	int i;

	for (i = 0; i < n; ++i)
		wrmsrl(e[i].index, e[i].data);
}

/* Read n hardware MSR values into e[]. */
static void save_msrs(struct kvm_msr_entry *e, int n)
{
	int i;

	for (i = 0; i < n; ++i)
		rdmsrl(e[i].index, e[i].data);
}

static inline u64 msr_efer_save_restore_bits(struct kvm_msr_entry msr)
{
	return (u64)msr.data & EFER_SAVE_RESTORE_BITS;
}

/* True if host and guest disagree on the swappable EFER bits. */
static inline int msr_efer_need_save_restore(struct vcpu_vmx *vmx)
{
	int efer_offset = vmx->msr_offset_efer;

	return msr_efer_save_restore_bits(vmx->host_msrs[efer_offset]) !=
		msr_efer_save_restore_bits(vmx->guest_msrs[efer_offset]);
}

static inline int is_page_fault(u32 intr_info)
{
	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
			     INTR_INFO_VALID_MASK)) ==
		(INTR_TYPE_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK);
}

static inline int is_no_device(u32 intr_info)
{
	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
			     INTR_INFO_VALID_MASK)) ==
		(INTR_TYPE_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK);
}

static inline int is_external_interrupt(u32 intr_info)
{
	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
		== (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
}

static inline int cpu_has_vmx_tpr_shadow(void)
{
	return (vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW);
}

static inline int vm_need_tpr_shadow(struct kvm *kvm)
{
	return ((cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm)));
}

/* Return the index of msr in guest_msrs[], or -1 if not present. */
static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
{
	int i;

	for (i = 0; i < vmx->nmsrs; ++i)
		if (vmx->guest_msrs[i].index == msr)
			return i;
	return -1;
}

static struct kvm_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
{
	int i;

	i = __find_msr_index(vmx, msr);
	if (i >= 0)
		return &vmx->guest_msrs[i];
	return NULL;
}

/* Execute VMCLEAR on the vmcs's physical address; log on failure. */
static void vmcs_clear(struct vmcs *vmcs)
{
	u64 phys_addr = __pa(vmcs);
	u8 error;

	asm volatile (ASM_VMX_VMCLEAR_RAX "; setna %0"
		      : "=g"(error) : "a"(&phys_addr), "m"(phys_addr)
		      : "cc", "memory");
	if (error)
		printk(KERN_ERR "kvm: vmclear fail: %p/%llx\n",
		       vmcs, phys_addr);
}

/* Runs on the vcpu's last cpu: clear its vmcs and the current_vmcs cache. */
static void __vcpu_clear(void *arg)
{
	struct vcpu_vmx *vmx = arg;
	int cpu = raw_smp_processor_id();

	if (vmx->vcpu.cpu == cpu)
		vmcs_clear(vmx->vmcs);
	if (per_cpu(current_vmcs, cpu) == vmx->vmcs)
		per_cpu(current_vmcs, cpu) = NULL;
	rdtscll(vmx->vcpu.host_tsc);
}

/* Clear the vcpu's vmcs, IPI'ing the cpu it last ran on if necessary. */
static void vcpu_clear(struct vcpu_vmx *vmx)
{
	if (vmx->vcpu.cpu != raw_smp_processor_id() && vmx->vcpu.cpu != -1)
		smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear,
					 vmx, 0, 1);
	else
		__vcpu_clear(vmx);
	vmx->launched = 0;
}

static unsigned long vmcs_readl(unsigned long field)
{
	unsigned long value;

	asm volatile (ASM_VMX_VMREAD_RDX_RAX
		      : "=a"(value) : "d"(field) : "cc");
	return value;
}

static u16 vmcs_read16(unsigned long field)
{
	return vmcs_readl(field);
}

static u32 vmcs_read32(unsigned long field)
{
	return vmcs_readl(field);
}

static u64 vmcs_read64(unsigned long field)
{
#ifdef CONFIG_X86_64
	return vmcs_readl(field);
#else
	/* On 32-bit, a 64-bit field is read as two adjacent 32-bit halves. */
	return vmcs_readl(field) | ((u64)vmcs_readl(field+1) << 32);
#endif
}

static noinline void vmwrite_error(unsigned long field, unsigned long value)
{
	printk(KERN_ERR "vmwrite error: reg %lx value %lx (err %d)\n",
	       field, value, vmcs_read32(VM_INSTRUCTION_ERROR));
	dump_stack();
}

static void vmcs_writel(unsigned long field, unsigned long value)
{
	u8 error;

	asm volatile (ASM_VMX_VMWRITE_RAX_RDX "; setna %0"
		       : "=q"(error) : "a"(value), "d"(field) : "cc" );
	if (unlikely(error))
		vmwrite_error(field, value);
}

static void vmcs_write16(unsigned long field, u16 value)
{
	vmcs_writel(field, value);
}

static void vmcs_write32(unsigned long field, u32 value)
{
	vmcs_writel(field, value);
}

static void vmcs_write64(unsigned long field, u64 value)
{
#ifdef CONFIG_X86_64
	vmcs_writel(field, value);
#else
	/* 32-bit: write the low half, then the high half at field+1. */
	vmcs_writel(field, value);
	asm volatile ("");
	vmcs_writel(field+1, value >> 32);
#endif
}

static void vmcs_clear_bits(unsigned long field, u32 mask)
{
	vmcs_writel(field, vmcs_readl(field) & ~mask);
}

static void vmcs_set_bits(unsigned long field, u32 mask)
{
	vmcs_writel(field, vmcs_readl(field) | mask);
}

/*
 * Recompute which guest exceptions cause a vmexit: always #PF, plus #NM
 * while the guest fpu is inactive, #DB while guest debugging is on, and
 * everything while in real mode emulation.
 */
static void update_exception_bitmap(struct kvm_vcpu *vcpu)
{
	u32 eb;

	eb = 1u << PF_VECTOR;
	if (!vcpu->fpu_active)
		eb |= 1u << NM_VECTOR;
	if (vcpu->guest_debug.enabled)
		eb |= 1u << 1;
	if (vcpu->rmode.active)
		eb = ~0;
	vmcs_write32(EXCEPTION_BITMAP, eb);
}

static void reload_tss(void)
{
#ifndef CONFIG_X86_64

	/*
	 * VT restores TR but not its size.  Useless.
	 */
	struct descriptor_table gdt;
	struct segment_descriptor *descs;

	get_gdt(&gdt);
	descs = (void *)gdt.base;
	descs[GDT_ENTRY_TSS].type = 9; /* available TSS */
	load_TR_desc();
#endif
}

/* Load host EFER with the guest's swappable bits merged in. */
static void load_transition_efer(struct vcpu_vmx *vmx)
{
	u64 trans_efer;
	int efer_offset = vmx->msr_offset_efer;

	trans_efer = vmx->host_msrs[efer_offset].data;
	trans_efer &= ~EFER_SAVE_RESTORE_BITS;
	trans_efer |= msr_efer_save_restore_bits(vmx->guest_msrs[efer_offset]);
	wrmsrl(MSR_EFER, trans_efer);
	vmx->vcpu.stat.efer_reload++;
}

/* Stash host segment selectors and MSRs, then load the guest MSRs. */
static void vmx_save_host_state(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	if (vmx->host_state.loaded)
		return;

	vmx->host_state.loaded = 1;
	/*
	 * Set host fs and gs selectors.  Unfortunately, 22.2.3 does not
	 * allow segment selectors with cpl > 0 or ti == 1.
	 */
	vmx->host_state.ldt_sel = read_ldt();
	vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel;
	vmx->host_state.fs_sel = read_fs();
	if (!(vmx->host_state.fs_sel & 7)) {
		vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel);
		vmx->host_state.fs_reload_needed = 0;
	} else {
		vmcs_write16(HOST_FS_SELECTOR, 0);
		vmx->host_state.fs_reload_needed = 1;
	}
	vmx->host_state.gs_sel = read_gs();
	if (!(vmx->host_state.gs_sel & 7))
		vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel);
	else {
		vmcs_write16(HOST_GS_SELECTOR, 0);
		vmx->host_state.gs_ldt_reload_needed = 1;
	}

#ifdef CONFIG_X86_64
	vmcs_writel(HOST_FS_BASE, read_msr(MSR_FS_BASE));
	vmcs_writel(HOST_GS_BASE, read_msr(MSR_GS_BASE));
#else
	vmcs_writel(HOST_FS_BASE, segment_base(vmx->host_state.fs_sel));
	vmcs_writel(HOST_GS_BASE, segment_base(vmx->host_state.gs_sel));
#endif

#ifdef CONFIG_X86_64
	if (is_long_mode(&vmx->vcpu)) {
		save_msrs(vmx->host_msrs +
			  vmx->msr_offset_kernel_gs_base, 1);
	}
#endif
	load_msrs(vmx->guest_msrs, vmx->save_nmsrs);
	if (msr_efer_need_save_restore(vmx))
		load_transition_efer(vmx);
}

/* Undo vmx_save_host_state(): restore host selectors, TSS and MSRs. */
static void vmx_load_host_state(struct vcpu_vmx *vmx)
{
	unsigned long flags;

	if (!vmx->host_state.loaded)
		return;

	vmx->host_state.loaded = 0;
	if (vmx->host_state.fs_reload_needed)
		load_fs(vmx->host_state.fs_sel);
	if (vmx->host_state.gs_ldt_reload_needed) {
		load_ldt(vmx->host_state.ldt_sel);
		/*
		 * If we have to reload gs, we must take care to
		 * preserve our gs base.
		 */
		local_irq_save(flags);
		load_gs(vmx->host_state.gs_sel);
#ifdef CONFIG_X86_64
		wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE));
#endif
		local_irq_restore(flags);
	}
	reload_tss();
	save_msrs(vmx->guest_msrs, vmx->save_nmsrs);
	load_msrs(vmx->host_msrs, vmx->save_nmsrs);
	if (msr_efer_need_save_restore(vmx))
		load_msrs(vmx->host_msrs + vmx->msr_offset_efer, 1);
}

/*
 * Switches to specified vcpu, until a matching vcpu_put(), but assumes
 * vcpu mutex is already taken.
 */
static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	u64 phys_addr = __pa(vmx->vmcs);
	u64 tsc_this, delta;

	/* Migrating to a new cpu: detach the vmcs from the old one. */
	if (vcpu->cpu != cpu) {
		vcpu_clear(vmx);
		kvm_migrate_apic_timer(vcpu);
	}

	if (per_cpu(current_vmcs, cpu) != vmx->vmcs) {
		u8 error;

		per_cpu(current_vmcs, cpu) = vmx->vmcs;
		asm volatile (ASM_VMX_VMPTRLD_RAX "; setna %0"
			      : "=g"(error) : "a"(&phys_addr), "m"(phys_addr)
			      : "cc");
		if (error)
			printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n",
			       vmx->vmcs, phys_addr);
	}

	if (vcpu->cpu != cpu) {
		struct descriptor_table dt;
		unsigned long sysenter_esp;

		vcpu->cpu = cpu;
		/*
		 * Linux uses per-cpu TSS and GDT, so set these when switching
		 * processors.
		 */
		vmcs_writel(HOST_TR_BASE, read_tr_base()); /* 22.2.4 */
		get_gdt(&dt);
		vmcs_writel(HOST_GDTR_BASE, dt.base);   /* 22.2.4 */

		rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
		vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */

		/*
		 * Make sure the time stamp counter is monotonous.
		 */
		rdtscll(tsc_this);
		delta = vcpu->host_tsc - tsc_this;
		vmcs_write64(TSC_OFFSET, vmcs_read64(TSC_OFFSET) + delta);
	}
}

static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
{
	vmx_load_host_state(to_vmx(vcpu));
	kvm_put_guest_fpu(vcpu);
}

/* Give the guest direct fpu access: clear CR0.TS unless the guest set it. */
static void vmx_fpu_activate(struct kvm_vcpu *vcpu)
{
	if (vcpu->fpu_active)
		return;
	vcpu->fpu_active = 1;
	vmcs_clear_bits(GUEST_CR0, X86_CR0_TS);
	if (vcpu->cr0 & X86_CR0_TS)
		vmcs_set_bits(GUEST_CR0, X86_CR0_TS);
	update_exception_bitmap(vcpu);
}

/* Force CR0.TS so the next guest fpu use traps with #NM. */
static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu)
{
	if (!vcpu->fpu_active)
		return;
	vcpu->fpu_active = 0;
	vmcs_set_bits(GUEST_CR0, X86_CR0_TS);
	update_exception_bitmap(vcpu);
}

static void vmx_vcpu_decache(struct kvm_vcpu *vcpu)

Keyboard shortcuts

Copy code: Ctrl + C
Search code: Ctrl + F
Full-screen mode: F11
Increase font size: Ctrl + =
Decrease font size: Ctrl + -
Show shortcuts: ?