vmx.c

来自「linux 内核源代码」· C语言 代码 · 共 2,567 行 · 第 1/5 页

C
2,567
字号
	if (vmcs_readl(sf->base) == save->base && (save->base & AR_S_MASK)) {
		/*
		 * NOTE(review): the second test applies AR_S_MASK to
		 * save->base.  The mask name suggests an access-rights bit,
		 * so save->ar may have been intended -- confirm.
		 */
		vmcs_write16(sf->selector, save->selector);
		vmcs_writel(sf->base, save->base);
		vmcs_write32(sf->limit, save->limit);
		vmcs_write32(sf->ar_bytes, save->ar);
	} else {
		u32 dpl = (vmcs_read16(sf->selector) & SELECTOR_RPL_MASK)
			<< AR_DPL_SHIFT;
		vmcs_write32(sf->ar_bytes, 0x93 | dpl);
	}
}

/*
 * Leave emulated real mode.
 *
 * Clears vcpu->rmode.active, writes the TR base/limit/access rights held in
 * vcpu->rmode.tr back to the VMCS, restores the saved IOPL and clears
 * X86_EFLAGS_VM in GUEST_RFLAGS, and takes the X86_CR4_VME bit of GUEST_CR4
 * from CR4_READ_SHADOW.  ES/DS/GS/FS are then passed through
 * fix_pmode_dataseg(), and SS and CS are rewritten with fixed values.
 */
static void enter_pmode(struct kvm_vcpu *vcpu)
{
	unsigned long flags;

	vcpu->rmode.active = 0;

	vmcs_writel(GUEST_TR_BASE, vcpu->rmode.tr.base);
	vmcs_write32(GUEST_TR_LIMIT, vcpu->rmode.tr.limit);
	vmcs_write32(GUEST_TR_AR_BYTES, vcpu->rmode.tr.ar);

	flags = vmcs_readl(GUEST_RFLAGS);
	flags &= ~(IOPL_MASK | X86_EFLAGS_VM);
	flags |= (vcpu->rmode.save_iopl << IOPL_SHIFT);
	vmcs_writel(GUEST_RFLAGS, flags);

	/* Keep every GUEST_CR4 bit except VME, which comes from the shadow. */
	vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) |
			(vmcs_readl(CR4_READ_SHADOW) & X86_CR4_VME));

	update_exception_bitmap(vcpu);

	fix_pmode_dataseg(VCPU_SREG_ES, &vcpu->rmode.es);
	fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->rmode.ds);
	fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->rmode.gs);
	fix_pmode_dataseg(VCPU_SREG_FS, &vcpu->rmode.fs);

	vmcs_write16(GUEST_SS_SELECTOR, 0);
	vmcs_write32(GUEST_SS_AR_BYTES, 0x93);

	/* Strip the RPL bits from the current CS selector. */
	vmcs_write16(GUEST_CS_SELECTOR,
		     vmcs_read16(GUEST_CS_SELECTOR) & ~SELECTOR_RPL_MASK);
	vmcs_write32(GUEST_CS_AR_BYTES, 0x9b);
}

/*
 * Return the address used as the TR base while in emulated real mode: the
 * frame number of the third-from-last page of memory slot 0, shifted left
 * by PAGE_SHIFT.
 */
static gva_t rmode_tss_base(struct kvm* kvm)
{
	gfn_t base_gfn = kvm->memslots[0].base_gfn + kvm->memslots[0].npages - 3;
	return base_gfn << PAGE_SHIFT;
}

/*
 * Save segment @seg's selector, base, limit and access rights into @save,
 * then rewrite the VMCS fields: selector becomes base >> 4, limit becomes
 * 0xffff and the access-rights field becomes 0xf3.  The base field itself
 * is left unchanged.
 */
static void fix_rmode_seg(int seg, struct kvm_save_segment *save)
{
	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];

	save->selector = vmcs_read16(sf->selector);
	save->base = vmcs_readl(sf->base);
	save->limit = vmcs_read32(sf->limit);
	save->ar = vmcs_read32(sf->ar_bytes);
	vmcs_write16(sf->selector, vmcs_readl(sf->base) >> 4);
	vmcs_write32(sf->limit, 0xffff);
	vmcs_write32(sf->ar_bytes, 0xf3);
}

/*
 * Enter emulated real mode.
 *
 * Sets vcpu->rmode.active, stashes the current TR base/limit/access rights
 * and the IOPL field of GUEST_RFLAGS in vcpu->rmode, and points TR at the
 * area returned by rmode_tss_base().  IOPL_MASK and X86_EFLAGS_VM are set in
 * GUEST_RFLAGS and X86_CR4_VME in GUEST_CR4.  SS, CS and (through
 * fix_rmode_seg()) ES/DS/GS/FS get selector = base >> 4, limit 0xffff and
 * access rights 0xf3.  Finally the MMU context is reset and the real-mode
 * TSS pages are re-initialised.
 */
static void enter_rmode(struct kvm_vcpu *vcpu)
{
	unsigned long flags;

	vcpu->rmode.active = 1;

	vcpu->rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
	vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm));

	vcpu->rmode.tr.limit = vmcs_read32(GUEST_TR_LIMIT);
	vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1);

	vcpu->rmode.tr.ar = vmcs_read32(GUEST_TR_AR_BYTES);
	vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);

	flags = vmcs_readl(GUEST_RFLAGS);
	vcpu->rmode.save_iopl = (flags & IOPL_MASK) >> IOPL_SHIFT;

	flags |= IOPL_MASK | X86_EFLAGS_VM;

	vmcs_writel(GUEST_RFLAGS, flags);
	vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME);
	update_exception_bitmap(vcpu);

	vmcs_write16(GUEST_SS_SELECTOR, vmcs_readl(GUEST_SS_BASE) >> 4);
	vmcs_write32(GUEST_SS_LIMIT, 0xffff);
	vmcs_write32(GUEST_SS_AR_BYTES, 0xf3);

	vmcs_write32(GUEST_CS_AR_BYTES, 0xf3);
	vmcs_write32(GUEST_CS_LIMIT, 0xffff);
	/* A CS base of 0xffff0000 is replaced by 0xf0000 before deriving the selector. */
	if (vmcs_readl(GUEST_CS_BASE) == 0xffff0000)
		vmcs_writel(GUEST_CS_BASE, 0xf0000);
	vmcs_write16(GUEST_CS_SELECTOR, vmcs_readl(GUEST_CS_BASE) >> 4);

	fix_rmode_seg(VCPU_SREG_ES, &vcpu->rmode.es);
	fix_rmode_seg(VCPU_SREG_DS, &vcpu->rmode.ds);
	fix_rmode_seg(VCPU_SREG_GS, &vcpu->rmode.gs);
	fix_rmode_seg(VCPU_SREG_FS, &vcpu->rmode.fs);

	kvm_mmu_reset_context(vcpu);
	/* NOTE(review): the 0/1 result of init_rmode_tss() is not checked here. */
	init_rmode_tss(vcpu->kvm);
}

#ifdef CONFIG_X86_64

/*
 * Switch the guest into long mode.
 *
 * If the type bits of the TR access rights are not AR_TYPE_BUSY_64_TSS they
 * are rewritten to that value (a KERN_DEBUG message is logged).  EFER_LMA is
 * set in vcpu->shadow_efer, EFER_LMA | EFER_LME are set in the MSR_EFER
 * entry, and VM_ENTRY_IA32E_MODE is set in the VM-entry controls.
 */
static void enter_lmode(struct kvm_vcpu *vcpu)
{
	u32 guest_tr_ar;

	guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES);
	if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) {
		printk(KERN_DEBUG "%s: tss fixup for long mode. \n",
		       __FUNCTION__);
		vmcs_write32(GUEST_TR_AR_BYTES,
			     (guest_tr_ar & ~AR_TYPE_MASK)
			     | AR_TYPE_BUSY_64_TSS);
	}

	vcpu->shadow_efer |= EFER_LMA;

	/*
	 * NOTE(review): the result of find_msr_entry() is dereferenced
	 * without a NULL check -- presumably MSR_EFER is always present;
	 * confirm.
	 */
	find_msr_entry(to_vmx(vcpu), MSR_EFER)->data |= EFER_LMA | EFER_LME;
	vmcs_write32(VM_ENTRY_CONTROLS,
		     vmcs_read32(VM_ENTRY_CONTROLS)
		     | VM_ENTRY_IA32E_MODE);
}

/*
 * Leave long mode: clear EFER_LMA in vcpu->shadow_efer and clear
 * VM_ENTRY_IA32E_MODE in the VM-entry controls.
 */
static void exit_lmode(struct kvm_vcpu *vcpu)
{
	vcpu->shadow_efer &= ~EFER_LMA;

	vmcs_write32(VM_ENTRY_CONTROLS,
		     vmcs_read32(VM_ENTRY_CONTROLS)
		     & ~VM_ENTRY_IA32E_MODE);
}

#endif

/*
 * Refresh vcpu->cr4 from the VMCS: bits inside KVM_GUEST_CR4_MASK keep their
 * cached value, every other bit is taken from GUEST_CR4.
 */
static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
{
	vcpu->cr4 &= KVM_GUEST_CR4_MASK;
	vcpu->cr4 |= vmcs_readl(GUEST_CR4) & ~KVM_GUEST_CR4_MASK;
}

/*
 * Install a new guest CR0 value.
 *
 * Switches between emulated real mode and protected mode when the PE bit
 * disagrees with vcpu->rmode.active and, on x86_64 with EFER_LME set in the
 * shadow EFER, enters or leaves long mode when the PG bit changes relative
 * to is_paging().  The unmodified @cr0 goes to CR0_READ_SHADOW and
 * vcpu->cr0; GUEST_CR0 gets @cr0 with the KVM_GUEST_CR0_MASK bits cleared
 * and KVM_VM_CR0_ALWAYS_ON set.
 */
static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
	vmx_fpu_deactivate(vcpu);

	if (vcpu->rmode.active && (cr0 & X86_CR0_PE))
		enter_pmode(vcpu);

	if (!vcpu->rmode.active && !(cr0 & X86_CR0_PE))
		enter_rmode(vcpu);

#ifdef CONFIG_X86_64
	if (vcpu->shadow_efer & EFER_LME) {
		if (!is_paging(vcpu) && (cr0 & X86_CR0_PG))
			enter_lmode(vcpu);
		if (is_paging(vcpu) && !(cr0 & X86_CR0_PG))
			exit_lmode(vcpu);
	}
#endif

	vmcs_writel(CR0_READ_SHADOW, cr0);
	vmcs_writel(GUEST_CR0,
		    (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON);
	vcpu->cr0 = cr0;

	/* Re-activate the FPU unless both TS and PE are set in the new CR0. */
	if (!(cr0 & X86_CR0_TS) || !(cr0 & X86_CR0_PE))
		vmx_fpu_activate(vcpu);
}

/*
 * Write @cr3 to GUEST_CR3 and, when the cached CR0 has PE set, deactivate
 * the guest FPU.
 */
static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
{
	vmcs_writel(GUEST_CR3, cr3);
	if (vcpu->cr0 & X86_CR0_PE)
		vmx_fpu_deactivate(vcpu);
}

/*
 * Install a new guest CR4 value.  The read shadow and vcpu->cr4 receive
 * @cr4 unchanged; GUEST_CR4 additionally has the always-on bits for the
 * current mode (real-mode or protected-mode set) ORed in.
 */
static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
	vmcs_writel(CR4_READ_SHADOW, cr4);
	vmcs_writel(GUEST_CR4, cr4 | (vcpu->rmode.active ?
		    KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON));
	vcpu->cr4 = cr4;
}

#ifdef CONFIG_X86_64

/*
 * Install a new guest EFER value.
 *
 * vcpu->shadow_efer always receives @efer.  With EFER_LMA set,
 * VM_ENTRY_IA32E_MODE is enabled and the MSR_EFER entry gets @efer as is;
 * otherwise VM_ENTRY_IA32E_MODE is cleared and the entry gets @efer with
 * EFER_LME masked off.  setup_msrs() is called afterwards in both cases.
 */
static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	/* NOTE(review): msr is used below without a NULL check -- confirm it cannot be NULL. */
	struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER);

	vcpu->shadow_efer = efer;
	if (efer & EFER_LMA) {
		vmcs_write32(VM_ENTRY_CONTROLS,
				     vmcs_read32(VM_ENTRY_CONTROLS) |
				     VM_ENTRY_IA32E_MODE);
		msr->data = efer;

	} else {
		vmcs_write32(VM_ENTRY_CONTROLS,
				     vmcs_read32(VM_ENTRY_CONTROLS) &
				     ~VM_ENTRY_IA32E_MODE);

		msr->data = efer & ~EFER_LME;
	}
	setup_msrs(vmx);
}

#endif

/* Return the base of segment @seg as currently stored in the VMCS. */
static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
{
	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];

	return vmcs_readl(sf->base);
}

/*
 * Read segment @seg from the VMCS into @var.
 *
 * Base, limit and selector are copied directly.  The access-rights word is
 * unpacked into the individual fields: type (bits 0-3), s (4), dpl (5-6),
 * present (7), avl (12), l (13), db (14), g (15), unusable (16).
 */
static void vmx_get_segment(struct kvm_vcpu *vcpu,
			    struct kvm_segment *var, int seg)
{
	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
	u32 ar;

	var->base = vmcs_readl(sf->base);
	var->limit = vmcs_read32(sf->limit);
	var->selector = vmcs_read16(sf->selector);
	ar = vmcs_read32(sf->ar_bytes);
	/*
	 * NOTE(review): once ar is zeroed here every field decoded below,
	 * including var->unusable, reads back as 0 -- confirm that callers
	 * do not rely on var->unusable being reported.
	 */
	if (ar & AR_UNUSABLE_MASK)
		ar = 0;
	var->type = ar & 15;
	var->s = (ar >> 4) & 1;
	var->dpl = (ar >> 5) & 3;
	var->present = (ar >> 7) & 1;
	var->avl = (ar >> 12) & 1;
	var->l = (ar >> 13) & 1;
	var->db = (ar >> 14) & 1;
	var->g = (ar >> 15) & 1;
	var->unusable = (ar >> 16) & 1;
}

/*
 * Pack the fields of @var into a VMCS access-rights word, using the same
 * bit positions that vmx_get_segment() decodes.  An unusable segment yields
 * 1 << 16; a packed value of 0 is replaced by AR_UNUSABLE_MASK.
 */
static u32 vmx_segment_access_rights(struct kvm_segment *var)
{
	u32 ar;

	if (var->unusable)
		ar = 1 << 16;
	else {
		ar = var->type & 15;
		ar |= (var->s & 1) << 4;
		ar |= (var->dpl & 3) << 5;
		ar |= (var->present & 1) << 7;
		ar |= (var->avl & 1) << 12;
		ar |= (var->l & 1) << 13;
		ar |= (var->db & 1) << 14;
		ar |= (var->g & 1) << 15;
	}
	if (ar == 0) /* a 0 value means unusable */
		ar = AR_UNUSABLE_MASK;

	return ar;
}

/*
 * Load segment @seg from @var.
 *
 * While in emulated real mode a TR load is only recorded in vcpu->rmode.tr
 * and the VMCS is not touched.  Otherwise base, limit and selector are
 * written to the VMCS; the access rights come from
 * vmx_segment_access_rights(), except for segments with var->s set while in
 * emulated real mode, which get 0xf3.
 */
static void vmx_set_segment(struct kvm_vcpu *vcpu,
			    struct kvm_segment *var, int seg)
{
	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
	u32 ar;

	if (vcpu->rmode.active && seg == VCPU_SREG_TR) {
		vcpu->rmode.tr.selector = var->selector;
		vcpu->rmode.tr.base = var->base;
		vcpu->rmode.tr.limit = var->limit;
		vcpu->rmode.tr.ar = vmx_segment_access_rights(var);
		return;
	}
	vmcs_writel(sf->base, var->base);
	vmcs_write32(sf->limit, var->limit);
	vmcs_write16(sf->selector, var->selector);
	if (vcpu->rmode.active && var->s) {
		/*
		 * Hack real-mode segments into vm86 compatibility.
		 */
		if (var->base == 0xffff0000 && var->selector == 0xf000)
			vmcs_writel(sf->base, 0xf0000);
		ar = 0xf3;
	} else
		ar = vmx_segment_access_rights(var);
	vmcs_write32(sf->ar_bytes, ar);
}

/* Report bits 14 (db) and 13 (l) of the guest CS access rights. */
static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
{
	u32 ar = vmcs_read32(GUEST_CS_AR_BYTES);

	*db = (ar >> 14) & 1;
	*l = (ar >> 13) & 1;
}

/* Read the guest IDTR limit and base from the VMCS into @dt. */
static void vmx_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
{
	dt->limit = vmcs_read32(GUEST_IDTR_LIMIT);
	dt->base = vmcs_readl(GUEST_IDTR_BASE);
}

/* Write the guest IDTR limit and base from @dt into the VMCS. */
static void vmx_set_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
{
	vmcs_write32(GUEST_IDTR_LIMIT, dt->limit);
	vmcs_writel(GUEST_IDTR_BASE, dt->base);
}

/* Read the guest GDTR limit and base from the VMCS into @dt. */
static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
{
	dt->limit = vmcs_read32(GUEST_GDTR_LIMIT);
	dt->base = vmcs_readl(GUEST_GDTR_BASE);
}

/* Write the guest GDTR limit and base from @dt into the VMCS. */
static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
{
	vmcs_write32(GUEST_GDTR_LIMIT, dt->limit);
	vmcs_writel(GUEST_GDTR_BASE, dt->base);
}

/*
 * Initialise the three guest pages that start at rmode_tss_base().
 *
 * All three pages are cleared.  In the first page the u16 at offset 0x66 is
 * set to TSS_BASE_SIZE + TSS_REDIRECTION_SIZE; in the third page the byte
 * at offset RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1 is set to ~0.
 *
 * Returns 1 on success, 0 if any of the three pages could not be looked up.
 */
static int init_rmode_tss(struct kvm* kvm)
{
	struct page *p1, *p2, *p3;
	gfn_t fn = rmode_tss_base(kvm) >> PAGE_SHIFT;
	char *page;

	p1 = gfn_to_page(kvm, fn++);
	p2 = gfn_to_page(kvm, fn++);
	p3 = gfn_to_page(kvm, fn);

	if (!p1 || !p2 || !p3) {
		kvm_printf(kvm,"%s: gfn_to_page failed\n", __FUNCTION__);
		return 0;
	}

	page = kmap_atomic(p1, KM_USER0);
	clear_page(page);
	*(u16*)(page + 0x66) = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE;
	kunmap_atomic(page, KM_USER0);

	page = kmap_atomic(p2, KM_USER0);
	clear_page(page);
	kunmap_atomic(page, KM_USER0);

	page = kmap_atomic(p3, KM_USER0);
	clear_page(page);
	*(page + RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1) = ~0;
	kunmap_atomic(page, KM_USER0);

	return 1;
}

/*
 * Give segment @seg its initial VMCS state: selector 0, base 0,
 * limit 0xffff, access rights 0x93.
 */
static void seg_setup(int seg)
{
	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];

	vmcs_write16(sf->selector, 0);
	vmcs_writel(sf->base, 0);
	vmcs_write32(sf->limit, 0xffff);
	vmcs_write32(sf->ar_bytes, 0x93);
}

/*
 * Sets up the vmcs for emulated real mode.
 */
static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
{
	u32 host_sysenter_cs;
	u32 junk;
	unsigned long a;
	struct descriptor_table dt;
	int i;
	int ret = 0;
	unsigned long kvm_vmx_return;
	u64 msr;
	u32 exec_control;

	if (!init_rmode_tss(vmx->vcpu.kvm)) {
		ret = -ENOMEM;
		goto out;
	}

	vmx->vcpu.rmode.active = 0;

	vmx->vcpu.regs[VCPU_REGS_RDX] = get_rdx_init_val();
	set_cr8(&vmx->vcpu, 0);
	msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
	if (vmx->vcpu.vcpu_id == 0)
		msr |= MSR_IA32_APICBASE_BSP;
	kvm_set_apic_base(&vmx->vcpu, msr);

	fx_init(&vmx->vcpu);

	/*
	 * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode
	 * insists on having GUEST_CS_BASE == GUEST_CS_SELECTOR << 4.  Sigh.
	 */
	if (vmx->vcpu.vcpu_id == 0) {
		vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
		vmcs_writel(GUEST_CS_BASE, 0x000f0000);
	} else {
		vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.sipi_vector << 8);
		vmcs_writel(GUEST_CS_BASE, vmx->vcpu.sipi_vector << 12);
	}
	vmcs_write32(GUEST_CS_LIMIT, 0xffff);
	vmcs_write32(GUEST_CS_AR_BYTES, 0x9b);

	seg_setup(VCPU_SREG_DS);
	seg_setup(VCPU_SREG_ES);
	seg_setup(VCPU_SREG_FS);
	seg_setup(VCPU_SREG_GS);
	seg_setup(VCPU_SREG_SS);

	vmcs_write16(GUEST_TR_SELECTOR, 0);
	vmcs_writel(GUEST_TR_BASE, 0);
	vmcs_write32(GUEST_TR_LIMIT, 0xffff);
	vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);

	vmcs_write16(GUEST_LDTR_SELECTOR, 0);
	vmcs_writel(GUEST_LDTR_BASE, 0);
	vmcs_write32(GUEST_LDTR_LIMIT, 0xffff);
	vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082);

	vmcs_write32(GUEST_SYSENTER_CS, 0);
	vmcs_writel(GUEST_SYSENTER_ESP, 0);
	vmcs_writel(GUEST_SYSENTER_EIP, 0);

	vmcs_writel(GUEST_RFLAGS, 0x02);
	if (vmx->vcpu.vcpu_id == 0)
		vmcs_writel(GUEST_RIP, 0xfff0);
	else
		vmcs_writel(GUEST_RIP, 0);
	vmcs_writel(GUEST_RSP, 0);

	//todo: dr0 = dr1 = dr2 = dr3 = 0; dr6 = 0xffff0ff0
	vmcs_writel(GUEST_DR7, 0x400);

	vmcs_writel(GUEST_GDTR_BASE, 0);
	vmcs_write32(GUEST_GDTR_LIMIT, 0xffff);

	vmcs_writel(GUEST_IDTR_BASE, 0);
	vmcs_write32(GUEST_IDTR_LIMIT, 0xffff);

	vmcs_write32(GUEST_ACTIVITY_STATE, 0);
	vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
	vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0);

	/* I/O */
	vmcs_write64(IO_BITMAP_A, page_to_phys(vmx_io_bitmap_a));
	vmcs_write64(IO_BITMAP_B, page_to_phys(vmx_io_bitmap_b));

	guest_write_tsc(0);

	vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */

	/* Special registers */
	vmcs_write64(GUEST_IA32_DEBUGCTL, 0);

	/* Control */
	vmcs_write32(PIN_BASED_VM_EXEC_CONTROL,
		vmcs_config.pin_based_exec_ctrl);

	exec_control = vmcs_config.cpu_based_exec_ctrl;
	if (!vm_need_tpr_shadow(vmx->vcpu.kvm)) {
		exec_control &= ~CPU_BASED_TPR_SHADOW;
#ifdef CONFIG_X86_64
		exec_control |= CPU_BASED_CR8_STORE_EXITING |
				CPU_BASED_CR8_LOAD_EXITING;
#endif
	}
	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control);

	vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0);
	vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0);
	vmcs_write32(CR3_TARGET_COUNT, 0);           /* 22.2.1 */

	vmcs_writel(HOST_CR0, read_cr0());  /* 22.2.3 */
	vmcs_writel(HOST_CR4, read_cr4());  /* 22.2.3, 22.2.5 */
	vmcs_writel(HOST_CR3, read_cr3());  /* 22.2.3  FIXME: shadow tables */

	vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS);  /* 22.2.4 */
	vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS);  /* 22.2.4 */
	vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS);  /* 22.2.4 */
	vmcs_write16(HOST_FS_SELECTOR, read_fs());    /* 22.2.4 */
	vmcs_write16(HOST_GS_SELECTOR, read_gs());    /* 22.2.4 */
	vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS);  /* 22.2.4 */
#ifdef CONFIG_X86_64
	rdmsrl(MSR_FS_BASE, a);
	vmcs_writel(HOST_FS_BASE, a); /* 22.2.4 */
	rdmsrl(MSR_GS_BASE, a);
	vmcs_writel(HOST_GS_BASE, a); /* 22.2.4 */
#else
	vmcs_writel(HOST_FS_BASE, 0); /* 22.2.4 */
	vmcs_writel(HOST_GS_BASE, 0); /* 22.2.4 */
#endif

	vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8);  /* 22.2.4 */

	get_idt(&dt);
	vmcs_writel(HOST_IDTR_BASE, dt.base);   /* 22.2.4 */

	asm ("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return));
	vmcs_writel(HOST_RIP, kvm_vmx_return); /* 22.2.5 */
	vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?