/*
 * vmx.c — Intel VT-x (VMX) support for Xen HVM guests.
 *
 * [Extraction note] Excerpt from the Xen hypervisor source package,
 * C language, 2,245 lines total; this is page 1 of 5.
 */
return __vmread(GUEST_INTERRUPTIBILITY_INFO);}static void vmx_set_interrupt_shadow(struct vcpu *v, unsigned int intr_shadow){ __vmwrite(GUEST_INTERRUPTIBILITY_INFO, intr_shadow);}static void vmx_load_pdptrs(struct vcpu *v){ unsigned long cr3 = v->arch.hvm_vcpu.guest_cr[3], mfn; uint64_t *guest_pdptrs; p2m_type_t p2mt; char *p; /* EPT needs to load PDPTRS into VMCS for PAE. */ if ( !hvm_pae_enabled(v) || (v->arch.hvm_vcpu.guest_efer & EFER_LMA) ) return; if ( cr3 & 0x1fUL ) goto crash; mfn = mfn_x(gfn_to_mfn(v->domain, cr3 >> PAGE_SHIFT, &p2mt)); if ( !p2m_is_ram(p2mt) ) goto crash; p = map_domain_page(mfn); guest_pdptrs = (uint64_t *)(p + (cr3 & ~PAGE_MASK)); /* * We do not check the PDPTRs for validity. The CPU will do this during * vm entry, and we can handle the failure there and crash the guest. * The only thing we could do better here is #GP instead. */ vmx_vmcs_enter(v); __vmwrite(GUEST_PDPTR0, guest_pdptrs[0]); __vmwrite(GUEST_PDPTR1, guest_pdptrs[1]); __vmwrite(GUEST_PDPTR2, guest_pdptrs[2]); __vmwrite(GUEST_PDPTR3, guest_pdptrs[3]);#ifdef __i386__ __vmwrite(GUEST_PDPTR0_HIGH, guest_pdptrs[0] >> 32); __vmwrite(GUEST_PDPTR1_HIGH, guest_pdptrs[1] >> 32); __vmwrite(GUEST_PDPTR2_HIGH, guest_pdptrs[2] >> 32); __vmwrite(GUEST_PDPTR3_HIGH, guest_pdptrs[3] >> 32);#endif vmx_vmcs_exit(v); unmap_domain_page(p); return; crash: domain_crash(v->domain);}static void vmx_update_host_cr3(struct vcpu *v){ vmx_vmcs_enter(v); __vmwrite(HOST_CR3, v->arch.cr3); vmx_vmcs_exit(v);}static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr){ vmx_vmcs_enter(v); switch ( cr ) { case 0: { unsigned long hw_cr0_mask = X86_CR0_NE | X86_CR0_PG | X86_CR0_PE; if ( paging_mode_shadow(v->domain) ) hw_cr0_mask |= X86_CR0_WP; if ( paging_mode_hap(v->domain) ) { /* We manage GUEST_CR3 when guest CR0.PE is zero. 
*/ uint32_t cr3_ctls = (CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING); v->arch.hvm_vmx.exec_control &= ~cr3_ctls; if ( !hvm_paging_enabled(v) ) v->arch.hvm_vmx.exec_control |= cr3_ctls; __vmwrite(CPU_BASED_VM_EXEC_CONTROL, v->arch.hvm_vmx.exec_control); /* Changing CR0.PE can change some bits in real CR4. */ vmx_update_guest_cr(v, 4); } if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) ) { if ( v != current ) hw_cr0_mask |= X86_CR0_TS; else if ( v->arch.hvm_vcpu.hw_cr[0] & X86_CR0_TS ) vmx_fpu_enter(v); } v->arch.hvm_vmx.vmxemul &= ~VMXEMUL_REALMODE; if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) ) v->arch.hvm_vmx.vmxemul |= VMXEMUL_REALMODE; v->arch.hvm_vcpu.hw_cr[0] = v->arch.hvm_vcpu.guest_cr[0] | hw_cr0_mask; __vmwrite(GUEST_CR0, v->arch.hvm_vcpu.hw_cr[0]); __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[0]); break; } case 2: /* CR2 is updated in exit stub. */ break; case 3: if ( paging_mode_hap(v->domain) ) { if ( !hvm_paging_enabled(v) ) v->arch.hvm_vcpu.hw_cr[3] = v->domain->arch.hvm_domain.params[HVM_PARAM_IDENT_PT]; vmx_load_pdptrs(v); } __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr[3]); vpid_sync_vcpu_all(v); break; case 4: v->arch.hvm_vcpu.hw_cr[4] = HVM_CR4_HOST_MASK; if ( paging_mode_hap(v->domain) ) v->arch.hvm_vcpu.hw_cr[4] &= ~X86_CR4_PAE; v->arch.hvm_vcpu.hw_cr[4] |= v->arch.hvm_vcpu.guest_cr[4]; if ( paging_mode_hap(v->domain) && !hvm_paging_enabled(v) ) { v->arch.hvm_vcpu.hw_cr[4] |= X86_CR4_PSE; v->arch.hvm_vcpu.hw_cr[4] &= ~X86_CR4_PAE; } __vmwrite(GUEST_CR4, v->arch.hvm_vcpu.hw_cr[4]); __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[4]); break; default: BUG(); } vmx_vmcs_exit(v);}static void vmx_update_guest_efer(struct vcpu *v){#ifdef __x86_64__ unsigned long vm_entry_value; vmx_vmcs_enter(v); vm_entry_value = __vmread(VM_ENTRY_CONTROLS); if ( v->arch.hvm_vcpu.guest_efer & EFER_LMA ) vm_entry_value |= VM_ENTRY_IA32E_MODE; else vm_entry_value &= ~VM_ENTRY_IA32E_MODE; __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value); 
vmx_vmcs_exit(v);#endif if ( v == current ) write_efer((read_efer() & ~(EFER_NX|EFER_SCE)) | (v->arch.hvm_vcpu.guest_efer & (EFER_NX|EFER_SCE)));}static void vmx_flush_guest_tlbs(void){ /* * If VPID (i.e. tagged TLB support) is not enabled, the fact that * we're in Xen at all means any guest will have a clean TLB when * it's next run, because VMRESUME will flush it for us. * * If enabled, we invalidate all translations associated with all * VPID values. */ vpid_sync_all();}static void __ept_sync_domain(void *info){ struct domain *d = info; __invept(1, d->arch.hvm_domain.vmx.ept_control.eptp, 0);}void ept_sync_domain(struct domain *d){ /* Only if using EPT and this domain has some VCPUs to dirty. */ if ( d->arch.hvm_domain.hap_enabled && d->vcpu[0] ) { ASSERT(local_irq_is_enabled()); on_each_cpu(__ept_sync_domain, d, 1, 1); }}static void __vmx_inject_exception( struct vcpu *v, int trap, int type, int error_code){ unsigned long intr_fields; /* * NB. Callers do not need to worry about clearing STI/MOV-SS blocking: * "If the VM entry is injecting, there is no blocking by STI or by * MOV SS following the VM entry, regardless of the contents of the * interruptibility-state field [in the guest-state area before the * VM entry]", PRM Vol. 3, 22.6.1 (Interruptibility State). 
*/ intr_fields = (INTR_INFO_VALID_MASK | (type<<8) | trap); if ( error_code != HVM_DELIVER_NO_ERROR_CODE ) { __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code); intr_fields |= INTR_INFO_DELIVER_CODE_MASK; } __vmwrite(VM_ENTRY_INTR_INFO, intr_fields); if ( trap == TRAP_page_fault ) HVMTRACE_LONG_2D(PF_INJECT, v, error_code, TRC_PAR_LONG(v->arch.hvm_vcpu.guest_cr[2])); else HVMTRACE_2D(INJ_EXC, v, trap, error_code);}void vmx_inject_hw_exception(struct vcpu *v, int trap, int error_code){ unsigned long intr_info = __vmread(VM_ENTRY_INTR_INFO); if ( unlikely(intr_info & INTR_INFO_VALID_MASK) && (((intr_info >> 8) & 7) == X86_EVENTTYPE_HW_EXCEPTION) ) { trap = hvm_combine_hw_exceptions((uint8_t)intr_info, trap); if ( trap == TRAP_double_fault ) error_code = 0; } __vmx_inject_exception(v, trap, X86_EVENTTYPE_HW_EXCEPTION, error_code);}void vmx_inject_extint(struct vcpu *v, int trap){ __vmx_inject_exception(v, trap, X86_EVENTTYPE_EXT_INTR, HVM_DELIVER_NO_ERROR_CODE);}void vmx_inject_nmi(struct vcpu *v){ __vmx_inject_exception(v, 2, X86_EVENTTYPE_NMI, HVM_DELIVER_NO_ERROR_CODE);}static void vmx_inject_exception( unsigned int trapnr, int errcode, unsigned long cr2){ struct vcpu *curr = current; vmx_inject_hw_exception(curr, trapnr, errcode); if ( trapnr == TRAP_page_fault ) curr->arch.hvm_vcpu.guest_cr[2] = cr2; if ( (trapnr == TRAP_debug) && (guest_cpu_user_regs()->eflags & X86_EFLAGS_TF) ) { __restore_debug_registers(curr); write_debugreg(6, read_debugreg(6) | 0x4000); }}static int vmx_event_pending(struct vcpu *v){ ASSERT(v == current); return (__vmread(VM_ENTRY_INTR_INFO) & INTR_INFO_VALID_MASK);}static int vmx_do_pmu_interrupt(struct cpu_user_regs *regs){ return vpmu_do_interrupt(regs);}static void vmx_set_uc_mode(struct vcpu *v){ if ( paging_mode_hap(v->domain) ) ept_change_entry_emt_with_range( v->domain, 0, v->domain->arch.p2m->max_mapped_pfn); vpid_sync_all();}static struct hvm_function_table vmx_function_table = { .name = "VMX", .domain_initialise = 
vmx_domain_initialise, .domain_destroy = vmx_domain_destroy, .vcpu_initialise = vmx_vcpu_initialise, .vcpu_destroy = vmx_vcpu_destroy, .save_cpu_ctxt = vmx_save_vmcs_ctxt, .load_cpu_ctxt = vmx_load_vmcs_ctxt, .get_interrupt_shadow = vmx_get_interrupt_shadow, .set_interrupt_shadow = vmx_set_interrupt_shadow, .guest_x86_mode = vmx_guest_x86_mode, .get_segment_register = vmx_get_segment_register, .set_segment_register = vmx_set_segment_register, .update_host_cr3 = vmx_update_host_cr3, .update_guest_cr = vmx_update_guest_cr, .update_guest_efer = vmx_update_guest_efer, .flush_guest_tlbs = vmx_flush_guest_tlbs, .set_tsc_offset = vmx_set_tsc_offset, .inject_exception = vmx_inject_exception, .init_hypercall_page = vmx_init_hypercall_page, .event_pending = vmx_event_pending, .do_pmu_interrupt = vmx_do_pmu_interrupt, .cpu_up = vmx_cpu_up, .cpu_down = vmx_cpu_down, .cpuid_intercept = vmx_cpuid_intercept, .wbinvd_intercept = vmx_wbinvd_intercept, .fpu_dirty_intercept = vmx_fpu_dirty_intercept, .msr_read_intercept = vmx_msr_read_intercept, .msr_write_intercept = vmx_msr_write_intercept, .invlpg_intercept = vmx_invlpg_intercept, .set_uc_mode = vmx_set_uc_mode};static unsigned long *vpid_bitmap;#define VPID_BITMAP_SIZE ((1u << VMCS_VPID_WIDTH) / MAX_VIRT_CPUS)void start_vmx(void){ static int bootstrapped; vmx_save_host_msrs(); if ( bootstrapped ) { if ( hvm_enabled && !vmx_cpu_up() ) { printk("VMX: FATAL: failed to initialise CPU%d!\n", smp_processor_id()); BUG(); } return; } bootstrapped = 1; /* Xen does not fill x86_capability words except 0. 
*/ boot_cpu_data.x86_capability[4] = cpuid_ecx(1); if ( !test_bit(X86_FEATURE_VMXE, &boot_cpu_data.x86_capability) ) return; set_in_cr4(X86_CR4_VMXE); if ( !vmx_cpu_up() ) { printk("VMX: failed to initialise.\n"); return; } if ( cpu_has_vmx_ept ) { printk("VMX: EPT is available.\n"); vmx_function_table.hap_supported = 1; } if ( cpu_has_vmx_vpid ) { printk("VMX: VPID is available.\n"); vpid_bitmap = xmalloc_array( unsigned long, BITS_TO_LONGS(VPID_BITMAP_SIZE)); BUG_ON(vpid_bitmap == NULL); memset(vpid_bitmap, 0, BITS_TO_LONGS(VPID_BITMAP_SIZE) * sizeof(long)); /* VPID 0 is used by VMX root mode (the hypervisor). */ __set_bit(0, vpid_bitmap); } setup_vmcs_dump(); hvm_enable(&vmx_function_table);}/* * Not all cases receive valid value in the VM-exit instruction length field. * Callers must know what they're doing! */static int __get_instruction_length(void){ int len; len = __vmread(VM_EXIT_INSTRUCTION_LEN); /* Safe: callers audited */ BUG_ON((len < 1) || (len > 15)); return len;}static void __update_guest_eip(unsigned long inst_len){ struct cpu_user_regs *regs = guest_cpu_user_regs(); unsigned long x; regs->eip += inst_len; regs->eflags &= ~X86_EFLAGS_RF; x = __vmread(GUEST_INTERRUPTIBILITY_INFO); if ( x & (VMX_INTR_SHADOW_STI | VMX_INTR_SHADOW_MOV_SS) ) { x &= ~(VMX_INTR_SHADOW_STI | VMX_INTR_SHADOW_MOV_SS); __vmwrite(GUEST_INTERRUPTIBILITY_INFO, x); } if ( regs->eflags & X86_EFLAGS_TF ) vmx_inject_exception(TRAP_debug, HVM_DELIVER_NO_ERROR_CODE, 0);}static void vmx_fpu_dirty_intercept(void){ struct vcpu *curr = current; vmx_fpu_enter(curr); /* Disable TS in guest CR0 unless the guest wants the exception too. 
*/ if ( !(curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) ) { curr->arch.hvm_vcpu.hw_cr[0] &= ~X86_CR0_TS; __vmwrite(GUEST_CR0, curr->arch.hvm_vcpu.hw_cr[0]); }}#define bitmaskof(idx) (1U << ((idx) & 31))static void vmx_cpuid_intercept( unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx){ unsigned int input = *eax; struct segment_register cs; struct vcpu *v = current; hvm_cpuid(input, eax, ebx, ecx, edx); switch ( input ) { case 0x80000001: /* SYSCALL is visible iff running in long mode. */ hvm_get_segment_register(v, x86_seg_cs, &cs); if ( cs.attr.fields.l ) *edx |= bitmaskof(X86_FEATURE_SYSCALL); else *edx &= ~(bitmaskof(X86_FEATURE_SYSCALL)); break; }
/*
 * [Extraction artifact — not part of vmx.c] Code-viewer UI help text:
 * keyboard shortcuts — copy code: Ctrl+C; search code: Ctrl+F;
 * full-screen mode: F11; increase font size: Ctrl+=; decrease font
 * size: Ctrl+-; show shortcuts: ?.
 * The source continues on the next extracted page (2/5).
 */