vmx.c

来自「xen虚拟机源代码安装包」· C语言 代码 · 共 2,245 行 · 第 1/5 页

C
2,245
字号
    return __vmread(GUEST_INTERRUPTIBILITY_INFO);
}

/*
 * Write the interrupt-shadow (STI/MOV-SS blocking) bits into the VMCS.
 * NOTE(review): 'v' is unused here; __vmwrite acts on the currently
 * loaded VMCS, so presumably callers guarantee v's VMCS is current —
 * confirm against the callers of this hook.
 */
static void vmx_set_interrupt_shadow(struct vcpu *v, unsigned int intr_shadow)
{
    __vmwrite(GUEST_INTERRUPTIBILITY_INFO, intr_shadow);
}

/*
 * Copy the guest's four PAE page-directory-pointer entries from the page
 * referenced by guest CR3 into the GUEST_PDPTRn VMCS fields.  No-op
 * unless the guest is in PAE mode with EFER.LMA clear.  Crashes the
 * domain if CR3 is not 32-byte aligned or does not point at RAM.
 */
static void vmx_load_pdptrs(struct vcpu *v)
{
    unsigned long cr3 = v->arch.hvm_vcpu.guest_cr[3], mfn;
    uint64_t *guest_pdptrs;
    p2m_type_t p2mt;
    char *p;

    /* EPT needs to load PDPTRS into VMCS for PAE. */
    if ( !hvm_pae_enabled(v) || (v->arch.hvm_vcpu.guest_efer & EFER_LMA) )
        return;

    /* A PAE PDPT must be 32-byte aligned. */
    if ( cr3 & 0x1fUL )
        goto crash;

    mfn = mfn_x(gfn_to_mfn(v->domain, cr3 >> PAGE_SHIFT, &p2mt));
    if ( !p2m_is_ram(p2mt) )
        goto crash;

    p = map_domain_page(mfn);

    guest_pdptrs = (uint64_t *)(p + (cr3 & ~PAGE_MASK));

    /*
     * We do not check the PDPTRs for validity. The CPU will do this during
     * vm entry, and we can handle the failure there and crash the guest.
     * The only thing we could do better here is #GP instead.
     */

    vmx_vmcs_enter(v);

    __vmwrite(GUEST_PDPTR0, guest_pdptrs[0]);
    __vmwrite(GUEST_PDPTR1, guest_pdptrs[1]);
    __vmwrite(GUEST_PDPTR2, guest_pdptrs[2]);
    __vmwrite(GUEST_PDPTR3, guest_pdptrs[3]);
#ifdef __i386__
    /* 32-bit hypervisor: high halves of 64-bit fields are written separately. */
    __vmwrite(GUEST_PDPTR0_HIGH, guest_pdptrs[0] >> 32);
    __vmwrite(GUEST_PDPTR1_HIGH, guest_pdptrs[1] >> 32);
    __vmwrite(GUEST_PDPTR2_HIGH, guest_pdptrs[2] >> 32);
    __vmwrite(GUEST_PDPTR3_HIGH, guest_pdptrs[3] >> 32);
#endif

    vmx_vmcs_exit(v);

    unmap_domain_page(p);
    return;

 crash:
    domain_crash(v->domain);
}

/* Propagate Xen's own CR3 for this vcpu into the VMCS host-state area. */
static void vmx_update_host_cr3(struct vcpu *v)
{
    vmx_vmcs_enter(v);
    __vmwrite(HOST_CR3, v->arch.cr3);
    vmx_vmcs_exit(v);
}

/*
 * Refresh VMCS state after the guest's view of control register 'cr'
 * (0, 2, 3 or 4) changes, deriving the hardware value hw_cr[n] from
 * guest_cr[n] plus whatever bits the hypervisor must force.
 */
static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr)
{
    vmx_vmcs_enter(v);

    switch ( cr )
    {
    case 0: {
        /* Bits always forced on in the hardware CR0. */
        unsigned long hw_cr0_mask =
            X86_CR0_NE | X86_CR0_PG | X86_CR0_PE;

        /* Shadow paging needs CR0.WP so supervisor writes honour PTE flags. */
        if ( paging_mode_shadow(v->domain) )
           hw_cr0_mask |= X86_CR0_WP;

        if ( paging_mode_hap(v->domain) )
        {
            /* We manage GUEST_CR3 when guest CR0.PE is zero. */
            uint32_t cr3_ctls = (CPU_BASED_CR3_LOAD_EXITING |
                                 CPU_BASED_CR3_STORE_EXITING);
            v->arch.hvm_vmx.exec_control &= ~cr3_ctls;
            if ( !hvm_paging_enabled(v) )
                v->arch.hvm_vmx.exec_control |= cr3_ctls;
            __vmwrite(CPU_BASED_VM_EXEC_CONTROL, v->arch.hvm_vmx.exec_control);

            /* Changing CR0.PE can change some bits in real CR4. */
            vmx_update_guest_cr(v, 4);
        }

        if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
        {
            /*
             * Guest wants TS clear.  Keep TS set for a descheduled vcpu so
             * FPU use traps lazily; for the running vcpu, restore the FPU
             * now if TS is currently forced in hardware.
             */
            if ( v != current )
                hw_cr0_mask |= X86_CR0_TS;
            else if ( v->arch.hvm_vcpu.hw_cr[0] & X86_CR0_TS )
                vmx_fpu_enter(v);
        }

        /* Real mode (CR0.PE clear) is emulated rather than run natively. */
        v->arch.hvm_vmx.vmxemul &= ~VMXEMUL_REALMODE;
        if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) )
            v->arch.hvm_vmx.vmxemul |= VMXEMUL_REALMODE;

        v->arch.hvm_vcpu.hw_cr[0] =
            v->arch.hvm_vcpu.guest_cr[0] | hw_cr0_mask;
        __vmwrite(GUEST_CR0, v->arch.hvm_vcpu.hw_cr[0]);
        /* Guest reads of CR0 see its own value, not the forced bits. */
        __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[0]);
        break;
    }
    case 2:
        /* CR2 is updated in exit stub. */
        break;
    case 3:
        if ( paging_mode_hap(v->domain) )
        {
            /*
             * Non-paging guest under HAP: run on the identity-map
             * pagetable supplied via HVM_PARAM_IDENT_PT.
             */
            if ( !hvm_paging_enabled(v) )
                v->arch.hvm_vcpu.hw_cr[3] =
                    v->domain->arch.hvm_domain.params[HVM_PARAM_IDENT_PT];
            vmx_load_pdptrs(v);
        }

        __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr[3]);
        /* CR3 change invalidates this vcpu's tagged TLB entries. */
        vpid_sync_vcpu_all(v);
        break;
    case 4:
        v->arch.hvm_vcpu.hw_cr[4] = HVM_CR4_HOST_MASK;
        if ( paging_mode_hap(v->domain) )
            v->arch.hvm_vcpu.hw_cr[4] &= ~X86_CR4_PAE;
        v->arch.hvm_vcpu.hw_cr[4] |= v->arch.hvm_vcpu.guest_cr[4];
        if ( paging_mode_hap(v->domain) && !hvm_paging_enabled(v) )
        {
            /* Non-paging HAP guest uses the PSE identity pagetable. */
            v->arch.hvm_vcpu.hw_cr[4] |= X86_CR4_PSE;
            v->arch.hvm_vcpu.hw_cr[4] &= ~X86_CR4_PAE;
        }
        __vmwrite(GUEST_CR4, v->arch.hvm_vcpu.hw_cr[4]);
        __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[4]);
        break;
    default:
        BUG();
    }

    vmx_vmcs_exit(v);
}

/*
 * Reflect guest_efer into hardware: EFER.LMA controls the VM-entry
 * IA-32e-mode bit (64-bit builds only); NX and SCE are swapped into the
 * physical EFER whenever this vcpu is the one currently running.
 */
static void vmx_update_guest_efer(struct vcpu *v)
{
#ifdef __x86_64__
    unsigned long vm_entry_value;

    vmx_vmcs_enter(v);

    vm_entry_value = __vmread(VM_ENTRY_CONTROLS);
    if ( v->arch.hvm_vcpu.guest_efer & EFER_LMA )
        vm_entry_value |= VM_ENTRY_IA32E_MODE;
    else
        vm_entry_value &= ~VM_ENTRY_IA32E_MODE;
    __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);

    vmx_vmcs_exit(v);
#endif

    if ( v == current )
        write_efer((read_efer() & ~(EFER_NX|EFER_SCE)) |
                   (v->arch.hvm_vcpu.guest_efer & (EFER_NX|EFER_SCE)));
}

static void vmx_flush_guest_tlbs(void)
{
    /*
     * If VPID (i.e. tagged TLB support) is not enabled, the fact that
     * we're in Xen at all means any guest will have a clean TLB when
     * it's next run, because VMRESUME will flush it for us.
     *
     * If enabled, we invalidate all translations associated with all
     * VPID values.
     */
    vpid_sync_all();
}

/* IPI handler: invalidate EPT translations for the domain's EPT pointer. */
static void __ept_sync_domain(void *info)
{
    struct domain *d = info;
    __invept(1, d->arch.hvm_domain.vmx.ept_control.eptp, 0);
}

/* Flush a domain's EPT translations on every CPU. */
void ept_sync_domain(struct domain *d)
{
    /* Only if using EPT and this domain has some VCPUs to dirty. */
    if ( d->arch.hvm_domain.hap_enabled && d->vcpu[0] )
    {
        ASSERT(local_irq_is_enabled());
        on_each_cpu(__ept_sync_domain, d, 1, 1);
    }
}

/*
 * Queue an event (type + vector) for injection at the next VM entry,
 * writing the error code too when the caller supplies one.
 */
static void __vmx_inject_exception(
    struct vcpu *v, int trap, int type, int error_code)
{
    unsigned long intr_fields;

    /*
     * NB. Callers do not need to worry about clearing STI/MOV-SS blocking:
     *  "If the VM entry is injecting, there is no blocking by STI or by
     *   MOV SS following the VM entry, regardless of the contents of the
     *   interruptibility-state field [in the guest-state area before the
     *   VM entry]", PRM Vol. 3, 22.6.1 (Interruptibility State).
     */

    intr_fields = (INTR_INFO_VALID_MASK | (type<<8) | trap);
    if ( error_code != HVM_DELIVER_NO_ERROR_CODE ) {
        __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
        intr_fields |= INTR_INFO_DELIVER_CODE_MASK;
    }

    __vmwrite(VM_ENTRY_INTR_INFO, intr_fields);

    if ( trap == TRAP_page_fault )
        HVMTRACE_LONG_2D(PF_INJECT, v, error_code,
            TRC_PAR_LONG(v->arch.hvm_vcpu.guest_cr[2]));
    else
        HVMTRACE_2D(INJ_EXC, v, trap, error_code);
}

/*
 * Inject a hardware exception.  If a hardware exception is already
 * pending for this VM entry, combine the two (possibly escalating to
 * #DF, which is forced to error code 0).
 */
void vmx_inject_hw_exception(struct vcpu *v, int trap, int error_code)
{
    unsigned long intr_info = __vmread(VM_ENTRY_INTR_INFO);

    if ( unlikely(intr_info & INTR_INFO_VALID_MASK) &&
         (((intr_info >> 8) & 7) == X86_EVENTTYPE_HW_EXCEPTION) )
    {
        trap = hvm_combine_hw_exceptions((uint8_t)intr_info, trap);
        if ( trap == TRAP_double_fault )
            error_code = 0;
    }

    __vmx_inject_exception(v, trap, X86_EVENTTYPE_HW_EXCEPTION, error_code);
}

/* Inject an external interrupt with the given vector. */
void vmx_inject_extint(struct vcpu *v, int trap)
{
    __vmx_inject_exception(v, trap, X86_EVENTTYPE_EXT_INTR,
                           HVM_DELIVER_NO_ERROR_CODE);
}

/* Inject an NMI (vector 2). */
void vmx_inject_nmi(struct vcpu *v)
{
    __vmx_inject_exception(v, 2, X86_EVENTTYPE_NMI,
                           HVM_DELIVER_NO_ERROR_CODE);
}

/*
 * hvm_function_table.inject_exception hook: inject a hardware exception
 * into the current vcpu, recording cr2 for page faults, and setting
 * DR6 bit 14 (BS) when a #DB coincides with EFLAGS.TF single-stepping.
 */
static void vmx_inject_exception(
    unsigned int trapnr, int errcode, unsigned long cr2)
{
    struct vcpu *curr = current;

    vmx_inject_hw_exception(curr, trapnr, errcode);

    if ( trapnr == TRAP_page_fault )
        curr->arch.hvm_vcpu.guest_cr[2] = cr2;

    if ( (trapnr == TRAP_debug) &&
         (guest_cpu_user_regs()->eflags & X86_EFLAGS_TF) )
    {
        __restore_debug_registers(curr);
        write_debugreg(6, read_debugreg(6) | 0x4000); /* 0x4000 == DR6.BS */
    }
}

/* Is an event already queued for injection at the next VM entry? */
static int vmx_event_pending(struct vcpu *v)
{
    ASSERT(v == current);
    return (__vmread(VM_ENTRY_INTR_INFO) & INTR_INFO_VALID_MASK);
}

/* Forward performance-counter interrupts to the vPMU layer. */
static int vmx_do_pmu_interrupt(struct cpu_user_regs *regs)
{
    return vpmu_do_interrupt(regs);
}

/*
 * Guest toggled uncacheable memory mode: recompute EPT memory types over
 * the whole p2m range and flush all tagged TLB entries.
 */
static void vmx_set_uc_mode(struct vcpu *v)
{
    if ( paging_mode_hap(v->domain) )
        ept_change_entry_emt_with_range(
            v->domain, 0, v->domain->arch.p2m->max_mapped_pfn);
    vpid_sync_all();
}

/* VMX implementation of the generic HVM interface. */
static struct hvm_function_table vmx_function_table = {
    .name                 = "VMX",
    .domain_initialise    = vmx_domain_initialise,
    .domain_destroy       = vmx_domain_destroy,
    .vcpu_initialise      = vmx_vcpu_initialise,
    .vcpu_destroy         = vmx_vcpu_destroy,
    .save_cpu_ctxt        = vmx_save_vmcs_ctxt,
    .load_cpu_ctxt        = vmx_load_vmcs_ctxt,
    .get_interrupt_shadow = vmx_get_interrupt_shadow,
    .set_interrupt_shadow = vmx_set_interrupt_shadow,
    .guest_x86_mode       = vmx_guest_x86_mode,
    .get_segment_register = vmx_get_segment_register,
    .set_segment_register = vmx_set_segment_register,
    .update_host_cr3      = vmx_update_host_cr3,
    .update_guest_cr      = vmx_update_guest_cr,
    .update_guest_efer    = vmx_update_guest_efer,
    .flush_guest_tlbs     = vmx_flush_guest_tlbs,
    .set_tsc_offset       = vmx_set_tsc_offset,
    .inject_exception     = vmx_inject_exception,
    .init_hypercall_page  = vmx_init_hypercall_page,
    .event_pending        = vmx_event_pending,
    .do_pmu_interrupt     = vmx_do_pmu_interrupt,
    .cpu_up               = vmx_cpu_up,
    .cpu_down             = vmx_cpu_down,
    .cpuid_intercept      = vmx_cpuid_intercept,
    .wbinvd_intercept     = vmx_wbinvd_intercept,
    .fpu_dirty_intercept  = vmx_fpu_dirty_intercept,
    .msr_read_intercept   = vmx_msr_read_intercept,
    .msr_write_intercept  = vmx_msr_write_intercept,
    .invlpg_intercept     = vmx_invlpg_intercept,
    .set_uc_mode          = vmx_set_uc_mode
};

/* Allocation bitmap of in-use VPIDs; bit 0 is reserved for Xen itself. */
static unsigned long *vpid_bitmap;
#define VPID_BITMAP_SIZE ((1u << VMCS_VPID_WIDTH) / MAX_VIRT_CPUS)

/*
 * Per-CPU VMX bring-up.  The first (boot-CPU) call additionally probes
 * EPT/VPID support and registers the VMX function table with the generic
 * HVM layer; later calls just bring VMX up on the calling CPU.
 */
void start_vmx(void)
{
    static int bootstrapped;

    vmx_save_host_msrs();

    /* Secondary CPUs: boot CPU already did feature setup. */
    if ( bootstrapped )
    {
        if ( hvm_enabled && !vmx_cpu_up() )
        {
            printk("VMX: FATAL: failed to initialise CPU%d!\n",
                   smp_processor_id());
            BUG();
        }
        return;
    }

    bootstrapped = 1;

    /* Xen does not fill x86_capability words except 0. */
    boot_cpu_data.x86_capability[4] = cpuid_ecx(1);

    if ( !test_bit(X86_FEATURE_VMXE, &boot_cpu_data.x86_capability) )
        return;

    set_in_cr4(X86_CR4_VMXE);

    if ( !vmx_cpu_up() )
    {
        printk("VMX: failed to initialise.\n");
        return;
    }

    if ( cpu_has_vmx_ept )
    {
        printk("VMX: EPT is available.\n");
        vmx_function_table.hap_supported = 1;
    }

    if ( cpu_has_vmx_vpid )
    {
        printk("VMX: VPID is available.\n");
        vpid_bitmap = xmalloc_array(
            unsigned long, BITS_TO_LONGS(VPID_BITMAP_SIZE));
        BUG_ON(vpid_bitmap == NULL);
        memset(vpid_bitmap, 0, BITS_TO_LONGS(VPID_BITMAP_SIZE) * sizeof(long));

        /* VPID 0 is used by VMX root mode (the hypervisor). */
        __set_bit(0, vpid_bitmap);
    }

    setup_vmcs_dump();

    hvm_enable(&vmx_function_table);
}

/*
 * Not all cases receive valid value in the VM-exit instruction length field.
 * Callers must know what they're doing!
 */
static int __get_instruction_length(void)
{
    int len;
    len = __vmread(VM_EXIT_INSTRUCTION_LEN); /* Safe: callers audited */
    /* A VM-exit instruction length is 1-15 bytes; anything else is bogus. */
    BUG_ON((len < 1) || (len > 15));
    return len;
}

/*
 * Advance guest RIP past an emulated instruction, clear EFLAGS.RF and
 * any STI/MOV-SS interrupt shadow, and deliver #DB if the guest is
 * single-stepping (EFLAGS.TF set).
 */
static void __update_guest_eip(unsigned long inst_len)
{
    struct cpu_user_regs *regs = guest_cpu_user_regs();
    unsigned long x;

    regs->eip += inst_len;
    regs->eflags &= ~X86_EFLAGS_RF;

    /* The instruction has completed: drop any interrupt shadow it set up. */
    x = __vmread(GUEST_INTERRUPTIBILITY_INFO);
    if ( x & (VMX_INTR_SHADOW_STI | VMX_INTR_SHADOW_MOV_SS) )
    {
        x &= ~(VMX_INTR_SHADOW_STI | VMX_INTR_SHADOW_MOV_SS);
        __vmwrite(GUEST_INTERRUPTIBILITY_INFO, x);
    }

    if ( regs->eflags & X86_EFLAGS_TF )
        vmx_inject_exception(TRAP_debug, HVM_DELIVER_NO_ERROR_CODE, 0);
}

/*
 * #NM (device-not-available) intercept: lazily restore the guest's FPU
 * state on first use after a context switch.
 */
static void vmx_fpu_dirty_intercept(void)
{
    struct vcpu *curr = current;

    vmx_fpu_enter(curr);

    /* Disable TS in guest CR0 unless the guest wants the exception too. */
    if ( !(curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
    {
        curr->arch.hvm_vcpu.hw_cr[0] &= ~X86_CR0_TS;
        __vmwrite(GUEST_CR0, curr->arch.hvm_vcpu.hw_cr[0]);
    }
}

/* Bit mask of a CPUID feature flag within its 32-bit feature word. */
#define bitmaskof(idx)  (1U << ((idx) & 31))

/*
 * CPUID intercept: start from the generic HVM answer, then apply
 * VMX-specific fixups.  Currently the only fixup is leaf 0x80000001:
 * the SYSCALL feature bit is shown only while the guest executes with a
 * 64-bit code segment (CS.L set).
 */
static void vmx_cpuid_intercept(
    unsigned int *eax, unsigned int *ebx,
    unsigned int *ecx, unsigned int *edx)
{
    unsigned int input = *eax;
    struct segment_register cs;
    struct vcpu *v = current;

    hvm_cpuid(input, eax, ebx, ecx, edx);

    switch ( input )
    {
        case 0x80000001:
            /* SYSCALL is visible iff running in long mode. */
            hvm_get_segment_register(v, x86_seg_cs, &cs);
            if ( cs.attr.fields.l )
                *edx |= bitmaskof(X86_FEATURE_SYSCALL);
            else
                *edx &= ~(bitmaskof(X86_FEATURE_SYSCALL));
            break;
    }

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?