📄 svm.c
/*
 * svm.c: handling SVM architecture-related VM exits
 * Copyright (c) 2004, Intel Corporation.
 * Copyright (c) 2005-2007, Advanced Micro Devices, Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/trace.h>
#include <xen/sched.h>
#include <xen/irq.h>
#include <xen/softirq.h>
#include <xen/hypercall.h>
#include <xen/domain_page.h>
#include <asm/current.h>
#include <asm/io.h>
#include <asm/paging.h>
#include <asm/p2m.h>
#include <asm/regs.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/types.h>
#include <asm/debugreg.h>
#include <asm/msr.h>
#include <asm/spinlock.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/support.h>
#include <asm/hvm/io.h>
#include <asm/hvm/svm/asid.h>
#include <asm/hvm/svm/svm.h>
#include <asm/hvm/svm/vmcb.h>
#include <asm/hvm/svm/emulate.h>
#include <asm/hvm/svm/intr.h>
#include <asm/x86_emulate.h>
#include <public/sched.h>
#include <asm/hvm/vpt.h>
#include <asm/hvm/trace.h>
#include <asm/hap.h>

u32 svm_feature_flags;

#define set_segment_register(name, value) \
    asm volatile ( "movw %%ax ,%%" STR(name) "" : : "a" (value) )

enum handler_return { HNDL_done, HNDL_unhandled, HNDL_exception_raised };

asmlinkage void do_IRQ(struct cpu_user_regs *);

static void svm_update_guest_cr(struct vcpu *v, unsigned int cr);
static void svm_update_guest_efer(struct vcpu *v);
static void svm_inject_exception(
    unsigned int trapnr, int errcode, unsigned long cr2);
static void svm_cpuid_intercept(
    unsigned int *eax, unsigned int *ebx,
    unsigned int *ecx, unsigned int *edx);
static void svm_wbinvd_intercept(void);
static void svm_fpu_dirty_intercept(void);
static int svm_msr_read_intercept(struct cpu_user_regs *regs);
static int svm_msr_write_intercept(struct cpu_user_regs *regs);
static void svm_invlpg_intercept(unsigned long vaddr);

/* va of hardware host save area */
static void *hsa[NR_CPUS] __read_mostly;

/* vmcb used for extended host state */
static void *root_vmcb[NR_CPUS] __read_mostly;

static void inline __update_guest_eip(
    struct cpu_user_regs *regs, unsigned int inst_len)
{
    struct vcpu *curr = current;

    if ( unlikely(inst_len == 0) )
        return;

    if ( unlikely(inst_len > 15) )
    {
        gdprintk(XENLOG_ERR, "Bad instruction length %u\n", inst_len);
        domain_crash(curr->domain);
        return;
    }

    ASSERT(regs == guest_cpu_user_regs());

    regs->eip += inst_len;
    regs->eflags &= ~X86_EFLAGS_RF;

    curr->arch.hvm_svm.vmcb->interrupt_shadow = 0;

    if ( regs->eflags & X86_EFLAGS_TF )
        svm_inject_exception(TRAP_debug, HVM_DELIVER_NO_ERROR_CODE, 0);
}

static void svm_cpu_down(void)
{
    write_efer(read_efer() & ~EFER_SVME);
}

static enum handler_return long_mode_do_msr_write(struct cpu_user_regs *regs)
{
    u64 msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
    u32 ecx = regs->ecx;

    HVM_DBG_LOG(DBG_LEVEL_0, "msr %x msr_content %"PRIx64, ecx, msr_content);

    switch ( ecx )
    {
    case MSR_EFER:
        if ( hvm_set_efer(msr_content) )
            return HNDL_exception_raised;
        break;

    case MSR_IA32_MC4_MISC: /* Threshold register */
    case MSR_F10_MC4_MISC1 ... MSR_F10_MC4_MISC3:
        /*
         * MCA/MCE: Threshold register is reported to be locked, so we ignore
         * all write accesses. This behaviour matches real HW, so guests should
         * have no problem with this.
         */
        break;

    default:
        return HNDL_unhandled;
    }

    return HNDL_done;
}

static void svm_save_dr(struct vcpu *v)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    if ( !v->arch.hvm_vcpu.flag_dr_dirty )
        return;

    /* Clear the DR dirty flag and re-enable intercepts for DR accesses. */
    v->arch.hvm_vcpu.flag_dr_dirty = 0;
    v->arch.hvm_svm.vmcb->dr_intercepts = ~0u;

    v->arch.guest_context.debugreg[0] = read_debugreg(0);
    v->arch.guest_context.debugreg[1] = read_debugreg(1);
    v->arch.guest_context.debugreg[2] = read_debugreg(2);
    v->arch.guest_context.debugreg[3] = read_debugreg(3);
    v->arch.guest_context.debugreg[6] = vmcb->dr6;
    v->arch.guest_context.debugreg[7] = vmcb->dr7;
}

static void __restore_debug_registers(struct vcpu *v)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    if ( v->arch.hvm_vcpu.flag_dr_dirty )
        return;

    v->arch.hvm_vcpu.flag_dr_dirty = 1;
    vmcb->dr_intercepts = 0;

    write_debugreg(0, v->arch.guest_context.debugreg[0]);
    write_debugreg(1, v->arch.guest_context.debugreg[1]);
    write_debugreg(2, v->arch.guest_context.debugreg[2]);
    write_debugreg(3, v->arch.guest_context.debugreg[3]);
    vmcb->dr6 = v->arch.guest_context.debugreg[6];
    vmcb->dr7 = v->arch.guest_context.debugreg[7];
}

/*
 * DR7 is saved and restored on every vmexit. Other debug registers only
 * need to be restored if their value is going to affect execution -- i.e.,
 * if one of the breakpoints is enabled. So mask out all bits that don't
 * enable some breakpoint functionality.
 */
static void svm_restore_dr(struct vcpu *v)
{
    if ( unlikely(v->arch.guest_context.debugreg[7] & DR7_ACTIVE_MASK) )
        __restore_debug_registers(v);
}

static int svm_vmcb_save(struct vcpu *v, struct hvm_hw_cpu *c)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    c->cr0 = v->arch.hvm_vcpu.guest_cr[0];
    c->cr2 = v->arch.hvm_vcpu.guest_cr[2];
    c->cr3 = v->arch.hvm_vcpu.guest_cr[3];
    c->cr4 = v->arch.hvm_vcpu.guest_cr[4];

    c->sysenter_cs = vmcb->sysenter_cs;
    c->sysenter_esp = vmcb->sysenter_esp;
    c->sysenter_eip = vmcb->sysenter_eip;

    c->pending_event = 0;
    c->error_code = 0;
    if ( vmcb->eventinj.fields.v &&
         hvm_event_needs_reinjection(vmcb->eventinj.fields.type,
                                     vmcb->eventinj.fields.vector) )
    {
        c->pending_event = (uint32_t)vmcb->eventinj.bytes;
        c->error_code = vmcb->eventinj.fields.errorcode;
    }

    return 1;
}

static int svm_vmcb_restore(struct vcpu *v, struct hvm_hw_cpu *c)
{
    unsigned long mfn = 0;
    p2m_type_t p2mt;
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    if ( c->pending_valid &&
         ((c->pending_type == 1) || (c->pending_type > 6) ||
          (c->pending_reserved != 0)) )
    {
        gdprintk(XENLOG_ERR, "Invalid pending event 0x%"PRIx32".\n",
                 c->pending_event);
        return -EINVAL;
    }

    if ( !paging_mode_hap(v->domain) )
    {
        if ( c->cr0 & X86_CR0_PG )
        {
            mfn = mfn_x(gfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT, &p2mt));
            if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
            {
                gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64"\n",
                         c->cr3);
                return -EINVAL;
            }
        }

        if ( v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG )
            put_page(pagetable_get_page(v->arch.guest_table));

        v->arch.guest_table = pagetable_from_pfn(mfn);
    }

    v->arch.hvm_vcpu.guest_cr[0] = c->cr0 | X86_CR0_ET;
    v->arch.hvm_vcpu.guest_cr[2] = c->cr2;
    v->arch.hvm_vcpu.guest_cr[3] = c->cr3;
    v->arch.hvm_vcpu.guest_cr[4] = c->cr4;
    svm_update_guest_cr(v, 0);
    svm_update_guest_cr(v, 2);
    svm_update_guest_cr(v, 4);

    vmcb->sysenter_cs = c->sysenter_cs;
    vmcb->sysenter_esp = c->sysenter_esp;
    vmcb->sysenter_eip = c->sysenter_eip;

    if ( paging_mode_hap(v->domain) )
    {
        vmcb->np_enable = 1;
        vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */
        vmcb->h_cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
    }

    if ( c->pending_valid )
    {
        gdprintk(XENLOG_INFO, "Re-injecting 0x%"PRIx32", 0x%"PRIx32"\n",
                 c->pending_event, c->error_code);

        if ( hvm_event_needs_reinjection(c->pending_type, c->pending_vector) )
        {
            vmcb->eventinj.bytes = c->pending_event;
            vmcb->eventinj.fields.errorcode = c->error_code;
        }
    }

    paging_update_paging_modes(v);

    return 0;
}

static void svm_save_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    data->shadow_gs = vmcb->kerngsbase;
    data->msr_lstar = vmcb->lstar;
    data->msr_star = vmcb->star;
    data->msr_cstar = vmcb->cstar;
    data->msr_syscall_mask = vmcb->sfmask;
    data->msr_efer = v->arch.hvm_vcpu.guest_efer;
    data->msr_flags = -1ULL;

    data->tsc = hvm_get_guest_tsc(v);
}

static void svm_load_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    vmcb->kerngsbase = data->shadow_gs;
    vmcb->lstar = data->msr_lstar;
    vmcb->star = data->msr_star;
    vmcb->cstar = data->msr_cstar;
    vmcb->sfmask = data->msr_syscall_mask;
    v->arch.hvm_vcpu.guest_efer = data->msr_efer;
    svm_update_guest_efer(v);
    hvm_set_guest_tsc(v, data->tsc);
}

static void svm_save_vmcb_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
{
    svm_save_cpu_state(v, ctxt);
    svm_vmcb_save(v, ctxt);
}

static int svm_load_vmcb_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
{
    svm_load_cpu_state(v, ctxt);
    if (svm_vmcb_restore(v, ctxt)) {
        printk("svm_vmcb restore failed!\n");
        domain_crash(v->domain);
        return -EINVAL;
    }

    return 0;
}

static void svm_fpu_enter(struct vcpu *v)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    setup_fpu(v);
    vmcb->exception_intercepts &= ~(1U << TRAP_no_device);
}

static void svm_fpu_leave(struct vcpu *v)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    ASSERT(!v->fpu_dirtied);
    ASSERT(read_cr0() & X86_CR0_TS);

    /*
     * If the guest does not have TS enabled then we must cause and handle an
     * exception on first use of the FPU. If the guest *does* have TS enabled
     * then this is not necessary: no FPU activity can occur until the guest
     * clears CR0.TS, and we will initialise the FPU when that happens.
     */
    if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
    {
        v->arch.hvm_svm.vmcb->exception_intercepts |= 1U << TRAP_no_device;
        vmcb->cr0 |= X86_CR0_TS;
    }
}

static unsigned int svm_get_interrupt_shadow(struct vcpu *v)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
    unsigned int intr_shadow = 0;

    if ( vmcb->interrupt_shadow )
        intr_shadow |= HVM_INTR_SHADOW_MOV_SS | HVM_INTR_SHADOW_STI;

    if ( vmcb->general1_intercepts & GENERAL1_INTERCEPT_IRET )
        intr_shadow |= HVM_INTR_SHADOW_NMI;

    return intr_shadow;
}

static void svm_set_interrupt_shadow(struct vcpu *v, unsigned int intr_shadow)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    vmcb->interrupt_shadow =
        !!(intr_shadow & (HVM_INTR_SHADOW_MOV_SS|HVM_INTR_SHADOW_STI));

    vmcb->general1_intercepts &= ~GENERAL1_INTERCEPT_IRET;
    if ( intr_shadow & HVM_INTR_SHADOW_NMI )
        vmcb->general1_intercepts |= GENERAL1_INTERCEPT_IRET;
}

static int svm_guest_x86_mode(struct vcpu *v)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    if ( unlikely(!(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE)) )
        return 0;
    if ( unlikely(guest_cpu_user_regs()->eflags & X86_EFLAGS_VM) )
        return 1;
    if ( hvm_long_mode_enabled(v) && likely(vmcb->cs.attr.fields.l) )
        return 8;
    return (likely(vmcb->cs.attr.fields.db) ? 4 : 2);
}

static void svm_update_host_cr3(struct vcpu *v)
{
    /* SVM doesn't have a HOST_CR3 equivalent to update. */
}

static void svm_update_guest_cr(struct vcpu *v, unsigned int cr)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    switch ( cr )
    {
    case 0: {
        unsigned long hw_cr0_mask = 0;

        if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
        {
            if ( v != current )
                hw_cr0_mask |= X86_CR0_TS;
            else if ( vmcb->cr0 & X86_CR0_TS )
                svm_fpu_enter(v);
        }

        vmcb->cr0 = v->arch.hvm_vcpu.guest_cr[0] | hw_cr0_mask;
        if ( !paging_mode_hap(v->domain) )
            vmcb->cr0 |= X86_CR0_PG | X86_CR0_WP;
        break;
    }
    case 2:
        vmcb->cr2 = v->arch.hvm_vcpu.guest_cr[2];
        break;
    case 3:
        vmcb->cr3 = v->arch.hvm_vcpu.hw_cr[3];
        svm_asid_inv_asid(v);
        break;
    case 4:
        vmcb->cr4 = HVM_CR4_HOST_MASK;
        if ( paging_mode_hap(v->domain) )
            vmcb->cr4 &= ~X86_CR4_PAE;
        vmcb->cr4 |= v->arch.hvm_vcpu.guest_cr[4];
        break;
    default:
        BUG();
    }
}

static void svm_update_guest_efer(struct vcpu *v)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    vmcb->efer = (v->arch.hvm_vcpu.guest_efer | EFER_SVME) & ~EFER_LME;
    if ( vmcb->efer & EFER_LMA )
        vmcb->efer |= EFER_LME;
}

static void svm_flush_guest_tlbs(void)
{
    /* Roll over the CPU's ASID generation, so it gets a clean TLB when we
     * next VMRUN.  (If ASIDs are disabled, the whole TLB is flushed on
     * VMRUN anyway). */
    svm_asid_inc_generation();
}

static void svm_sync_vmcb(struct vcpu *v)
{
    struct arch_svm_struct *arch_svm = &v->arch.hvm_svm;

    if ( arch_svm->vmcb_in_sync )
        return;