📄 svm.c
/*
 * svm.c: handling SVM architecture-related VM exits
 * Copyright (c) 2004, Intel Corporation.
 * Copyright (c) 2005-2007, Advanced Micro Devices, Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/trace.h>
#include <xen/sched.h>
#include <xen/irq.h>
#include <xen/softirq.h>
#include <xen/hypercall.h>
#include <xen/domain_page.h>
#include <asm/current.h>
#include <asm/io.h>
#include <asm/paging.h>
#include <asm/p2m.h>
#include <asm/regs.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/types.h>
#include <asm/debugreg.h>
#include <asm/msr.h>
#include <asm/spinlock.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/support.h>
#include <asm/hvm/io.h>
#include <asm/hvm/svm/asid.h>
#include <asm/hvm/svm/svm.h>
#include <asm/hvm/svm/vmcb.h>
#include <asm/hvm/svm/emulate.h>
#include <asm/hvm/svm/intr.h>
#include <asm/x86_emulate.h>
#include <public/sched.h>
#include <asm/hvm/vpt.h>
#include <asm/hvm/trace.h>
#include <asm/hap.h>

u32 svm_feature_flags;

#define set_segment_register(name, value) \
    asm volatile ( "movw %%ax ,%%" STR(name) "" : : "a" (value) )

enum handler_return { HNDL_done, HNDL_unhandled, HNDL_exception_raised };

asmlinkage void do_IRQ(struct cpu_user_regs *);

static void svm_update_guest_cr(struct vcpu *v, unsigned int cr);
static void svm_update_guest_efer(struct vcpu *v);
static void svm_inject_exception(
    unsigned int trapnr, int errcode, unsigned long cr2);
static void svm_cpuid_intercept(
    unsigned int *eax, unsigned int *ebx,
    unsigned int *ecx, unsigned int *edx);
static void svm_wbinvd_intercept(void);
static void svm_fpu_dirty_intercept(void);
static int svm_msr_read_intercept(struct cpu_user_regs *regs);
static int svm_msr_write_intercept(struct cpu_user_regs *regs);
static void svm_invlpg_intercept(unsigned long vaddr);

/* va of hardware host save area */
static void *hsa[NR_CPUS] __read_mostly;

/* vmcb used for extended host state */
static void *root_vmcb[NR_CPUS] __read_mostly;

static void inline __update_guest_eip(
    struct cpu_user_regs *regs, unsigned int inst_len)
{
    struct vcpu *curr = current;

    if ( unlikely(inst_len == 0) )
        return;

    if ( unlikely(inst_len > 15) )
    {
        gdprintk(XENLOG_ERR, "Bad instruction length %u\n", inst_len);
        domain_crash(curr->domain);
        return;
    }

    ASSERT(regs == guest_cpu_user_regs());

    regs->eip += inst_len;
    regs->eflags &= ~X86_EFLAGS_RF;

    curr->arch.hvm_svm.vmcb->interrupt_shadow = 0;

    if ( regs->eflags & X86_EFLAGS_TF )
        svm_inject_exception(TRAP_debug, HVM_DELIVER_NO_ERROR_CODE, 0);
}

static void svm_cpu_down(void)
{
    write_efer(read_efer() & ~EFER_SVME);
}

static enum handler_return long_mode_do_msr_write(struct cpu_user_regs *regs)
{
    u64 msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
    u32 ecx = regs->ecx;

    HVM_DBG_LOG(DBG_LEVEL_0, "msr %x msr_content %"PRIx64, ecx, msr_content);

    switch ( ecx )
    {
    case MSR_EFER:
        if ( hvm_set_efer(msr_content) )
            return HNDL_exception_raised;
        break;

    case MSR_IA32_MC4_MISC: /* Threshold register */
    case MSR_F10_MC4_MISC1 ... MSR_F10_MC4_MISC3:
        /*
         * MCA/MCE: Threshold register is reported to be locked, so we ignore
         * all write accesses. This behaviour matches real HW, so guests should
         * have no problem with this.
         */
        break;

    default:
        return HNDL_unhandled;
    }

    return HNDL_done;
}

static void svm_save_dr(struct vcpu *v)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    if ( !v->arch.hvm_vcpu.flag_dr_dirty )
        return;

    /* Clear the DR dirty flag and re-enable intercepts for DR accesses. */
    v->arch.hvm_vcpu.flag_dr_dirty = 0;
    v->arch.hvm_svm.vmcb->dr_intercepts = ~0u;

    v->arch.guest_context.debugreg[0] = read_debugreg(0);
    v->arch.guest_context.debugreg[1] = read_debugreg(1);
    v->arch.guest_context.debugreg[2] = read_debugreg(2);
    v->arch.guest_context.debugreg[3] = read_debugreg(3);
    v->arch.guest_context.debugreg[6] = vmcb->dr6;
    v->arch.guest_context.debugreg[7] = vmcb->dr7;
}

static void __restore_debug_registers(struct vcpu *v)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    if ( v->arch.hvm_vcpu.flag_dr_dirty )
        return;

    v->arch.hvm_vcpu.flag_dr_dirty = 1;
    vmcb->dr_intercepts = 0;

    write_debugreg(0, v->arch.guest_context.debugreg[0]);
    write_debugreg(1, v->arch.guest_context.debugreg[1]);
    write_debugreg(2, v->arch.guest_context.debugreg[2]);
    write_debugreg(3, v->arch.guest_context.debugreg[3]);
    vmcb->dr6 = v->arch.guest_context.debugreg[6];
    vmcb->dr7 = v->arch.guest_context.debugreg[7];
}

/*
 * DR7 is saved and restored on every vmexit. Other debug registers only
 * need to be restored if their value is going to affect execution -- i.e.,
 * if one of the breakpoints is enabled. So mask out all bits that don't
 * enable some breakpoint functionality.
 */
static void svm_restore_dr(struct vcpu *v)
{
    if ( unlikely(v->arch.guest_context.debugreg[7] & DR7_ACTIVE_MASK) )
        __restore_debug_registers(v);
}

static int svm_vmcb_save(struct vcpu *v, struct hvm_hw_cpu *c)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    c->cr0 = v->arch.hvm_vcpu.guest_cr[0];
    c->cr2 = v->arch.hvm_vcpu.guest_cr[2];
    c->cr3 = v->arch.hvm_vcpu.guest_cr[3];
    c->cr4 = v->arch.hvm_vcpu.guest_cr[4];

    c->sysenter_cs = vmcb->sysenter_cs;
    c->sysenter_esp = vmcb->sysenter_esp;
    c->sysenter_eip = vmcb->sysenter_eip;

    c->pending_event = 0;
    c->error_code = 0;
    if ( vmcb->eventinj.fields.v &&
         hvm_event_needs_reinjection(vmcb->eventinj.fields.type,
                                     vmcb->eventinj.fields.vector) )
    {
        c->pending_event = (uint32_t)vmcb->eventinj.bytes;
        c->error_code = vmcb->eventinj.fields.errorcode;
    }

    return 1;
}

static int svm_vmcb_restore(struct vcpu *v, struct hvm_hw_cpu *c)
{
    unsigned long mfn = 0;
    p2m_type_t p2mt;
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    if ( c->pending_valid &&
         ((c->pending_type == 1) || (c->pending_type > 6) ||
          (c->pending_reserved != 0)) )
    {
        gdprintk(XENLOG_ERR, "Invalid pending event 0x%"PRIx32".\n",
                 c->pending_event);
        return -EINVAL;
    }

    if ( !paging_mode_hap(v->domain) )
    {
        if ( c->cr0 & X86_CR0_PG )
        {
            mfn = mfn_x(gfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT, &p2mt));
            if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
            {
                gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64"\n",
                         c->cr3);
                return -EINVAL;
            }
        }

        if ( v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG )
            put_page(pagetable_get_page(v->arch.guest_table));

        v->arch.guest_table = pagetable_from_pfn(mfn);
    }

    v->arch.hvm_vcpu.guest_cr[0] = c->cr0 | X86_CR0_ET;
    v->arch.hvm_vcpu.guest_cr[2] = c->cr2;
    v->arch.hvm_vcpu.guest_cr[3] = c->cr3;
    v->arch.hvm_vcpu.guest_cr[4] = c->cr4;
    svm_update_guest_cr(v, 0);
    svm_update_guest_cr(v, 2);
    svm_update_guest_cr(v, 4);

    vmcb->sysenter_cs = c->sysenter_cs;
    vmcb->sysenter_esp = c->sysenter_esp;
    vmcb->sysenter_eip = c->sysenter_eip;

    if ( paging_mode_hap(v->domain) )
    {
        vmcb->np_enable = 1;
        vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */
        vmcb->h_cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
    }

    if ( c->pending_valid )
    {
        gdprintk(XENLOG_INFO, "Re-injecting 0x%"PRIx32", 0x%"PRIx32"\n",
                 c->pending_event, c->error_code);

        if ( hvm_event_needs_reinjection(c->pending_type, c->pending_vector) )
        {
            vmcb->eventinj.bytes = c->pending_event;
            vmcb->eventinj.fields.errorcode = c->error_code;
        }
    }

    paging_update_paging_modes(v);

    return 0;
}

static void svm_save_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    data->shadow_gs = vmcb->kerngsbase;
    data->msr_lstar = vmcb->lstar;
    data->msr_star = vmcb->star;
    data->msr_cstar = vmcb->cstar;
    data->msr_syscall_mask = vmcb->sfmask;
    data->msr_efer = v->arch.hvm_vcpu.guest_efer;
    data->msr_flags = -1ULL;

    data->tsc = hvm_get_guest_tsc(v);
}

static void svm_load_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    vmcb->kerngsbase = data->shadow_gs;
    vmcb->lstar = data->msr_lstar;
    vmcb->star = data->msr_star;
    vmcb->cstar = data->msr_cstar;
    vmcb->sfmask = data->msr_syscall_mask;
    v->arch.hvm_vcpu.guest_efer = data->msr_efer;
    svm_update_guest_efer(v);
    hvm_set_guest_tsc(v, data->tsc);
}

static void svm_save_vmcb_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
{
    svm_save_cpu_state(v, ctxt);
    svm_vmcb_save(v, ctxt);
}

static int svm_load_vmcb_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
{
    svm_load_cpu_state(v, ctxt);
    if (svm_vmcb_restore(v, ctxt)) {
        printk("svm_vmcb restore failed!\n");
        domain_crash(v->domain);
        return -EINVAL;
    }

    return 0;
}

static void svm_fpu_enter(struct vcpu *v)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    setup_fpu(v);
    vmcb->exception_intercepts &= ~(1U << TRAP_no_device);
}

static void svm_fpu_leave(struct vcpu *v)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    ASSERT(!v->fpu_dirtied);
    ASSERT(read_cr0() & X86_CR0_TS);

    /*
     * If the guest does not have TS enabled then we must cause and handle an
     * exception on first use of the FPU. If the guest *does* have TS enabled
     * then this is not necessary: no FPU activity can occur until the guest
     * clears CR0.TS, and we will initialise the FPU when that happens.
     */
    if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
    {
        v->arch.hvm_svm.vmcb->exception_intercepts |= 1U << TRAP_no_device;
        vmcb->cr0 |= X86_CR0_TS;
    }
}

static unsigned int svm_get_interrupt_shadow(struct vcpu *v)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
    unsigned int intr_shadow = 0;

    if ( vmcb->interrupt_shadow )
        intr_shadow |= HVM_INTR_SHADOW_MOV_SS | HVM_INTR_SHADOW_STI;

    if ( vmcb->general1_intercepts & GENERAL1_INTERCEPT_IRET )
        intr_shadow |= HVM_INTR_SHADOW_NMI;

    return intr_shadow;
}

static void svm_set_interrupt_shadow(struct vcpu *v, unsigned int intr_shadow)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    vmcb->interrupt_shadow =
        !!(intr_shadow & (HVM_INTR_SHADOW_MOV_SS|HVM_INTR_SHADOW_STI));

    vmcb->general1_intercepts &= ~GENERAL1_INTERCEPT_IRET;
    if ( intr_shadow & HVM_INTR_SHADOW_NMI )
        vmcb->general1_intercepts |= GENERAL1_INTERCEPT_IRET;
}

static int svm_guest_x86_mode(struct vcpu *v)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    if ( unlikely(!(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE)) )
        return 0;
    if ( unlikely(guest_cpu_user_regs()->eflags & X86_EFLAGS_VM) )
        return 1;
    if ( hvm_long_mode_enabled(v) && likely(vmcb->cs.attr.fields.l) )
        return 8;
    return (likely(vmcb->cs.attr.fields.db) ? 4 : 2);
}

static void svm_update_host_cr3(struct vcpu *v)
{
    /* SVM doesn't have a HOST_CR3 equivalent to update. */
}

static void svm_update_guest_cr(struct vcpu *v, unsigned int cr)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    switch ( cr )
    {
    case 0: {
        unsigned long hw_cr0_mask = 0;

        if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
        {
            if ( v != current )
                hw_cr0_mask |= X86_CR0_TS;
            else if ( vmcb->cr0 & X86_CR0_TS )
                svm_fpu_enter(v);
        }

        vmcb->cr0 = v->arch.hvm_vcpu.guest_cr[0] | hw_cr0_mask;
        if ( !paging_mode_hap(v->domain) )
            vmcb->cr0 |= X86_CR0_PG | X86_CR0_WP;
        break;
    }
    case 2:
        vmcb->cr2 = v->arch.hvm_vcpu.guest_cr[2];
        break;
    case 3:
        vmcb->cr3 = v->arch.hvm_vcpu.hw_cr[3];
        svm_asid_inv_asid(v);
        break;
    case 4:
        vmcb->cr4 = HVM_CR4_HOST_MASK;
        if ( paging_mode_hap(v->domain) )
            vmcb->cr4 &= ~X86_CR4_PAE;
        vmcb->cr4 |= v->arch.hvm_vcpu.guest_cr[4];
        break;
    default:
        BUG();
    }
}

static void svm_update_guest_efer(struct vcpu *v)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    vmcb->efer = (v->arch.hvm_vcpu.guest_efer | EFER_SVME) & ~EFER_LME;
    if ( vmcb->efer & EFER_LMA )
        vmcb->efer |= EFER_LME;
}

static void svm_flush_guest_tlbs(void)
{
    /* Roll over the CPU's ASID generation, so it gets a clean TLB when we
     * next VMRUN.  (If ASIDs are disabled, the whole TLB is flushed on
     * VMRUN anyway). */
    svm_asid_inc_generation();
}

static void svm_sync_vmcb(struct vcpu *v)
{
    struct arch_svm_struct *arch_svm = &v->arch.hvm_svm;

    if ( arch_svm->vmcb_in_sync )
        return;