svm.c
/*
 * svm.c: handling SVM architecture-related VM exits
 * Copyright (c) 2004, Intel Corporation.
 * Copyright (c) 2005-2007, Advanced Micro Devices, Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/trace.h>
#include <xen/sched.h>
#include <xen/irq.h>
#include <xen/softirq.h>
#include <xen/hypercall.h>
#include <xen/domain_page.h>
#include <asm/current.h>
#include <asm/io.h>
#include <asm/paging.h>
#include <asm/p2m.h>
#include <asm/regs.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/types.h>
#include <asm/debugreg.h>
#include <asm/msr.h>
#include <asm/spinlock.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/support.h>
#include <asm/hvm/io.h>
#include <asm/hvm/svm/asid.h>
#include <asm/hvm/svm/svm.h>
#include <asm/hvm/svm/vmcb.h>
#include <asm/hvm/svm/emulate.h>
#include <asm/hvm/svm/intr.h>
#include <asm/x86_emulate.h>
#include <public/sched.h>
#include <asm/hvm/vpt.h>
#include <asm/hvm/trace.h>
#include <asm/hap.h>

u32 svm_feature_flags;

#define set_segment_register(name, value)  \
    asm volatile ( "movw %%ax ,%%" STR(name) "" : : "a" (value) )

enum handler_return { HNDL_done, HNDL_unhandled, HNDL_exception_raised };

int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip,
                         int inst_len);
asmlinkage void do_IRQ(struct cpu_user_regs *);

static void svm_update_guest_cr(struct vcpu *v, unsigned int cr);
static void svm_update_guest_efer(struct vcpu *v);
static void svm_inject_exception(
    unsigned int trapnr, int errcode, unsigned long cr2);

/* va of hardware host save area */
static void *hsa[NR_CPUS] __read_mostly;

/* vmcb used for extended host state */
static void *root_vmcb[NR_CPUS] __read_mostly;

static void inline __update_guest_eip(
    struct cpu_user_regs *regs, unsigned int inst_len)
{
    struct vcpu *curr = current;

    if ( unlikely(inst_len == 0) )
        return;

    if ( unlikely(inst_len > 15) )
    {
        gdprintk(XENLOG_ERR, "Bad instruction length %u\n", inst_len);
        domain_crash(curr->domain);
        return;
    }

    ASSERT(regs == guest_cpu_user_regs());

    regs->eip += inst_len;
    regs->eflags &= ~X86_EFLAGS_RF;

    curr->arch.hvm_svm.vmcb->interrupt_shadow = 0;

    if ( regs->eflags & X86_EFLAGS_TF )
        svm_inject_exception(TRAP_debug, HVM_DELIVER_NO_ERROR_CODE, 0);
}

static void svm_cpu_down(void)
{
    write_efer(read_efer() & ~EFER_SVME);
}

static enum handler_return long_mode_do_msr_write(struct cpu_user_regs *regs)
{
    u64 msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
    u32 ecx = regs->ecx;

    HVM_DBG_LOG(DBG_LEVEL_0, "msr %x msr_content %"PRIx64, ecx, msr_content);

    switch ( ecx )
    {
    case MSR_EFER:
        if ( !hvm_set_efer(msr_content) )
            return HNDL_exception_raised;
        break;

    case MSR_IA32_MC4_MISC: /* Threshold register */
    case MSR_F10_MC4_MISC1 ... MSR_F10_MC4_MISC3:
        /*
         * MCA/MCE: Threshold register is reported to be locked, so we ignore
         * all write accesses. This behaviour matches real HW, so guests should
         * have no problem with this.
         */
        break;

    default:
        return HNDL_unhandled;
    }

    return HNDL_done;
}

static void svm_save_dr(struct vcpu *v)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    if ( !v->arch.hvm_vcpu.flag_dr_dirty )
        return;

    /* Clear the DR dirty flag and re-enable intercepts for DR accesses. */
    v->arch.hvm_vcpu.flag_dr_dirty = 0;
    v->arch.hvm_svm.vmcb->dr_intercepts = ~0u;

    v->arch.guest_context.debugreg[0] = read_debugreg(0);
    v->arch.guest_context.debugreg[1] = read_debugreg(1);
    v->arch.guest_context.debugreg[2] = read_debugreg(2);
    v->arch.guest_context.debugreg[3] = read_debugreg(3);
    v->arch.guest_context.debugreg[6] = vmcb->dr6;
    v->arch.guest_context.debugreg[7] = vmcb->dr7;
}

static void __restore_debug_registers(struct vcpu *v)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    if ( v->arch.hvm_vcpu.flag_dr_dirty )
        return;

    v->arch.hvm_vcpu.flag_dr_dirty = 1;
    vmcb->dr_intercepts = 0;

    write_debugreg(0, v->arch.guest_context.debugreg[0]);
    write_debugreg(1, v->arch.guest_context.debugreg[1]);
    write_debugreg(2, v->arch.guest_context.debugreg[2]);
    write_debugreg(3, v->arch.guest_context.debugreg[3]);
    vmcb->dr6 = v->arch.guest_context.debugreg[6];
    vmcb->dr7 = v->arch.guest_context.debugreg[7];
}

/*
 * DR7 is saved and restored on every vmexit. Other debug registers only
 * need to be restored if their value is going to affect execution -- i.e.,
 * if one of the breakpoints is enabled. So mask out all bits that don't
 * enable some breakpoint functionality.
 */
static void svm_restore_dr(struct vcpu *v)
{
    if ( unlikely(v->arch.guest_context.debugreg[7] & DR7_ACTIVE_MASK) )
        __restore_debug_registers(v);
}

int svm_vmcb_save(struct vcpu *v, struct hvm_hw_cpu *c)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    c->cr0 = v->arch.hvm_vcpu.guest_cr[0];
    c->cr2 = v->arch.hvm_vcpu.guest_cr[2];
    c->cr3 = v->arch.hvm_vcpu.guest_cr[3];
    c->cr4 = v->arch.hvm_vcpu.guest_cr[4];

    c->idtr_limit = vmcb->idtr.limit;
    c->idtr_base  = vmcb->idtr.base;

    c->gdtr_limit = vmcb->gdtr.limit;
    c->gdtr_base  = vmcb->gdtr.base;

    c->cs_sel = vmcb->cs.sel;
    c->cs_limit = vmcb->cs.limit;
    c->cs_base = vmcb->cs.base;
    c->cs_arbytes = vmcb->cs.attr.bytes;

    c->ds_sel = vmcb->ds.sel;
    c->ds_limit = vmcb->ds.limit;
    c->ds_base = vmcb->ds.base;
    c->ds_arbytes = vmcb->ds.attr.bytes;

    c->es_sel = vmcb->es.sel;
    c->es_limit = vmcb->es.limit;
    c->es_base = vmcb->es.base;
    c->es_arbytes = vmcb->es.attr.bytes;

    c->ss_sel = vmcb->ss.sel;
    c->ss_limit = vmcb->ss.limit;
    c->ss_base = vmcb->ss.base;
    c->ss_arbytes = vmcb->ss.attr.bytes;

    c->fs_sel = vmcb->fs.sel;
    c->fs_limit = vmcb->fs.limit;
    c->fs_base = vmcb->fs.base;
    c->fs_arbytes = vmcb->fs.attr.bytes;

    c->gs_sel = vmcb->gs.sel;
    c->gs_limit = vmcb->gs.limit;
    c->gs_base = vmcb->gs.base;
    c->gs_arbytes = vmcb->gs.attr.bytes;

    c->tr_sel = vmcb->tr.sel;
    c->tr_limit = vmcb->tr.limit;
    c->tr_base = vmcb->tr.base;
    c->tr_arbytes = vmcb->tr.attr.bytes;

    c->ldtr_sel = vmcb->ldtr.sel;
    c->ldtr_limit = vmcb->ldtr.limit;
    c->ldtr_base = vmcb->ldtr.base;
    c->ldtr_arbytes = vmcb->ldtr.attr.bytes;

    c->sysenter_cs = vmcb->sysenter_cs;
    c->sysenter_esp = vmcb->sysenter_esp;
    c->sysenter_eip = vmcb->sysenter_eip;

    c->pending_event = 0;
    c->error_code = 0;
    if ( vmcb->eventinj.fields.v &&
         hvm_event_needs_reinjection(vmcb->eventinj.fields.type,
                                     vmcb->eventinj.fields.vector) )
    {
        c->pending_event = (uint32_t)vmcb->eventinj.bytes;
        c->error_code = vmcb->eventinj.fields.errorcode;
    }

    return 1;
}

int svm_vmcb_restore(struct vcpu *v, struct hvm_hw_cpu *c)
{
    unsigned long mfn = 0;
    p2m_type_t p2mt;
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    if ( c->pending_valid &&
         ((c->pending_type == 1) || (c->pending_type > 6) ||
          (c->pending_reserved != 0)) )
    {
        gdprintk(XENLOG_ERR, "Invalid pending event 0x%"PRIx32".\n",
                 c->pending_event);
        return -EINVAL;
    }

    if ( !paging_mode_hap(v->domain) )
    {
        if ( c->cr0 & X86_CR0_PG )
        {
            mfn = mfn_x(gfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT, &p2mt));
            if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
            {
                gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64"\n",
                         c->cr3);
                return -EINVAL;
            }
        }

        if ( v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG )
            put_page(pagetable_get_page(v->arch.guest_table));

        v->arch.guest_table = pagetable_from_pfn(mfn);
    }

    v->arch.hvm_vcpu.guest_cr[0] = c->cr0 | X86_CR0_ET;
    v->arch.hvm_vcpu.guest_cr[2] = c->cr2;
    v->arch.hvm_vcpu.guest_cr[3] = c->cr3;
    v->arch.hvm_vcpu.guest_cr[4] = c->cr4;
    svm_update_guest_cr(v, 0);
    svm_update_guest_cr(v, 2);
    svm_update_guest_cr(v, 4);

#ifdef HVM_DEBUG_SUSPEND
    printk("%s: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n",
           __func__, c->cr3, c->cr0, c->cr4);
#endif

    vmcb->idtr.limit = c->idtr_limit;
    vmcb->idtr.base  = c->idtr_base;

    vmcb->gdtr.limit = c->gdtr_limit;
    vmcb->gdtr.base  = c->gdtr_base;

    vmcb->cs.sel = c->cs_sel;
    vmcb->cs.limit = c->cs_limit;
    vmcb->cs.base = c->cs_base;
    vmcb->cs.attr.bytes = c->cs_arbytes;

    vmcb->ds.sel = c->ds_sel;
    vmcb->ds.limit = c->ds_limit;
    vmcb->ds.base = c->ds_base;
    vmcb->ds.attr.bytes = c->ds_arbytes;

    vmcb->es.sel = c->es_sel;
    vmcb->es.limit = c->es_limit;
    vmcb->es.base = c->es_base;
    vmcb->es.attr.bytes = c->es_arbytes;

    vmcb->ss.sel = c->ss_sel;
    vmcb->ss.limit = c->ss_limit;
    vmcb->ss.base = c->ss_base;
    vmcb->ss.attr.bytes = c->ss_arbytes;
    vmcb->cpl = vmcb->ss.attr.fields.dpl;

    vmcb->fs.sel = c->fs_sel;
    vmcb->fs.limit = c->fs_limit;
    vmcb->fs.base = c->fs_base;
    vmcb->fs.attr.bytes = c->fs_arbytes;

    vmcb->gs.sel = c->gs_sel;
    vmcb->gs.limit = c->gs_limit;
    vmcb->gs.base = c->gs_base;
    vmcb->gs.attr.bytes = c->gs_arbytes;

    vmcb->tr.sel = c->tr_sel;
    vmcb->tr.limit = c->tr_limit;
    vmcb->tr.base = c->tr_base;
    vmcb->tr.attr.bytes = c->tr_arbytes;

    vmcb->ldtr.sel = c->ldtr_sel;
    vmcb->ldtr.limit = c->ldtr_limit;
    vmcb->ldtr.base = c->ldtr_base;
    vmcb->ldtr.attr.bytes = c->ldtr_arbytes;

    vmcb->sysenter_cs = c->sysenter_cs;
    vmcb->sysenter_esp = c->sysenter_esp;
    vmcb->sysenter_eip = c->sysenter_eip;

    if ( paging_mode_hap(v->domain) )
    {
        vmcb->np_enable = 1;
        vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */
        vmcb->h_cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
    }

    if ( c->pending_valid )
    {
        gdprintk(XENLOG_INFO, "Re-injecting 0x%"PRIx32", 0x%"PRIx32"\n",
                 c->pending_event, c->error_code);

        if ( hvm_event_needs_reinjection(c->pending_type, c->pending_vector) )
        {
            vmcb->eventinj.bytes = c->pending_event;
            vmcb->eventinj.fields.errorcode = c->error_code;
        }
    }

    paging_update_paging_modes(v);

    return 0;
}

static void svm_save_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    data->shadow_gs        = vmcb->kerngsbase;
    data->msr_lstar        = vmcb->lstar;
    data->msr_star         = vmcb->star;
    data->msr_cstar        = vmcb->cstar;
    data->msr_syscall_mask = vmcb->sfmask;
    data->msr_efer         = v->arch.hvm_vcpu.guest_efer;
    data->msr_flags        = -1ULL;

    data->tsc = hvm_get_guest_time(v);
}

static void svm_load_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    vmcb->kerngsbase = data->shadow_gs;
    vmcb->lstar      = data->msr_lstar;
    vmcb->star       = data->msr_star;
    vmcb->cstar      = data->msr_cstar;
    vmcb->sfmask     = data->msr_syscall_mask;
    v->arch.hvm_vcpu.guest_efer = data->msr_efer;
    svm_update_guest_efer(v);

    hvm_set_guest_time(v, data->tsc);
}

static void svm_save_vmcb_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
{
    svm_save_cpu_state(v, ctxt);
    svm_vmcb_save(v, ctxt);
}

static int svm_load_vmcb_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
{
    svm_load_cpu_state(v, ctxt);
    if ( svm_vmcb_restore(v, ctxt) )
    {
        printk("svm_vmcb restore failed!\n");
        domain_crash(v->domain);
        return -EINVAL;
    }

    return 0;
}

static void svm_fpu_enter(struct vcpu *v)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    setup_fpu(v);
    vmcb->exception_intercepts &= ~(1U << TRAP_no_device);
}

static void svm_fpu_leave(struct vcpu *v)
{
    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

    ASSERT(!v->fpu_dirtied);
    ASSERT(read_cr0() & X86_CR0_TS);

    /*
     * If the guest does not have TS enabled then we must cause and handle an
     * exception on first use of the FPU. If the guest *does* have TS enabled
     * then this is not necessary: no FPU activity can occur until the guest