/*
 * kvm_main.c — excerpt from the Linux kernel KVM source
 * (page 1 of 5 of a 2,825-line file; code-viewer chrome translated
 * into this comment during extraction)
 */
/*
 * Tail of emulator_write_emulated_onepage(); the opening of this function
 * is on the previous page of this excerpt.  This path records a pending
 * MMIO write in the vcpu so that userspace can complete it.
 */
		return X86EMUL_CONTINUE;
	}

	/* Not RAM-backed: queue the write as an MMIO exit for userspace. */
	vcpu->mmio_needed = 1;
	vcpu->mmio_phys_addr = gpa;
	vcpu->mmio_size = bytes;
	vcpu->mmio_is_write = 1;
	memcpy(vcpu->mmio_data, val, bytes);

	return X86EMUL_CONTINUE;
}

/*
 * Emulated guest memory write.  A write that crosses a page boundary is
 * split into two one-page writes, since each page may have a different
 * backing (RAM vs. MMIO).
 */
int emulator_write_emulated(unsigned long addr,
			    const void *val,
			    unsigned int bytes,
			    struct kvm_vcpu *vcpu)
{
	/* Crossing a page boundary? */
	if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
		int rc, now;

		/* now = number of bytes that fit in the first page */
		now = -addr & ~PAGE_MASK;
		rc = emulator_write_emulated_onepage(addr, val, now, vcpu);
		if (rc != X86EMUL_CONTINUE)
			return rc;
		addr += now;
		val += now;
		bytes -= now;
	}
	return emulator_write_emulated_onepage(addr, val, bytes, vcpu);
}
EXPORT_SYMBOL_GPL(emulator_write_emulated);

/*
 * Emulate cmpxchg by performing the write unconditionally (the 'old'
 * value is ignored).  Warns once since this is not a true atomic
 * compare-and-exchange.
 */
static int emulator_cmpxchg_emulated(unsigned long addr,
				     const void *old,
				     const void *new,
				     unsigned int bytes,
				     struct kvm_vcpu *vcpu)
{
	static int reported;

	if (!reported) {
		reported = 1;
		printk(KERN_WARNING "kvm: emulating exchange as write\n");
	}
	return emulator_write_emulated(addr, new, bytes, vcpu);
}

/* Base address of a guest segment, via the vendor (VMX/SVM) backend. */
static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
{
	return kvm_x86_ops->get_segment_base(vcpu, seg);
}

/* INVLPG is a no-op in this implementation. */
int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
{
	return X86EMUL_CONTINUE;
}

/* CLTS: clear the Task Switched bit in guest CR0. */
int emulate_clts(struct kvm_vcpu *vcpu)
{
	kvm_x86_ops->set_cr0(vcpu, vcpu->cr0 & ~X86_CR0_TS);
	return X86EMUL_CONTINUE;
}

/* Read debug register @dr into *@dest; only DR0-DR3 are handled. */
int emulator_get_dr(struct x86_emulate_ctxt* ctxt, int dr, unsigned long *dest)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;

	switch (dr) {
	case 0 ... 3:
		*dest = kvm_x86_ops->get_dr(vcpu, dr);
		return X86EMUL_CONTINUE;
	default:
		pr_unimpl(vcpu, "%s: unexpected dr %u\n", __FUNCTION__, dr);
		return X86EMUL_UNHANDLEABLE;
	}
}

/*
 * Write a debug register.  Outside 64-bit mode the value is truncated
 * to 32 bits before being handed to the vendor backend.
 */
int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
{
	unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U;
	int exception;

	kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask, &exception);
	if (exception) {
		/* FIXME: better handling */
		return X86EMUL_UNHANDLEABLE;
	}
	return X86EMUL_CONTINUE;
}

/*
 * Log a failed instruction emulation: dump rip and the first four opcode
 * bytes of the faulting instruction.  Reports at most once per boot.
 */
void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
{
	static int reported;
	u8 opcodes[4];
	unsigned long rip = vcpu->rip;
	unsigned long rip_linear;

	rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS);

	if (reported)
		return;

	emulator_read_std(rip_linear, (void *)opcodes, 4, vcpu);

	printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n",
	       context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]);
	reported = 1;
}
EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);

/* Callback table handed to the x86 instruction emulator. */
struct x86_emulate_ops emulate_ops = {
	.read_std         = emulator_read_std,
	.write_std        = emulator_write_std,
	.read_emulated    = emulator_read_emulated,
	.write_emulated   = emulator_write_emulated,
	.cmpxchg_emulated = emulator_cmpxchg_emulated,
};

/*
 * Emulate one guest instruction at the current rip.
 *
 * @cr2 is the faulting address when emulation was triggered by a fault.
 * Returns EMULATE_DONE on success, EMULATE_DO_MMIO when userspace must
 * complete an MMIO or string-PIO access, or EMULATE_FAIL.
 */
int emulate_instruction(struct kvm_vcpu *vcpu,
			struct kvm_run *run,
			unsigned long cr2,
			u16 error_code)
{
	struct x86_emulate_ctxt emulate_ctxt;
	int r;
	int cs_db, cs_l;

	vcpu->mmio_fault_cr2 = cr2;
	kvm_x86_ops->cache_regs(vcpu);

	kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);

	emulate_ctxt.vcpu = vcpu;
	emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu);
	emulate_ctxt.cr2 = cr2;
	/* Derive the emulation mode from EFLAGS.VM and the CS D/B and L bits. */
	emulate_ctxt.mode = (emulate_ctxt.eflags & X86_EFLAGS_VM)
		? X86EMUL_MODE_REAL : cs_l
		? X86EMUL_MODE_PROT64 : cs_db
		? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;

	if (emulate_ctxt.mode == X86EMUL_MODE_PROT64) {
		/* Segment bases are ignored in long mode. */
		emulate_ctxt.cs_base = 0;
		emulate_ctxt.ds_base = 0;
		emulate_ctxt.es_base = 0;
		emulate_ctxt.ss_base = 0;
	} else {
		emulate_ctxt.cs_base = get_segment_base(vcpu, VCPU_SREG_CS);
		emulate_ctxt.ds_base = get_segment_base(vcpu, VCPU_SREG_DS);
		emulate_ctxt.es_base = get_segment_base(vcpu, VCPU_SREG_ES);
		emulate_ctxt.ss_base = get_segment_base(vcpu, VCPU_SREG_SS);
	}

	emulate_ctxt.gs_base = get_segment_base(vcpu, VCPU_SREG_GS);
	emulate_ctxt.fs_base = get_segment_base(vcpu, VCPU_SREG_FS);

	vcpu->mmio_is_write = 0;
	vcpu->pio.string = 0;
	r = x86_emulate_memop(&emulate_ctxt, &emulate_ops);
	if (vcpu->pio.string)
		return EMULATE_DO_MMIO;

	if ((r || vcpu->mmio_is_write) && run) {
		/* Fill in the MMIO exit record for userspace. */
		run->exit_reason = KVM_EXIT_MMIO;
		run->mmio.phys_addr = vcpu->mmio_phys_addr;
		memcpy(run->mmio.data, vcpu->mmio_data, 8);
		run->mmio.len = vcpu->mmio_size;
		run->mmio.is_write = vcpu->mmio_is_write;
	}

	if (r) {
		/*
		 * The fault may have hit a shadowed guest page table;
		 * unprotecting it and retrying may succeed.
		 */
		if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
			return EMULATE_DONE;
		if (!vcpu->mmio_needed) {
			kvm_report_emulation_failure(vcpu, "mmio");
			return EMULATE_FAIL;
		}
		return EMULATE_DO_MMIO;
	}

	kvm_x86_ops->decache_regs(vcpu);
	kvm_x86_ops->set_rflags(vcpu, emulate_ctxt.eflags);

	if (vcpu->mmio_is_write) {
		vcpu->mmio_needed = 0;
		return EMULATE_DO_MMIO;
	}

	return EMULATE_DONE;
}
EXPORT_SYMBOL_GPL(emulate_instruction);

/*
 * The vCPU has executed a HLT instruction with in-kernel mode enabled.
*/
static void kvm_vcpu_block(struct kvm_vcpu *vcpu)
{
	DECLARE_WAITQUEUE(wait, current);

	add_wait_queue(&vcpu->wq, &wait);

	/*
	 * We will block until either an interrupt or a signal wakes us up
	 */
	while (!kvm_cpu_has_interrupt(vcpu)
	       && !signal_pending(current)
	       && vcpu->mp_state != VCPU_MP_STATE_RUNNABLE
	       && vcpu->mp_state != VCPU_MP_STATE_SIPI_RECEIVED) {
		set_current_state(TASK_INTERRUPTIBLE);
		/* Release the vcpu while sleeping so others can use it. */
		vcpu_put(vcpu);
		schedule();
		vcpu_load(vcpu);
	}

	__set_current_state(TASK_RUNNING);
	remove_wait_queue(&vcpu->wq, &wait);
}

/*
 * Handle a guest HLT.  With an in-kernel irqchip the vcpu blocks in the
 * kernel until it becomes runnable again; otherwise the HLT is reported
 * to userspace via KVM_EXIT_HLT.
 */
int kvm_emulate_halt(struct kvm_vcpu *vcpu)
{
	++vcpu->stat.halt_exits;
	if (irqchip_in_kernel(vcpu->kvm)) {
		vcpu->mp_state = VCPU_MP_STATE_HALTED;
		kvm_vcpu_block(vcpu);
		if (vcpu->mp_state != VCPU_MP_STATE_RUNNABLE)
			return -EINTR;
		return 1;
	} else {
		vcpu->run->exit_reason = KVM_EXIT_HLT;
		return 0;
	}
}
EXPORT_SYMBOL_GPL(kvm_emulate_halt);

/*
 * Dispatch a guest hypercall.  Arguments are fetched from the registers
 * dictated by the 32- or 64-bit hypercall ABI; calls with no in-kernel
 * handler are forwarded to userspace through run->hypercall.
 */
int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
	unsigned long nr, a0, a1, a2, a3, a4, a5, ret;

	kvm_x86_ops->cache_regs(vcpu);
	ret = -KVM_EINVAL;
#ifdef CONFIG_X86_64
	if (is_long_mode(vcpu)) {
		nr = vcpu->regs[VCPU_REGS_RAX];
		a0 = vcpu->regs[VCPU_REGS_RDI];
		a1 = vcpu->regs[VCPU_REGS_RSI];
		a2 = vcpu->regs[VCPU_REGS_RDX];
		a3 = vcpu->regs[VCPU_REGS_RCX];
		a4 = vcpu->regs[VCPU_REGS_R8];
		a5 = vcpu->regs[VCPU_REGS_R9];
	} else
#endif
	{
		/* 32-bit ABI: values are truncated to 32 bits. */
		nr = vcpu->regs[VCPU_REGS_RBX] & -1u;
		a0 = vcpu->regs[VCPU_REGS_RAX] & -1u;
		a1 = vcpu->regs[VCPU_REGS_RCX] & -1u;
		a2 = vcpu->regs[VCPU_REGS_RDX] & -1u;
		a3 = vcpu->regs[VCPU_REGS_RSI] & -1u;
		a4 = vcpu->regs[VCPU_REGS_RDI] & -1u;
		a5 = vcpu->regs[VCPU_REGS_RBP] & -1u;
	}
	switch (nr) {
	default:
		/* No in-kernel handler: punt the call to userspace. */
		run->hypercall.nr = nr;
		run->hypercall.args[0] = a0;
		run->hypercall.args[1] = a1;
		run->hypercall.args[2] = a2;
		run->hypercall.args[3] = a3;
		run->hypercall.args[4] = a4;
		run->hypercall.args[5] = a5;
		run->hypercall.ret = ret;
		run->hypercall.longmode = is_long_mode(vcpu);
		kvm_x86_ops->decache_regs(vcpu);
		return 0;
	}
	vcpu->regs[VCPU_REGS_RAX] = ret;
	kvm_x86_ops->decache_regs(vcpu);
	return 1;
}
EXPORT_SYMBOL_GPL(kvm_hypercall);

/* Replace the low 32 bits of a 64-bit control-register value. */
static u64 mk_cr_64(u64 curr_cr, u32 new_val)
{
	return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
}

/* Real-mode LGDT: load the guest GDT descriptor. */
void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
{
	struct descriptor_table dt = { limit, base };

	kvm_x86_ops->set_gdt(vcpu, &dt);
}

/* Real-mode LIDT: load the guest IDT descriptor. */
void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
{
	struct descriptor_table dt = { limit, base };

	kvm_x86_ops->set_idt(vcpu, &dt);
}

/* Real-mode LMSW; also refreshes the caller's copy of rflags. */
void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
		   unsigned long *rflags)
{
	lmsw(vcpu, msw);
	*rflags = kvm_x86_ops->get_rflags(vcpu);
}

/* Read a control register on behalf of real-mode emulation. */
unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
{
	kvm_x86_ops->decache_cr4_guest_bits(vcpu);
	switch (cr) {
	case 0:
		return vcpu->cr0;
	case 2:
		return vcpu->cr2;
	case 3:
		return vcpu->cr3;
	case 4:
		return vcpu->cr4;
	default:
		vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr);
		return 0;
	}
}

/*
 * Write a control register on behalf of real-mode emulation.  CR0/CR4
 * writes replace only the low 32 bits; a CR0 write also refreshes the
 * caller's copy of rflags.
 */
void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
		     unsigned long *rflags)
{
	switch (cr) {
	case 0:
		set_cr0(vcpu, mk_cr_64(vcpu->cr0, val));
		*rflags = kvm_x86_ops->get_rflags(vcpu);
		break;
	case 2:
		vcpu->cr2 = val;
		break;
	case 3:
		set_cr3(vcpu, val);
		break;
	case 4:
		set_cr4(vcpu, mk_cr_64(vcpu->cr4, val));
		break;
	default:
		vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr);
	}
}

/*
 * Register the para guest with the host:
 * validates the guest-supplied para_state page, checks API-version
 * compatibility, and patches the hypercall sequence into the guest's
 * designated hypercall page.  Returns 0 on completion, 1 to raise #GP.
 */
static int vcpu_register_para(struct kvm_vcpu *vcpu, gpa_t para_state_gpa)
{
	struct kvm_vcpu_para_state *para_state;
	hpa_t para_state_hpa, hypercall_hpa;
	struct page *para_state_page;
	unsigned char *hypercall;
	gpa_t hypercall_gpa;

	printk(KERN_DEBUG "kvm: guest trying to enter paravirtual mode\n");
	printk(KERN_DEBUG ".... para_state_gpa: %08Lx\n", para_state_gpa);

	/*
	 * Needs to be page aligned:
	 */
	if (para_state_gpa != PAGE_ALIGN(para_state_gpa))
		goto err_gp;

	para_state_hpa = gpa_to_hpa(vcpu, para_state_gpa);
	printk(KERN_DEBUG ".... para_state_hpa: %08Lx\n", para_state_hpa);
	if (is_error_hpa(para_state_hpa))
		goto err_gp;

	mark_page_dirty(vcpu->kvm, para_state_gpa >> PAGE_SHIFT);
	para_state_page = pfn_to_page(para_state_hpa >> PAGE_SHIFT);
	para_state = kmap(para_state_page);

	printk(KERN_DEBUG ".... guest version: %d\n", para_state->guest_version);
	printk(KERN_DEBUG ".... size: %d\n", para_state->size);

	para_state->host_version = KVM_PARA_API_VERSION;
	/*
	 * We cannot support guests that try to register themselves
	 * with a newer API version than the host supports:
	 */
	if (para_state->guest_version > KVM_PARA_API_VERSION) {
		para_state->ret = -KVM_EINVAL;
		goto err_kunmap_skip;
	}

	hypercall_gpa = para_state->hypercall_gpa;
	hypercall_hpa = gpa_to_hpa(vcpu, hypercall_gpa);
	printk(KERN_DEBUG ".... hypercall_hpa: %08Lx\n", hypercall_hpa);
	if (is_error_hpa(hypercall_hpa)) {
		para_state->ret = -KVM_EINVAL;
		goto err_kunmap_skip;
	}

	printk(KERN_DEBUG "kvm: para guest successfully registered.\n");
	vcpu->para_state_page = para_state_page;
	vcpu->para_state_gpa = para_state_gpa;
	vcpu->hypercall_gpa = hypercall_gpa;

	mark_page_dirty(vcpu->kvm, hypercall_gpa >> PAGE_SHIFT);
	/* Patch the vendor-specific hypercall sequence into the guest page. */
	hypercall = kmap_atomic(pfn_to_page(hypercall_hpa >> PAGE_SHIFT),
				KM_USER1) + (hypercall_hpa & ~PAGE_MASK);
	kvm_x86_ops->patch_hypercall(vcpu, hypercall);
	kunmap_atomic(hypercall, KM_USER1);

	para_state->ret = 0;
err_kunmap_skip:
	kunmap(para_state_page);
	return 0;
err_gp:
	return 1;
}

/*
 * Common rdmsr handling shared by the vendor backends.  Returns 0 and
 * fills *pdata on success, 1 for an unhandled MSR.
 */
int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
	u64 data;

	switch (msr) {
	case 0xc0010010: /* SYSCFG */
	case 0xc0010015: /* HWCR */
	case MSR_IA32_PLATFORM_ID:
	case MSR_IA32_P5_MC_ADDR:
	case MSR_IA32_P5_MC_TYPE:
	case MSR_IA32_MC0_CTL:
	case MSR_IA32_MCG_STATUS:
	case MSR_IA32_MCG_CAP:
	case MSR_IA32_MC0_MISC:
	case MSR_IA32_MC0_MISC+4:
	case MSR_IA32_MC0_MISC+8:
	case MSR_IA32_MC0_MISC+12:
	case MSR_IA32_MC0_MISC+16:
	case MSR_IA32_UCODE_REV:
	case MSR_IA32_PERF_STATUS:
	case MSR_IA32_EBL_CR_POWERON:
		/* MTRR registers */
	case 0xfe:
	case 0x200 ... 0x2ff:
		data = 0;
		break;
	case 0xcd: /* fsb frequency */
		data = 3;
		break;
	case MSR_IA32_APICBASE:
		data = kvm_get_apic_base(vcpu);
		break;
	case MSR_IA32_MISC_ENABLE:
		data = vcpu->ia32_misc_enable_msr;
		break;
#ifdef CONFIG_X86_64
	case MSR_EFER:
		data = vcpu->shadow_efer;
		break;
#endif
	default:
		pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
		return 1;
	}
	*pdata = data;
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_get_msr_common);

/*
 * Reads an msr value (of 'msr_index') into 'pdata'.
 * Returns 0 on success, non-0 otherwise.
 * Assumes vcpu_load() was already called.
 */
int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
{
	return kvm_x86_ops->get_msr(vcpu, msr_index, pdata);
}

#ifdef CONFIG_X86_64

/*
 * Validate and set EFER.  Rejects reserved bits and any LME change made
 * while paging is enabled; the LMA bit is preserved from the current
 * shadow value rather than taken from the guest.
 */
static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
{
	if (efer & EFER_RESERVED_BITS) {
		printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n",
		       efer);
		inject_gp(vcpu);
		return;
	}

	if (is_paging(vcpu)
	    && (vcpu->shadow_efer & EFER_LME) != (efer & EFER_LME)) {
		printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n");
		inject_gp(vcpu);
		return;
	}

	kvm_x86_ops->set_efer(vcpu, efer);

	efer &= ~EFER_LMA;
	efer |= vcpu->shadow_efer & EFER_LMA;

	vcpu->shadow_efer = efer;
}

#endif

/*
 * Common wrmsr handling shared by the vendor backends.  Returns 0 on
 * success, 1 for an unhandled MSR.
 */
int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	switch (msr) {
#ifdef CONFIG_X86_64
	case MSR_EFER:
		set_efer(vcpu, data);
		break;
#endif
	case MSR_IA32_MC0_STATUS:
		pr_unimpl(vcpu, "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n",
			  __FUNCTION__, data);
		break;
	case MSR_IA32_MCG_STATUS:
		pr_unimpl(vcpu, "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n",
			  __FUNCTION__, data);
		break;
	case MSR_IA32_UCODE_REV:
	case MSR_IA32_UCODE_WRITE:
	case 0x200 ... 0x2ff: /* MTRRs */
		break;
	case MSR_IA32_APICBASE:
		kvm_set_apic_base(vcpu, data);
		break;
	case MSR_IA32_MISC_ENABLE:
		vcpu->ia32_misc_enable_msr = data;
		break;
	/*
	 * This is the 'probe whether the host is KVM' logic:
	 */
	case MSR_KVM_API_MAGIC:
		return vcpu_register_para(vcpu, data);
	default:
		pr_unimpl(vcpu, "unhandled wrmsr: 0x%x\n", msr);
		return 1;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_msr_common);

/*
 * Writes msr value into the appropriate "register".
 * Returns 0 on success, non-0 otherwise.
 * Assumes vcpu_load() was already called.
 */
int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
{
	return kvm_x86_ops->set_msr(vcpu, msr_index, data);
}

/* Yield the CPU only if a reschedule is actually pending. */
void kvm_resched(struct kvm_vcpu *vcpu)
{
	if (!need_resched())
		return;
	cond_resched();
}
EXPORT_SYMBOL_GPL(kvm_resched);

/*
 * Head of kvm_emulate_cpuid(); the remainder of this function is on the
 * next page of this excerpt.
 */
void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
{
	int i;
	u32 function;
	struct kvm_cpuid_entry *e, *best;
/*
 * (End of page 1 of the excerpt.  The source-viewer's keyboard-shortcut
 * panel — copy Ctrl+C, search Ctrl+F, fullscreen F11, font size
 * Ctrl+= / Ctrl+-, show shortcuts ? — was removed and recorded here
 * so the file remains valid C.)
 */