kvm_main.c

来自「linux 内核源代码」· C语言 代码 · 共 2,825 行 · 第 1/5 页

C
2,825
字号
		return X86EMUL_CONTINUE;
	}

	/*
	 * Not backed by RAM: record a pending MMIO write so userspace
	 * can complete it; the emulator continues as if it succeeded.
	 */
	vcpu->mmio_needed = 1;
	vcpu->mmio_phys_addr = gpa;
	vcpu->mmio_size = bytes;
	vcpu->mmio_is_write = 1;
	memcpy(vcpu->mmio_data, val, bytes);

	return X86EMUL_CONTINUE;
}

/*
 * Emulate a guest memory write, splitting it at a page boundary so each
 * half is translated through its own guest page mapping.
 */
int emulator_write_emulated(unsigned long addr,
				   const void *val,
				   unsigned int bytes,
				   struct kvm_vcpu *vcpu)
{
	/* Crossing a page boundary? */
	if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
		int rc, now;

		/* Bytes remaining in the first page. */
		now = -addr & ~PAGE_MASK;
		rc = emulator_write_emulated_onepage(addr, val, now, vcpu);
		if (rc != X86EMUL_CONTINUE)
			return rc;
		addr += now;
		val += now;
		bytes -= now;
	}
	return emulator_write_emulated_onepage(addr, val, bytes, vcpu);
}
EXPORT_SYMBOL_GPL(emulator_write_emulated);

/*
 * cmpxchg is emulated as a plain (non-atomic) write of the new value;
 * warn once about the downgrade.  NOTE(review): 'old' is ignored, so the
 * compare part of the semantics is not enforced.
 */
static int emulator_cmpxchg_emulated(unsigned long addr,
				     const void *old,
				     const void *new,
				     unsigned int bytes,
				     struct kvm_vcpu *vcpu)
{
	static int reported;

	if (!reported) {
		reported = 1;
		printk(KERN_WARNING "kvm: emulating exchange as write\n");
	}
	return emulator_write_emulated(addr, new, bytes, vcpu);
}

/* Fetch a segment's base address via the arch-specific backend. */
static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
{
	return kvm_x86_ops->get_segment_base(vcpu, seg);
}

/*
 * INVLPG needs no action here (the shadow MMU handles invalidation
 * elsewhere); report success to the emulator.
 */
int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
{
	return X86EMUL_CONTINUE;
}

/* Emulate CLTS: clear the Task Switched flag in guest CR0. */
int emulate_clts(struct kvm_vcpu *vcpu)
{
	kvm_x86_ops->set_cr0(vcpu, vcpu->cr0 & ~X86_CR0_TS);
	return X86EMUL_CONTINUE;
}

/*
 * Read debug register 'dr' into *dest.  Only DR0-DR3 are supported;
 * anything else is reported unimplemented.
 */
int emulator_get_dr(struct x86_emulate_ctxt* ctxt, int dr, unsigned long *dest)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;

	switch (dr) {
	case 0 ... 3:
		*dest = kvm_x86_ops->get_dr(vcpu, dr);
		return X86EMUL_CONTINUE;
	default:
		pr_unimpl(vcpu, "%s: unexpected dr %u\n", __FUNCTION__, dr);
		return X86EMUL_UNHANDLEABLE;
	}
}

/*
 * Write a debug register, masking the value to 32 bits outside of
 * 64-bit mode.
 */
int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
{
	unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U;
	int exception;

	kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask, &exception);
	if (exception) {
		/* FIXME: better handling */
		return X86EMUL_UNHANDLEABLE;
	}
	return X86EMUL_CONTINUE;
}

/*
 * Log (once per boot) an instruction the emulator could not handle,
 * dumping the first four opcode bytes at the faulting rip.
 */
void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
{
	static int reported;
	u8 opcodes[4];
	unsigned long rip = vcpu->rip;
	unsigned long rip_linear;

	rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS);

	if (reported)
		return;

	emulator_read_std(rip_linear, (void *)opcodes, 4, vcpu);

	printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n",
	       context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]);
	reported = 1;
}
EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);

/* Memory-access callbacks handed to the x86 instruction emulator. */
struct x86_emulate_ops emulate_ops = {
	.read_std            = emulator_read_std,
	.write_std           = emulator_write_std,
	.read_emulated       = emulator_read_emulated,
	.write_emulated      = emulator_write_emulated,
	.cmpxchg_emulated    = emulator_cmpxchg_emulated,
};

/*
 * Emulate the instruction at the current rip (typically after an MMIO
 * or page-fault exit at guest address cr2).  Returns EMULATE_DONE,
 * EMULATE_DO_MMIO (userspace must complete an MMIO/PIO access), or
 * EMULATE_FAIL.
 */
int emulate_instruction(struct kvm_vcpu *vcpu,
			struct kvm_run *run,
			unsigned long cr2,
			u16 error_code)
{
	struct x86_emulate_ctxt emulate_ctxt;
	int r;
	int cs_db, cs_l;

	vcpu->mmio_fault_cr2 = cr2;
	kvm_x86_ops->cache_regs(vcpu);

	kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);

	emulate_ctxt.vcpu = vcpu;
	emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu);
	emulate_ctxt.cr2 = cr2;
	/* Derive emulation mode from VM flag and CS descriptor L/DB bits. */
	emulate_ctxt.mode = (emulate_ctxt.eflags & X86_EFLAGS_VM)
		? X86EMUL_MODE_REAL : cs_l
		? X86EMUL_MODE_PROT64 :	cs_db
		? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;

	if (emulate_ctxt.mode == X86EMUL_MODE_PROT64) {
		/* Long mode: data segment bases are ignored (forced to 0). */
		emulate_ctxt.cs_base = 0;
		emulate_ctxt.ds_base = 0;
		emulate_ctxt.es_base = 0;
		emulate_ctxt.ss_base = 0;
	} else {
		emulate_ctxt.cs_base = get_segment_base(vcpu, VCPU_SREG_CS);
		emulate_ctxt.ds_base = get_segment_base(vcpu, VCPU_SREG_DS);
		emulate_ctxt.es_base = get_segment_base(vcpu, VCPU_SREG_ES);
		emulate_ctxt.ss_base = get_segment_base(vcpu, VCPU_SREG_SS);
	}

	/* FS/GS bases apply even in long mode. */
	emulate_ctxt.gs_base = get_segment_base(vcpu, VCPU_SREG_GS);
	emulate_ctxt.fs_base = get_segment_base(vcpu, VCPU_SREG_FS);

	vcpu->mmio_is_write = 0;
	vcpu->pio.string = 0;
	r = x86_emulate_memop(&emulate_ctxt, &emulate_ops);
	if (vcpu->pio.string)
		return EMULATE_DO_MMIO;

	/* Publish any pending MMIO access to userspace via the run struct. */
	if ((r || vcpu->mmio_is_write) && run) {
		run->exit_reason = KVM_EXIT_MMIO;
		run->mmio.phys_addr = vcpu->mmio_phys_addr;
		memcpy(run->mmio.data, vcpu->mmio_data, 8);
		run->mmio.len = vcpu->mmio_size;
		run->mmio.is_write = vcpu->mmio_is_write;
	}

	if (r) {
		/*
		 * Emulation failed; if unshadowing the faulting page helps
		 * (guest wrote a page that was shadowed), just retry.
		 */
		if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
			return EMULATE_DONE;
		if (!vcpu->mmio_needed) {
			kvm_report_emulation_failure(vcpu, "mmio");
			return EMULATE_FAIL;
		}
		return EMULATE_DO_MMIO;
	}

	kvm_x86_ops->decache_regs(vcpu);
	kvm_x86_ops->set_rflags(vcpu, emulate_ctxt.eflags);

	if (vcpu->mmio_is_write) {
		vcpu->mmio_needed = 0;
		return EMULATE_DO_MMIO;
	}

	return EMULATE_DONE;
}
EXPORT_SYMBOL_GPL(emulate_instruction);

/*
 * The vCPU has executed a HLT instruction with in-kernel mode enabled.
 */
static void kvm_vcpu_block(struct kvm_vcpu *vcpu)
{
	DECLARE_WAITQUEUE(wait, current);

	add_wait_queue(&vcpu->wq, &wait);

	/*
	 * We will block until either an interrupt or a signal wakes us up
	 */
	while (!kvm_cpu_has_interrupt(vcpu)
	       && !signal_pending(current)
	       && vcpu->mp_state != VCPU_MP_STATE_RUNNABLE
	       && vcpu->mp_state != VCPU_MP_STATE_SIPI_RECEIVED) {
		set_current_state(TASK_INTERRUPTIBLE);
		/* Drop the vcpu across the sleep so others can load it. */
		vcpu_put(vcpu);
		schedule();
		vcpu_load(vcpu);
	}

	__set_current_state(TASK_RUNNING);
	remove_wait_queue(&vcpu->wq, &wait);
}

/*
 * Handle a guest HLT.  With an in-kernel irqchip we block in the kernel
 * until the vcpu becomes runnable; otherwise exit to userspace.
 * Returns 1 to keep running, 0 to exit to userspace, -EINTR if
 * interrupted by a signal.
 */
int kvm_emulate_halt(struct kvm_vcpu *vcpu)
{
	++vcpu->stat.halt_exits;
	if (irqchip_in_kernel(vcpu->kvm)) {
		vcpu->mp_state = VCPU_MP_STATE_HALTED;
		kvm_vcpu_block(vcpu);
		if (vcpu->mp_state != VCPU_MP_STATE_RUNNABLE)
			return -EINTR;
		return 1;
	} else {
		vcpu->run->exit_reason = KVM_EXIT_HLT;
		return 0;
	}
}
EXPORT_SYMBOL_GPL(kvm_emulate_halt);

/*
 * Dispatch a guest hypercall.  Arguments come from different registers
 * depending on long mode vs. 32-bit mode; unknown calls are forwarded
 * to userspace via run->hypercall.
 */
int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
	unsigned long nr, a0, a1, a2, a3, a4, a5, ret;

	kvm_x86_ops->cache_regs(vcpu);
	ret = -KVM_EINVAL;
#ifdef CONFIG_X86_64
	if (is_long_mode(vcpu)) {
		nr = vcpu->regs[VCPU_REGS_RAX];
		a0 = vcpu->regs[VCPU_REGS_RDI];
		a1 = vcpu->regs[VCPU_REGS_RSI];
		a2 = vcpu->regs[VCPU_REGS_RDX];
		a3 = vcpu->regs[VCPU_REGS_RCX];
		a4 = vcpu->regs[VCPU_REGS_R8];
		a5 = vcpu->regs[VCPU_REGS_R9];
	} else
#endif
	{
		/* 32-bit ABI: truncate everything to 32 bits. */
		nr = vcpu->regs[VCPU_REGS_RBX] & -1u;
		a0 = vcpu->regs[VCPU_REGS_RAX] & -1u;
		a1 = vcpu->regs[VCPU_REGS_RCX] & -1u;
		a2 = vcpu->regs[VCPU_REGS_RDX] & -1u;
		a3 = vcpu->regs[VCPU_REGS_RSI] & -1u;
		a4 = vcpu->regs[VCPU_REGS_RDI] & -1u;
		a5 = vcpu->regs[VCPU_REGS_RBP] & -1u;
	}
	switch (nr) {
	default:
		/* No in-kernel handler: hand the call to userspace. */
		run->hypercall.nr = nr;
		run->hypercall.args[0] = a0;
		run->hypercall.args[1] = a1;
		run->hypercall.args[2] = a2;
		run->hypercall.args[3] = a3;
		run->hypercall.args[4] = a4;
		run->hypercall.args[5] = a5;
		run->hypercall.ret = ret;
		run->hypercall.longmode = is_long_mode(vcpu);
		kvm_x86_ops->decache_regs(vcpu);
		return 0;
	}
	vcpu->regs[VCPU_REGS_RAX] = ret;
	kvm_x86_ops->decache_regs(vcpu);
	return 1;
}
EXPORT_SYMBOL_GPL(kvm_hypercall);

/* Replace the low 32 bits of a control register, keeping the high bits. */
static u64 mk_cr_64(u64 curr_cr, u32 new_val)
{
	return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
}

/* Real-mode LGDT: load the guest GDT from (limit, base). */
void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
{
	struct descriptor_table dt = { limit, base };

	kvm_x86_ops->set_gdt(vcpu, &dt);
}

/* Real-mode LIDT: load the guest IDT from (limit, base). */
void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
{
	struct descriptor_table dt = { limit, base };

	kvm_x86_ops->set_idt(vcpu, &dt);
}

/* Real-mode LMSW; returns the (possibly updated) rflags via *rflags. */
void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
		   unsigned long *rflags)
{
	lmsw(vcpu, msw);
	*rflags = kvm_x86_ops->get_rflags(vcpu);
}

/* Read a control register for the real-mode emulator. */
unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
{
	kvm_x86_ops->decache_cr4_guest_bits(vcpu);
	switch (cr) {
	case 0:
		return vcpu->cr0;
	case 2:
		return vcpu->cr2;
	case 3:
		return vcpu->cr3;
	case 4:
		return vcpu->cr4;
	default:
		vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr);
		return 0;
	}
}

/*
 * Write a control register for the real-mode emulator.  CR0/CR4 writes
 * only replace the low 32 bits (mk_cr_64) and go through the checked
 * set_cr paths.
 */
void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
		     unsigned long *rflags)
{
	switch (cr) {
	case 0:
		set_cr0(vcpu, mk_cr_64(vcpu->cr0, val));
		*rflags = kvm_x86_ops->get_rflags(vcpu);
		break;
	case 2:
		vcpu->cr2 = val;
		break;
	case 3:
		set_cr3(vcpu, val);
		break;
	case 4:
		set_cr4(vcpu, mk_cr_64(vcpu->cr4, val));
		break;
	default:
		vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr);
	}
}

/*
 * Register the para guest with the host:
 */
static int vcpu_register_para(struct kvm_vcpu *vcpu, gpa_t para_state_gpa)
{
	struct kvm_vcpu_para_state *para_state;
	hpa_t para_state_hpa, hypercall_hpa;
	struct page *para_state_page;
	unsigned char *hypercall;
	gpa_t hypercall_gpa;

	printk(KERN_DEBUG "kvm: guest trying to enter paravirtual mode\n");
	printk(KERN_DEBUG ".... para_state_gpa: %08Lx\n", para_state_gpa);

	/*
	 * Needs to be page aligned:
	 */
	if (para_state_gpa != PAGE_ALIGN(para_state_gpa))
		goto err_gp;

	para_state_hpa = gpa_to_hpa(vcpu, para_state_gpa);
	printk(KERN_DEBUG ".... para_state_hpa: %08Lx\n", para_state_hpa);
	if (is_error_hpa(para_state_hpa))
		goto err_gp;

	mark_page_dirty(vcpu->kvm, para_state_gpa >> PAGE_SHIFT);
	para_state_page = pfn_to_page(para_state_hpa >> PAGE_SHIFT);
	para_state = kmap(para_state_page);

	printk(KERN_DEBUG "....  guest version: %d\n", para_state->guest_version);
	printk(KERN_DEBUG "....           size: %d\n", para_state->size);

	para_state->host_version = KVM_PARA_API_VERSION;

	/*
	 * We cannot support guests that try to register themselves
	 * with a newer API version than the host supports:
	 */
	if (para_state->guest_version > KVM_PARA_API_VERSION) {
		para_state->ret = -KVM_EINVAL;
		goto err_kunmap_skip;
	}

	hypercall_gpa = para_state->hypercall_gpa;
	hypercall_hpa = gpa_to_hpa(vcpu, hypercall_gpa);
	printk(KERN_DEBUG ".... hypercall_hpa: %08Lx\n", hypercall_hpa);
	if (is_error_hpa(hypercall_hpa)) {
		para_state->ret = -KVM_EINVAL;
		goto err_kunmap_skip;
	}

	printk(KERN_DEBUG "kvm: para guest successfully registered.\n");
	vcpu->para_state_page = para_state_page;
	vcpu->para_state_gpa = para_state_gpa;
	vcpu->hypercall_gpa = hypercall_gpa;

	/* Patch the arch-specific hypercall sequence into the guest page. */
	mark_page_dirty(vcpu->kvm, hypercall_gpa >> PAGE_SHIFT);
	hypercall = kmap_atomic(pfn_to_page(hypercall_hpa >> PAGE_SHIFT),
				KM_USER1) + (hypercall_hpa & ~PAGE_MASK);
	kvm_x86_ops->patch_hypercall(vcpu, hypercall);
	kunmap_atomic(hypercall, KM_USER1);

	para_state->ret = 0;
err_kunmap_skip:
	kunmap(para_state_page);
	return 0;
err_gp:
	/* Caller injects #GP on non-zero return — TODO confirm at call site. */
	return 1;
}

/*
 * Common rdmsr handling shared by the arch backends.  Many MSRs that
 * guests probe but we don't implement simply read as zero.
 * Returns 0 on success, 1 for an unhandled MSR.
 */
int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
	u64 data;

	switch (msr) {
	case 0xc0010010: /* SYSCFG */
	case 0xc0010015: /* HWCR */
	case MSR_IA32_PLATFORM_ID:
	case MSR_IA32_P5_MC_ADDR:
	case MSR_IA32_P5_MC_TYPE:
	case MSR_IA32_MC0_CTL:
	case MSR_IA32_MCG_STATUS:
	case MSR_IA32_MCG_CAP:
	case MSR_IA32_MC0_MISC:
	case MSR_IA32_MC0_MISC+4:
	case MSR_IA32_MC0_MISC+8:
	case MSR_IA32_MC0_MISC+12:
	case MSR_IA32_MC0_MISC+16:
	case MSR_IA32_UCODE_REV:
	case MSR_IA32_PERF_STATUS:
	case MSR_IA32_EBL_CR_POWERON:
		/* MTRR registers */
	case 0xfe:
	case 0x200 ... 0x2ff:
		data = 0;
		break;
	case 0xcd: /* fsb frequency */
		data = 3;
		break;
	case MSR_IA32_APICBASE:
		data = kvm_get_apic_base(vcpu);
		break;
	case MSR_IA32_MISC_ENABLE:
		data = vcpu->ia32_misc_enable_msr;
		break;
#ifdef CONFIG_X86_64
	case MSR_EFER:
		data = vcpu->shadow_efer;
		break;
#endif
	default:
		pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
		return 1;
	}
	*pdata = data;
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_get_msr_common);

/*
 * Reads an msr value (of 'msr_index') into 'pdata'.
 * Returns 0 on success, non-0 otherwise.
 * Assumes vcpu_load() was already called.
 */
int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
{
	return kvm_x86_ops->get_msr(vcpu, msr_index, pdata);
}

#ifdef CONFIG_X86_64

/*
 * Validate and apply a guest EFER write: reject reserved bits and any
 * attempt to flip LME while paging is on; LMA is preserved from the
 * current shadow value since the guest cannot set it directly.
 */
static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
{
	if (efer & EFER_RESERVED_BITS) {
		printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n",
		       efer);
		inject_gp(vcpu);
		return;
	}

	if (is_paging(vcpu)
	    && (vcpu->shadow_efer & EFER_LME) != (efer & EFER_LME)) {
		printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n");
		inject_gp(vcpu);
		return;
	}

	kvm_x86_ops->set_efer(vcpu, efer);

	efer &= ~EFER_LMA;
	efer |= vcpu->shadow_efer & EFER_LMA;

	vcpu->shadow_efer = efer;
}

#endif

/*
 * Common wrmsr handling shared by the arch backends.
 * Returns 0 on success, 1 for an unhandled MSR.
 */
int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	switch (msr) {
#ifdef CONFIG_X86_64
	case MSR_EFER:
		set_efer(vcpu, data);
		break;
#endif
	case MSR_IA32_MC0_STATUS:
		pr_unimpl(vcpu, "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n",
		       __FUNCTION__, data);
		break;
	case MSR_IA32_MCG_STATUS:
		pr_unimpl(vcpu, "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n",
			__FUNCTION__, data);
		break;
	case MSR_IA32_UCODE_REV:
	case MSR_IA32_UCODE_WRITE:
	case 0x200 ... 0x2ff: /* MTRRs */
		break;
	case MSR_IA32_APICBASE:
		kvm_set_apic_base(vcpu, data);
		break;
	case MSR_IA32_MISC_ENABLE:
		vcpu->ia32_misc_enable_msr = data;
		break;
	/*
	 * This is the 'probe whether the host is KVM' logic:
	 */
	case MSR_KVM_API_MAGIC:
		return vcpu_register_para(vcpu, data);
	default:
		pr_unimpl(vcpu, "unhandled wrmsr: 0x%x\n", msr);
		return 1;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_msr_common);

/*
 * Writes msr value into the appropriate "register".
 * Returns 0 on success, non-0 otherwise.
 * Assumes vcpu_load() was already called.
 */
int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
{
	return kvm_x86_ops->set_msr(vcpu, msr_index, data);
}

/* Yield the CPU if a reschedule is pending. */
void kvm_resched(struct kvm_vcpu *vcpu)
{
	if (!need_resched())
		return;
	cond_resched();
}
EXPORT_SYMBOL_GPL(kvm_resched);

void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
{
	int i;
	u32 function;
	struct kvm_cpuid_entry *e, *best;

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?