kvm_main.c

From the Linux kernel source tree · C · 2,825 lines · page 1 of 5

		       sizeof sregs->interrupt_bitmap);
		pending_vec = kvm_x86_ops->get_irq(vcpu);
		if (pending_vec >= 0)
			set_bit(pending_vec,
				(unsigned long *)sregs->interrupt_bitmap);
	} else
		memcpy(sregs->interrupt_bitmap, vcpu->irq_pending,
		       sizeof sregs->interrupt_bitmap);

	vcpu_put(vcpu);

	return 0;
}

static void set_segment(struct kvm_vcpu *vcpu,
			struct kvm_segment *var, int seg)
{
	return kvm_x86_ops->set_segment(vcpu, var, seg);
}

static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				    struct kvm_sregs *sregs)
{
	int mmu_reset_needed = 0;
	int i, pending_vec, max_bits;
	struct descriptor_table dt;

	vcpu_load(vcpu);

	dt.limit = sregs->idt.limit;
	dt.base = sregs->idt.base;
	kvm_x86_ops->set_idt(vcpu, &dt);
	dt.limit = sregs->gdt.limit;
	dt.base = sregs->gdt.base;
	kvm_x86_ops->set_gdt(vcpu, &dt);

	vcpu->cr2 = sregs->cr2;
	mmu_reset_needed |= vcpu->cr3 != sregs->cr3;
	vcpu->cr3 = sregs->cr3;

	set_cr8(vcpu, sregs->cr8);

	mmu_reset_needed |= vcpu->shadow_efer != sregs->efer;
#ifdef CONFIG_X86_64
	kvm_x86_ops->set_efer(vcpu, sregs->efer);
#endif
	kvm_set_apic_base(vcpu, sregs->apic_base);

	kvm_x86_ops->decache_cr4_guest_bits(vcpu);

	mmu_reset_needed |= vcpu->cr0 != sregs->cr0;
	vcpu->cr0 = sregs->cr0;
	kvm_x86_ops->set_cr0(vcpu, sregs->cr0);

	mmu_reset_needed |= vcpu->cr4 != sregs->cr4;
	kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
	if (!is_long_mode(vcpu) && is_pae(vcpu))
		load_pdptrs(vcpu, vcpu->cr3);

	if (mmu_reset_needed)
		kvm_mmu_reset_context(vcpu);

	if (!irqchip_in_kernel(vcpu->kvm)) {
		memcpy(vcpu->irq_pending, sregs->interrupt_bitmap,
		       sizeof vcpu->irq_pending);
		vcpu->irq_summary = 0;
		for (i = 0; i < ARRAY_SIZE(vcpu->irq_pending); ++i)
			if (vcpu->irq_pending[i])
				__set_bit(i, &vcpu->irq_summary);
	} else {
		max_bits = (sizeof sregs->interrupt_bitmap) << 3;
		pending_vec = find_first_bit(
			(const unsigned long *)sregs->interrupt_bitmap,
			max_bits);
		/* Only pending external irq is handled here */
		if (pending_vec < max_bits) {
			kvm_x86_ops->set_irq(vcpu, pending_vec);
			printk("Set back pending irq %d\n", pending_vec);
		}
	}

	set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
	set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
	set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
	set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
	set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
	set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
	set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
	set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);

	vcpu_put(vcpu);

	return 0;
}

void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
{
	struct kvm_segment cs;

	get_segment(vcpu, &cs, VCPU_SREG_CS);
	*db = cs.db;
	*l = cs.l;
}
EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
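/*
 * Illustrative sketch, not part of kvm_main.c: how userspace would drive
 * the KVM_GET_SREGS/KVM_SET_SREGS handlers above from a vcpu file
 * descriptor.  The function name and the CR0 bit chosen are assumptions
 * made for the example.
 */
#if 0
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int example_enable_protected_mode(int vcpu_fd)
{
	struct kvm_sregs sregs;

	/* Read-modify-write the special registers through the ioctls. */
	if (ioctl(vcpu_fd, KVM_GET_SREGS, &sregs) < 0)
		return -1;
	sregs.cr0 |= 0x1;		/* CR0.PE */
	return ioctl(vcpu_fd, KVM_SET_SREGS, &sregs);
}
#endif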
/*
 * List of msr numbers which we expose to userspace through KVM_GET_MSRS
 * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
 *
 * This list is modified at module load time to reflect the
 * capabilities of the host cpu.
 */
static u32 msrs_to_save[] = {
	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
	MSR_K6_STAR,
#ifdef CONFIG_X86_64
	MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
#endif
	MSR_IA32_TIME_STAMP_COUNTER,
};

static unsigned num_msrs_to_save;

static u32 emulated_msrs[] = {
	MSR_IA32_MISC_ENABLE,
};

static __init void kvm_init_msr_list(void)
{
	u32 dummy[2];
	unsigned i, j;

	for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
		if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
			continue;
		if (j < i)
			msrs_to_save[j] = msrs_to_save[i];
		j++;
	}
	num_msrs_to_save = j;
}

/*
 * Adapt set_msr() to msr_io()'s calling convention
 */
static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
{
	return kvm_set_msr(vcpu, index, *data);
}

/*
 * Read or write a bunch of msrs. All parameters are kernel addresses.
 *
 * @return number of msrs set successfully.
 */
static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
		    struct kvm_msr_entry *entries,
		    int (*do_msr)(struct kvm_vcpu *vcpu,
				  unsigned index, u64 *data))
{
	int i;

	vcpu_load(vcpu);

	for (i = 0; i < msrs->nmsrs; ++i)
		if (do_msr(vcpu, entries[i].index, &entries[i].data))
			break;

	vcpu_put(vcpu);

	return i;
}

/*
 * Read or write a bunch of msrs. Parameters are user addresses.
 *
 * @return number of msrs set successfully.
 */
static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
		  int (*do_msr)(struct kvm_vcpu *vcpu,
				unsigned index, u64 *data),
		  int writeback)
{
	struct kvm_msrs msrs;
	struct kvm_msr_entry *entries;
	int r, n;
	unsigned size;

	r = -EFAULT;
	if (copy_from_user(&msrs, user_msrs, sizeof msrs))
		goto out;

	r = -E2BIG;
	if (msrs.nmsrs >= MAX_IO_MSRS)
		goto out;

	r = -ENOMEM;
	size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
	entries = vmalloc(size);
	if (!entries)
		goto out;

	r = -EFAULT;
	if (copy_from_user(entries, user_msrs->entries, size))
		goto out_free;

	r = n = __msr_io(vcpu, &msrs, entries, do_msr);
	if (r < 0)
		goto out_free;

	r = -EFAULT;
	if (writeback && copy_to_user(user_msrs->entries, entries, size))
		goto out_free;

	r = n;

out_free:
	vfree(entries);
out:
	return r;
}
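/*
 * Illustrative sketch, not part of kvm_main.c: querying the MSR index
 * list that kvm_init_msr_list() above assembles, via the system fd's
 * KVM_GET_MSR_INDEX_LIST ioctl.  The buffer capacity of 256 is an
 * assumption for the example.
 */
#if 0
#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <stdlib.h>

static struct kvm_msr_list *example_get_msr_list(int kvm_fd)
{
	struct kvm_msr_list *list;

	/* nmsrs carries capacity in, and the saved+emulated count out. */
	list = malloc(sizeof(*list) + 256 * sizeof(__u32));
	if (!list)
		return NULL;
	list->nmsrs = 256;
	if (ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list) < 0) {
		free(list);
		return NULL;
	}
	return list;
}
#endif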
/*
 * Translate a guest virtual address to a guest physical address.
 */
static int kvm_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				    struct kvm_translation *tr)
{
	unsigned long vaddr = tr->linear_address;
	gpa_t gpa;

	vcpu_load(vcpu);
	mutex_lock(&vcpu->kvm->lock);
	gpa = vcpu->mmu.gva_to_gpa(vcpu, vaddr);
	tr->physical_address = gpa;
	tr->valid = gpa != UNMAPPED_GVA;
	tr->writeable = 1;
	tr->usermode = 0;
	mutex_unlock(&vcpu->kvm->lock);
	vcpu_put(vcpu);

	return 0;
}

static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
				    struct kvm_interrupt *irq)
{
	if (irq->irq < 0 || irq->irq >= 256)
		return -EINVAL;
	if (irqchip_in_kernel(vcpu->kvm))
		return -ENXIO;
	vcpu_load(vcpu);

	set_bit(irq->irq, vcpu->irq_pending);
	set_bit(irq->irq / BITS_PER_LONG, &vcpu->irq_summary);

	vcpu_put(vcpu);

	return 0;
}

static int kvm_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
				      struct kvm_debug_guest *dbg)
{
	int r;

	vcpu_load(vcpu);

	r = kvm_x86_ops->set_guest_debug(vcpu, dbg);

	vcpu_put(vcpu);

	return r;
}

static struct page *kvm_vcpu_nopage(struct vm_area_struct *vma,
				    unsigned long address,
				    int *type)
{
	struct kvm_vcpu *vcpu = vma->vm_file->private_data;
	unsigned long pgoff;
	struct page *page;

	pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
	if (pgoff == 0)
		page = virt_to_page(vcpu->run);
	else if (pgoff == KVM_PIO_PAGE_OFFSET)
		page = virt_to_page(vcpu->pio_data);
	else
		return NOPAGE_SIGBUS;
	get_page(page);
	if (type != NULL)
		*type = VM_FAULT_MINOR;

	return page;
}

static struct vm_operations_struct kvm_vcpu_vm_ops = {
	.nopage = kvm_vcpu_nopage,
};

static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma)
{
	vma->vm_ops = &kvm_vcpu_vm_ops;
	return 0;
}

static int kvm_vcpu_release(struct inode *inode, struct file *filp)
{
	struct kvm_vcpu *vcpu = filp->private_data;

	fput(vcpu->kvm->filp);
	return 0;
}

static struct file_operations kvm_vcpu_fops = {
	.release        = kvm_vcpu_release,
	.unlocked_ioctl = kvm_vcpu_ioctl,
	.compat_ioctl   = kvm_vcpu_ioctl,
	.mmap           = kvm_vcpu_mmap,
};

/*
 * Allocates an inode for the vcpu.
 */
static int create_vcpu_fd(struct kvm_vcpu *vcpu)
{
	int fd, r;
	struct inode *inode;
	struct file *file;

	r = anon_inode_getfd(&fd, &inode, &file,
			     "kvm-vcpu", &kvm_vcpu_fops, vcpu);
	if (r)
		return r;
	atomic_inc(&vcpu->kvm->filp->f_count);
	return fd;
}
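/*
 * Illustrative sketch, not part of kvm_main.c: mapping the kvm_run area
 * that kvm_vcpu_nopage() above serves at page offset 0 of the vcpu fd.
 * Sizing the mapping with KVM_GET_VCPU_MMAP_SIZE is an assumption about
 * the userspace ABI of this era.
 */
#if 0
#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <stddef.h>

static struct kvm_run *example_map_run(int kvm_fd, int vcpu_fd)
{
	long size = ioctl(kvm_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
	void *p;

	if (size < 0)
		return NULL;
	/* Offset 0 is vcpu->run; KVM_PIO_PAGE_OFFSET holds the PIO data. */
	p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu_fd, 0);
	return p == MAP_FAILED ? NULL : p;
}
#endif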
/*
 * Creates some virtual cpus.  Good luck creating more than one.
 */
static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
{
	int r;
	struct kvm_vcpu *vcpu;

	if (!valid_vcpu(n))
		return -EINVAL;

	vcpu = kvm_x86_ops->vcpu_create(kvm, n);
	if (IS_ERR(vcpu))
		return PTR_ERR(vcpu);

	preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops);

	/* We do fxsave: this must be aligned. */
	BUG_ON((unsigned long)&vcpu->host_fx_image & 0xF);

	vcpu_load(vcpu);
	r = kvm_mmu_setup(vcpu);
	vcpu_put(vcpu);
	if (r < 0)
		goto free_vcpu;

	mutex_lock(&kvm->lock);
	if (kvm->vcpus[n]) {
		r = -EEXIST;
		mutex_unlock(&kvm->lock);
		goto mmu_unload;
	}
	kvm->vcpus[n] = vcpu;
	mutex_unlock(&kvm->lock);

	/* Now it's all set up, let userspace reach it */
	r = create_vcpu_fd(vcpu);
	if (r < 0)
		goto unlink;
	return r;

unlink:
	mutex_lock(&kvm->lock);
	kvm->vcpus[n] = NULL;
	mutex_unlock(&kvm->lock);

mmu_unload:
	vcpu_load(vcpu);
	kvm_mmu_unload(vcpu);
	vcpu_put(vcpu);

free_vcpu:
	kvm_x86_ops->vcpu_free(vcpu);
	return r;
}

static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
{
	u64 efer;
	int i;
	struct kvm_cpuid_entry *e, *entry;

	rdmsrl(MSR_EFER, efer);
	entry = NULL;
	for (i = 0; i < vcpu->cpuid_nent; ++i) {
		e = &vcpu->cpuid_entries[i];
		if (e->function == 0x80000001) {
			entry = e;
			break;
		}
	}
	if (entry && (entry->edx & (1 << 20)) && !(efer & EFER_NX)) {
		entry->edx &= ~(1 << 20);
		printk(KERN_INFO "kvm: guest NX capability removed\n");
	}
}

static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
				    struct kvm_cpuid *cpuid,
				    struct kvm_cpuid_entry __user *entries)
{
	int r;

	r = -E2BIG;
	if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
		goto out;
	r = -EFAULT;
	if (copy_from_user(&vcpu->cpuid_entries, entries,
			   cpuid->nent * sizeof(struct kvm_cpuid_entry)))
		goto out;
	vcpu->cpuid_nent = cpuid->nent;
	cpuid_fix_nx_cap(vcpu);
	return 0;

out:
	return r;
}

static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset)
{
	if (sigset) {
		sigdelsetmask(sigset, sigmask(SIGKILL)|sigmask(SIGSTOP));
		vcpu->sigset_active = 1;
		vcpu->sigset = *sigset;
	} else
		vcpu->sigset_active = 0;
	return 0;
}

/*
 * fxsave fpu state.  Taken from x86_64/processor.h.  To be killed when
 * we have asm/x86/processor.h
 */
struct fxsave {
	u16	cwd;
	u16	swd;
	u16	twd;
	u16	fop;
	u64	rip;
	u64	rdp;
	u32	mxcsr;
	u32	mxcsr_mask;
	u32	st_space[32];	/* 8*16 bytes for each FP-reg = 128 bytes */
#ifdef CONFIG_X86_64
	u32	xmm_space[64];	/* 16*16 bytes for each XMM-reg = 256 bytes */
#else
	u32	xmm_space[32];	/* 8*16 bytes for each XMM-reg = 128 bytes */
#endif
};

static int kvm_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	struct fxsave *fxsave = (struct fxsave *)&vcpu->guest_fx_image;

	vcpu_load(vcpu);

	memcpy(fpu->fpr, fxsave->st_space, 128);
	fpu->fcw = fxsave->cwd;
	fpu->fsw = fxsave->swd;
	fpu->ftwx = fxsave->twd;
	fpu->last_opcode = fxsave->fop;
	fpu->last_ip = fxsave->rip;
	fpu->last_dp = fxsave->rdp;
	memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space);

	vcpu_put(vcpu);

	return 0;
}

static int kvm_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	struct fxsave *fxsave = (struct fxsave *)&vcpu->guest_fx_image;

	vcpu_load(vcpu);

	memcpy(fxsave->st_space, fpu->fpr, 128);
	fxsave->cwd = fpu->fcw;
	fxsave->swd = fpu->fsw;
	fxsave->twd = fpu->ftwx;
	fxsave->fop = fpu->last_opcode;
	fxsave->rip = fpu->last_ip;
	fxsave->rdp = fpu->last_dp;
	memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space);

	vcpu_put(vcpu);

	return 0;
}

static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
				    struct kvm_lapic_state *s)
{
	vcpu_load(vcpu);
	memcpy(s->regs, vcpu->apic->regs, sizeof *s);
	vcpu_put(vcpu);

	return 0;
}

static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
				    struct kvm_lapic_state *s)
{
	vcpu_load(vcpu);
	memcpy(vcpu->apic->regs, s->regs, sizeof *s);
	kvm_apic_post_state_restore(vcpu);
	vcpu_put(vcpu);

	return 0;
}
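/*
 * Illustrative sketch, not part of kvm_main.c: handing the vcpu a single
 * CPUID leaf through KVM_SET_CPUID, the path that ends in
 * cpuid_fix_nx_cap() above.  The wrapper struct, leaf number, and bit
 * chosen are assumptions for the example.
 */
#if 0
#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <string.h>

static int example_set_cpuid(int vcpu_fd)
{
	struct {
		struct kvm_cpuid cpuid;
		struct kvm_cpuid_entry entry;	/* backs entries[0] */
	} buf;

	memset(&buf, 0, sizeof(buf));
	buf.cpuid.nent = 1;
	buf.entry.function = 0x80000001;
	buf.entry.edx = 1 << 20;	/* NX; cleared if the host lacks EFER.NX */
	return ioctl(vcpu_fd, KVM_SET_CPUID, &buf.cpuid);
}
#endif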
static long kvm_vcpu_ioctl(struct file *filp,
			   unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int r = -EINVAL;

	switch (ioctl) {
	case KVM_RUN:
		r = -EINVAL;
		if (arg)
			goto out;
		r = kvm_vcpu_ioctl_run(vcpu, vcpu->run);
		break;
	case KVM_GET_REGS: {
		struct kvm_regs kvm_regs;

		memset(&kvm_regs, 0, sizeof kvm_regs);
		r = kvm_vcpu_ioctl_get_regs(vcpu, &kvm_regs);
		if (r)
			goto out;
		r = -EFAULT;
		if (copy_to_user(argp, &kvm_regs, sizeof kvm_regs))
			goto out;
		r = 0;
		break;
	}
	case KVM_SET_REGS: {
		struct kvm_regs kvm_regs;

		r = -EFAULT;
		if (copy_from_user(&kvm_regs, argp, sizeof kvm_regs))
			goto out;
		r = kvm_vcpu_ioctl_set_regs(vcpu, &kvm_regs);
		if
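/*
 * Illustrative sketch, not part of kvm_main.c: the minimal userspace loop
 * against the KVM_RUN/KVM_GET_REGS cases dispatched above.  `run` is the
 * mmap()ed kvm_run area; treating KVM_EXIT_HLT as the interesting exit is
 * just an example choice.
 */
#if 0
#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <stdio.h>

static void example_run_once(int vcpu_fd, struct kvm_run *run)
{
	struct kvm_regs regs;

	/* KVM_RUN takes no argument; the handler rejects arg != 0. */
	if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
		return;
	if (run->exit_reason == KVM_EXIT_HLT &&
	    ioctl(vcpu_fd, KVM_GET_REGS, &regs) == 0)
		printf("guest halted at rip 0x%llx\n",
		       (unsigned long long)regs.rip);
}
#endif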
