kvm_main.c
来自「linux 内核源代码」· C语言 代码 · 共 2,825 行 · 第 1/5 页
C
2,825 行
sizeof sregs->interrupt_bitmap);
		/*
		 * Ask the arch hook for a pending vector; only a
		 * non-negative result is recorded in the bitmap.
		 */
		pending_vec = kvm_x86_ops->get_irq(vcpu);
		if (pending_vec >= 0)
			set_bit(pending_vec,
				(unsigned long *)sregs->interrupt_bitmap);
	} else
		/* Export vcpu->irq_pending as the interrupt bitmap. */
		memcpy(sregs->interrupt_bitmap, vcpu->irq_pending,
		       sizeof sregs->interrupt_bitmap);

	vcpu_put(vcpu);

	return 0;
}

/*
 * Forward a segment-register write to the kvm_x86_ops->set_segment hook.
 */
static void set_segment(struct kvm_vcpu *vcpu,
			struct kvm_segment *var, int seg)
{
	return kvm_x86_ops->set_segment(vcpu, var, seg);
}

/*
 * Install the register state described by @sregs on @vcpu.
 *
 * In order: IDT and GDT, CR2/CR3, CR8, EFER (only written when
 * CONFIG_X86_64 is set), the APIC base, CR0, CR4, the pending-interrupt
 * state and finally the eight segment registers.  Whenever CR3, EFER,
 * CR0 or CR4 differ from the values cached in @vcpu,
 * kvm_mmu_reset_context() is called once after all of them were written.
 *
 * The whole sequence runs between vcpu_load() and vcpu_put().
 *
 * Always returns 0.
 */
static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				    struct kvm_sregs *sregs)
{
	int mmu_reset_needed = 0;
	int i, pending_vec, max_bits;
	struct descriptor_table dt;

	vcpu_load(vcpu);

	/* The same local descriptor is reused for the IDT, then the GDT. */
	dt.limit = sregs->idt.limit;
	dt.base = sregs->idt.base;
	kvm_x86_ops->set_idt(vcpu, &dt);
	dt.limit = sregs->gdt.limit;
	dt.base = sregs->gdt.base;
	kvm_x86_ops->set_gdt(vcpu, &dt);

	vcpu->cr2 = sregs->cr2;
	/* Each comparison must precede the write it guards. */
	mmu_reset_needed |= vcpu->cr3 != sregs->cr3;
	vcpu->cr3 = sregs->cr3;

	set_cr8(vcpu, sregs->cr8);

	/* The EFER comparison happens even when set_efer() is compiled out. */
	mmu_reset_needed |= vcpu->shadow_efer != sregs->efer;
#ifdef CONFIG_X86_64
	kvm_x86_ops->set_efer(vcpu, sregs->efer);
#endif
	kvm_set_apic_base(vcpu, sregs->apic_base);

	/*
	 * NOTE(review): presumably refreshes the cached vcpu->cr4 so the
	 * comparison below sees current guest bits -- confirm against the
	 * arch implementation.
	 */
	kvm_x86_ops->decache_cr4_guest_bits(vcpu);

	mmu_reset_needed |= vcpu->cr0 != sregs->cr0;
	vcpu->cr0 = sregs->cr0;
	kvm_x86_ops->set_cr0(vcpu, sregs->cr0);

	/* vcpu->cr4 is not assigned here; only the set_cr4 hook is invoked. */
	mmu_reset_needed |= vcpu->cr4 != sregs->cr4;
	kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
	if (!is_long_mode(vcpu) && is_pae(vcpu))
		load_pdptrs(vcpu, vcpu->cr3);

	if (mmu_reset_needed)
		kvm_mmu_reset_context(vcpu);

	if (!irqchip_in_kernel(vcpu->kvm)) {
		/*
		 * No in-kernel irqchip: take the bitmap verbatim and rebuild
		 * irq_summary with one bit per non-zero word of irq_pending.
		 */
		memcpy(vcpu->irq_pending, sregs->interrupt_bitmap,
		       sizeof vcpu->irq_pending);
		vcpu->irq_summary = 0;
		for (i = 0; i < ARRAY_SIZE(vcpu->irq_pending); ++i)
			if (vcpu->irq_pending[i])
				__set_bit(i, &vcpu->irq_summary);
	} else {
		/* Bitmap size in bits (bytes << 3). */
		max_bits = (sizeof sregs->interrupt_bitmap) << 3;
		pending_vec = find_first_bit(
			(const unsigned long *)sregs->interrupt_bitmap,
			max_bits);
		/* Only pending external irq is handled here */
		if (pending_vec < max_bits) {
			kvm_x86_ops->set_irq(vcpu, pending_vec);
			/* NOTE(review): this printk has no KERN_<level> prefix. */
			printk("Set back pending irq %d\n", pending_vec);
		}
	}

	set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
	set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
	set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
	set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
	set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
	set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);

	set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
	set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);

	vcpu_put(vcpu);

	return 0;
}

/*
 * Read the CS segment through get_segment() and hand its 'db' and 'l'
 * fields back through @db and @l.
 */
void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
{
	struct kvm_segment cs;

	get_segment(vcpu, &cs, VCPU_SREG_CS);
	*db = cs.db;
	*l = cs.l;
}
EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);

/*
 * List of msr numbers which we expose to userspace through KVM_GET_MSRS
 * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
 *
 * This list is modified at module load time to reflect the
 * capabilities of the host cpu.
 */
static u32 msrs_to_save[] = {
	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
	MSR_K6_STAR,
#ifdef CONFIG_X86_64
	MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
#endif
	MSR_IA32_TIME_STAMP_COUNTER,
};

/* Number of leading msrs_to_save[] entries kept by kvm_init_msr_list(). */
static unsigned num_msrs_to_save;

static u32 emulated_msrs[] = {
	MSR_IA32_MISC_ENABLE,
};

/*
 * Compact msrs_to_save[] in place: entries for which rdmsr_safe() returns
 * a negative value are dropped, the survivors are shifted to the front in
 * their original order, and their count is stored in num_msrs_to_save.
 */
static __init void kvm_init_msr_list(void)
{
	u32 dummy[2];
	unsigned i, j;

	/* i scans every entry, j is the next slot to keep. */
	for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
		if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
			continue;
		if (j < i)
			msrs_to_save[j] = msrs_to_save[i];
		j++;
	}
	num_msrs_to_save = j;
}

/*
 * Adapt set_msr() to msr_io()'s calling convention
 */
static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
{
	return kvm_set_msr(vcpu, index, *data);
}

/*
 * Read or write a bunch of msrs. All parameters are kernel addresses.
 *
 * @return number of msrs set successfully.
*/
static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
		    struct kvm_msr_entry *entries,
		    int (*do_msr)(struct kvm_vcpu *vcpu,
				  unsigned index, u64 *data))
{
	int done = 0;

	vcpu_load(vcpu);

	/* Stop at the first entry the callback rejects (non-zero return). */
	while (done < msrs->nmsrs) {
		if (do_msr(vcpu, entries[done].index, &entries[done].data))
			break;
		++done;
	}

	vcpu_put(vcpu);

	return done;
}

/*
 * User-space front end for __msr_io().
 *
 * Copies the kvm_msrs header and its entry array in from @user_msrs,
 * applies @do_msr to the entries, and, when @writeback is non-zero,
 * copies the whole entry array back out afterwards.
 *
 * @return the count reported by __msr_io(), or -EFAULT on a failed user
 * copy, -E2BIG when nmsrs >= MAX_IO_MSRS, -ENOMEM when vmalloc() fails.
 */
static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
		  int (*do_msr)(struct kvm_vcpu *vcpu,
				unsigned index, u64 *data),
		  int writeback)
{
	struct kvm_msrs hdr;
	struct kvm_msr_entry *buf;
	unsigned nbytes;
	int handled;
	int ret;

	/* Nothing is allocated yet, so the early failures return directly. */
	if (copy_from_user(&hdr, user_msrs, sizeof hdr))
		return -EFAULT;

	if (hdr.nmsrs >= MAX_IO_MSRS)
		return -E2BIG;

	nbytes = sizeof(struct kvm_msr_entry) * hdr.nmsrs;
	buf = vmalloc(nbytes);
	if (!buf)
		return -ENOMEM;

	ret = -EFAULT;
	if (copy_from_user(buf, user_msrs->entries, nbytes))
		goto free_buf;

	handled = __msr_io(vcpu, &hdr, buf, do_msr);
	ret = handled;
	if (handled < 0)
		goto free_buf;

	/* The full array is written back, not just the handled prefix. */
	if (writeback && copy_to_user(user_msrs->entries, buf, nbytes))
		ret = -EFAULT;

free_buf:
	vfree(buf);
	return ret;
}

/*
 * Translate a guest virtual address to a guest physical address.
*/static int kvm_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, struct kvm_translation *tr){ unsigned long vaddr = tr->linear_address; gpa_t gpa; vcpu_load(vcpu); mutex_lock(&vcpu->kvm->lock); gpa = vcpu->mmu.gva_to_gpa(vcpu, vaddr); tr->physical_address = gpa; tr->valid = gpa != UNMAPPED_GVA; tr->writeable = 1; tr->usermode = 0; mutex_unlock(&vcpu->kvm->lock); vcpu_put(vcpu); return 0;}static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq){ if (irq->irq < 0 || irq->irq >= 256) return -EINVAL; if (irqchip_in_kernel(vcpu->kvm)) return -ENXIO; vcpu_load(vcpu); set_bit(irq->irq, vcpu->irq_pending); set_bit(irq->irq / BITS_PER_LONG, &vcpu->irq_summary); vcpu_put(vcpu); return 0;}static int kvm_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg){ int r; vcpu_load(vcpu); r = kvm_x86_ops->set_guest_debug(vcpu, dbg); vcpu_put(vcpu); return r;}static struct page *kvm_vcpu_nopage(struct vm_area_struct *vma, unsigned long address, int *type){ struct kvm_vcpu *vcpu = vma->vm_file->private_data; unsigned long pgoff; struct page *page; pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; if (pgoff == 0) page = virt_to_page(vcpu->run); else if (pgoff == KVM_PIO_PAGE_OFFSET) page = virt_to_page(vcpu->pio_data); else return NOPAGE_SIGBUS; get_page(page); if (type != NULL) *type = VM_FAULT_MINOR; return page;}static struct vm_operations_struct kvm_vcpu_vm_ops = { .nopage = kvm_vcpu_nopage,};static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma){ vma->vm_ops = &kvm_vcpu_vm_ops; return 0;}static int kvm_vcpu_release(struct inode *inode, struct file *filp){ struct kvm_vcpu *vcpu = filp->private_data; fput(vcpu->kvm->filp); return 0;}static struct file_operations kvm_vcpu_fops = { .release = kvm_vcpu_release, .unlocked_ioctl = kvm_vcpu_ioctl, .compat_ioctl = kvm_vcpu_ioctl, .mmap = kvm_vcpu_mmap,};/* * Allocates an inode for the vcpu. 
*/
static int create_vcpu_fd(struct kvm_vcpu *vcpu)
{
	int fd, r;
	struct inode *inode;
	struct file *file;

	/* On success the new descriptor comes back through &fd. */
	r = anon_inode_getfd(&fd, &inode, &file,
			     "kvm-vcpu", &kvm_vcpu_fops, vcpu);
	if (r)
		return r;
	/*
	 * Take a reference on the VM's file; kvm_vcpu_release() calls
	 * fput() on the same file.
	 */
	atomic_inc(&vcpu->kvm->filp->f_count);
	return fd;
}

/*
 * Creates some virtual cpus.  Good luck creating more than one.
 *
 * Creates vcpu number @n for @kvm, sets up its MMU, publishes it in
 * kvm->vcpus[n] and returns a new file descriptor for it.
 *
 * Returns -EINVAL when valid_vcpu(n) fails, -EEXIST when slot @n is
 * already occupied, or the error from vcpu_create(), kvm_mmu_setup() or
 * create_vcpu_fd().  On failure the steps already taken are undone in
 * reverse order through the labels at the bottom.
 */
static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
{
	int r;
	struct kvm_vcpu *vcpu;

	if (!valid_vcpu(n))
		return -EINVAL;

	vcpu = kvm_x86_ops->vcpu_create(kvm, n);
	if (IS_ERR(vcpu))
		return PTR_ERR(vcpu);

	preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops);

	/* We do fxsave: this must be aligned. */
	BUG_ON((unsigned long)&vcpu->host_fx_image & 0xF);

	vcpu_load(vcpu);
	r = kvm_mmu_setup(vcpu);
	vcpu_put(vcpu);
	if (r < 0)
		goto free_vcpu;

	/* The slot check and the publication happen under kvm->lock. */
	mutex_lock(&kvm->lock);
	if (kvm->vcpus[n]) {
		r = -EEXIST;
		mutex_unlock(&kvm->lock);
		goto mmu_unload;
	}
	kvm->vcpus[n] = vcpu;
	mutex_unlock(&kvm->lock);

	/* Now it's all set up, let userspace reach it */
	r = create_vcpu_fd(vcpu);
	if (r < 0)
		goto unlink;
	return r;

	/* Error unwinding: each label falls through to the ones below it. */
unlink:
	mutex_lock(&kvm->lock);
	kvm->vcpus[n] = NULL;
	mutex_unlock(&kvm->lock);

mmu_unload:
	vcpu_load(vcpu);
	kvm_mmu_unload(vcpu);
	vcpu_put(vcpu);

free_vcpu:
	kvm_x86_ops->vcpu_free(vcpu);
	return r;
}

/*
 * Clear bit 20 of EDX in the guest's CPUID function 0x80000001 entry
 * (logged as the guest NX capability) when the EFER value read with
 * rdmsrl() does not have EFER_NX set.  Does nothing if the vcpu has no
 * such CPUID entry or the bit is already clear.
 */
static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
{
	u64 efer;
	int i;
	struct kvm_cpuid_entry *e, *entry;

	rdmsrl(MSR_EFER, efer);
	/* Find the first entry for function 0x80000001, if there is one. */
	entry = NULL;
	for (i = 0; i < vcpu->cpuid_nent; ++i) {
		e = &vcpu->cpuid_entries[i];
		if (e->function == 0x80000001) {
			entry = e;
			break;
		}
	}
	if (entry && (entry->edx & (1 << 20)) && !(efer & EFER_NX)) {
		entry->edx &= ~(1 << 20);
		printk(KERN_INFO "kvm: guest NX capability removed\n");
	}
}

/*
 * Replace the vcpu's CPUID table with cpuid->nent entries copied from
 * the user buffer @entries, then apply cpuid_fix_nx_cap().
 *
 * Returns 0 on success, -E2BIG when nent exceeds KVM_MAX_CPUID_ENTRIES,
 * -EFAULT when the user copy fails.
 */
static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
				    struct kvm_cpuid *cpuid,
				    struct kvm_cpuid_entry __user *entries)
{
	int r;

	r = -E2BIG;
	if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
		goto out;
	r = -EFAULT;
	if (copy_from_user(&vcpu->cpuid_entries, entries,
			   cpuid->nent * sizeof(struct kvm_cpuid_entry)))
		goto out;
	/* cpuid_nent is only updated after the copy succeeded. */
	vcpu->cpuid_nent = cpuid->nent;
	cpuid_fix_nx_cap(vcpu);
	return 0;

out:
	return r;
}

/*
 * Set or clear the vcpu's signal mask.
 *
 * A non-NULL @sigset has SIGKILL and SIGSTOP removed (the caller's set is
 * modified in place), is copied into vcpu->sigset and marked active; a
 * NULL @sigset only marks the mask inactive.  Always returns 0.
 */
static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset)
{
	if (sigset) {
		sigdelsetmask(sigset, sigmask(SIGKILL)|sigmask(SIGSTOP));
		vcpu->sigset_active = 1;
		vcpu->sigset = *sigset;
	} else
		vcpu->sigset_active = 0;
	return 0;
}

/*
 * fxsave fpu state.  Taken from x86_64/processor.h.  To be killed when
 * we have asm/x86/processor.h
 */
struct fxsave {
	u16	cwd;
	u16	swd;
	u16	twd;
	u16	fop;
	u64	rip;
	u64	rdp;
	u32	mxcsr;
	u32	mxcsr_mask;
	u32	st_space[32];	/* 8*16 bytes for each FP-reg = 128 bytes */
#ifdef CONFIG_X86_64
	u32	xmm_space[64];	/* 16*16 bytes for each XMM-reg = 256 bytes */
#else
	u32	xmm_space[32];	/* 8*16 bytes for each XMM-reg = 128 bytes */
#endif
};

/*
 * Copy FPU state out of vcpu->guest_fx_image (viewed as struct fxsave)
 * into @fpu.  The mxcsr and mxcsr_mask fields are not transferred.
 * Always returns 0.
 */
static int kvm_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	struct fxsave *fxsave = (struct fxsave *)&vcpu->guest_fx_image;

	vcpu_load(vcpu);

	/* 128 is the size in bytes of fxsave->st_space (32 x u32). */
	memcpy(fpu->fpr, fxsave->st_space, 128);
	fpu->fcw = fxsave->cwd;
	fpu->fsw = fxsave->swd;
	fpu->ftwx = fxsave->twd;
	fpu->last_opcode = fxsave->fop;
	fpu->last_ip = fxsave->rip;
	fpu->last_dp = fxsave->rdp;
	memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space);

	vcpu_put(vcpu);

	return 0;
}

/*
 * Copy FPU state from @fpu into vcpu->guest_fx_image (viewed as struct
 * fxsave); the field-by-field mirror of kvm_vcpu_ioctl_get_fpu().
 * Always returns 0.
 */
static int kvm_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	struct fxsave *fxsave = (struct fxsave *)&vcpu->guest_fx_image;

	vcpu_load(vcpu);

	/* 128 is the size in bytes of fxsave->st_space (32 x u32). */
	memcpy(fxsave->st_space, fpu->fpr, 128);
	fxsave->cwd = fpu->fcw;
	fxsave->swd = fpu->fsw;
	fxsave->twd = fpu->ftwx;
	fxsave->fop = fpu->last_opcode;
	fxsave->rip = fpu->last_ip;
	fxsave->rdp = fpu->last_dp;
	memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space);

	vcpu_put(vcpu);

	return 0;
}

/*
 * Copy the register area of the vcpu's APIC into @s.  Always returns 0.
 */
static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
				    struct kvm_lapic_state *s)
{
	vcpu_load(vcpu);
	/*
	 * The length is the size of the whole kvm_lapic_state structure.
	 * NOTE(review): assumes s->regs spans that entire structure and
	 * that apic->regs is at least as large -- confirm.
	 */
	memcpy(s->regs, vcpu->apic->regs, sizeof *s);
	vcpu_put(vcpu);

	return 0;
}

/*
 * Overwrite the register area of the vcpu's APIC with the contents of
 * @s, then call kvm_apic_post_state_restore().  Always returns 0.
 */
static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
				    struct kvm_lapic_state *s)
{
	vcpu_load(vcpu);
	/* Same length convention as kvm_vcpu_ioctl_get_lapic(). */
	memcpy(vcpu->apic->regs, s->regs, sizeof *s);
	kvm_apic_post_state_restore(vcpu);
	vcpu_put(vcpu);

	return 0;
}

/*
 * ioctl entry point for vcpu files (installed in kvm_vcpu_fops as both
 * .unlocked_ioctl and .compat_ioctl).  The vcpu is taken from
 * filp->private_data and @arg is treated as a user pointer where a
 * command carries a payload.
 */
static long kvm_vcpu_ioctl(struct file *filp,
			   unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int r = -EINVAL;

	switch (ioctl) {
	case KVM_RUN:
		/* KVM_RUN takes no argument; a non-zero arg is rejected. */
		r = -EINVAL;
		if (arg)
			goto out;
		r = kvm_vcpu_ioctl_run(vcpu, vcpu->run);
		break;
	case KVM_GET_REGS: {
		struct kvm_regs kvm_regs;

		/* Zero the buffer first so every byte copied out is defined. */
		memset(&kvm_regs, 0, sizeof kvm_regs);
		r = kvm_vcpu_ioctl_get_regs(vcpu, &kvm_regs);
		if (r)
			goto out;
		r = -EFAULT;
		if (copy_to_user(argp, &kvm_regs, sizeof kvm_regs))
			goto out;
		r = 0;
		break;
	}
	case KVM_SET_REGS: {
		struct kvm_regs kvm_regs;

		r = -EFAULT;
		if (copy_from_user(&kvm_regs, argp, sizeof kvm_regs))
			goto out;
		r = kvm_vcpu_ioctl_set_regs(vcpu, &kvm_regs);
		if
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?