hvm.c

来自「xen 3.2.2 源码」· C语言 代码 · 共 2,070 行 · 第 1/4 页

C
2,070
字号
/*
 * hvm.c: Common hardware virtual machine abstractions.
 *
 * Copyright (c) 2004, Intel Corporation.
 * Copyright (c) 2005, International Business Machines Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/trace.h>
#include <xen/sched.h>
#include <xen/irq.h>
#include <xen/softirq.h>
#include <xen/domain.h>
#include <xen/domain_page.h>
#include <xen/hypercall.h>
#include <xen/guest_access.h>
#include <xen/event.h>
#include <asm/current.h>
#include <asm/e820.h>
#include <asm/io.h>
#include <asm/paging.h>
#include <asm/regs.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/types.h>
#include <asm/msr.h>
#include <asm/mc146818rtc.h>
#include <asm/spinlock.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/vpt.h>
#include <asm/hvm/support.h>
#include <asm/hvm/cacheattr.h>
#include <public/sched.h>
#include <public/hvm/ioreq.h>
#include <public/version.h>
#include <public/memory.h>

/*
 * Xen command-line option to allow/disallow hardware-assisted paging.
 * Since the phys-to-machine table of AMD NPT is in host format, 32-bit Xen
 * can only support guests using NPT with up to a 4GB memory map. Therefore
 * we disallow HAP by default on PAE Xen (by default we want to support an
 * 8GB pseudophysical memory map for HVM guests on a PAE host).
 */
static int opt_hap_permitted = (CONFIG_PAGING_LEVELS != 3);
boolean_param("hap", opt_hap_permitted);

int hvm_enabled __read_mostly;

unsigned int opt_hvm_debug_level __read_mostly;
integer_param("hvm_debug", opt_hvm_debug_level);

struct hvm_function_table hvm_funcs __read_mostly;

/* I/O permission bitmap is globally shared by all HVM guests. */
char __attribute__ ((__section__ (".bss.page_aligned")))
    hvm_io_bitmap[3*PAGE_SIZE];

/*
 * Install @fns as the global HVM function table and mark HVM as enabled.
 *
 * BUGs if HVM has already been enabled. Also initialises the shared I/O
 * bitmap and, if the table advertises HAP, masks it out when the "hap"
 * command-line option does not permit it.
 */
void hvm_enable(struct hvm_function_table *fns)
{
    BUG_ON(hvm_enabled);
    printk("HVM: %s enabled\n", fns->name);

    /*
     * Allow direct access to the PC debug port (it is often used for I/O
     * delays, but the vmexits simply slow things down).
     */
    memset(hvm_io_bitmap, ~0, sizeof(hvm_io_bitmap));
    __clear_bit(0x80, hvm_io_bitmap);

    hvm_funcs   = *fns;
    hvm_enabled = 1;

    if ( hvm_funcs.hap_supported )
    {
        if ( !opt_hap_permitted )
            hvm_funcs.hap_supported = 0;
        printk("HVM: Hardware Assisted Paging detected %s.\n",
               hvm_funcs.hap_supported ? "and enabled" : "but disabled");
    }
}

/*
 * Set the guest-visible TSC of @v to @guest_tsc: cache the offset from the
 * current host TSC and hand it to the set_tsc_offset hook.
 */
void hvm_set_guest_tsc(struct vcpu *v, u64 guest_tsc)
{
    u64 host_tsc;

    rdtscll(host_tsc);

    v->arch.hvm_vcpu.cache_tsc_offset = guest_tsc - host_tsc;
    hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset);
}

/* Return the current host TSC plus the cached TSC offset of @v. */
u64 hvm_get_guest_tsc(struct vcpu *v)
{
    u64 host_tsc;

    rdtscll(host_tsc);
    return host_tsc + v->arch.hvm_vcpu.cache_tsc_offset;
}

/* Invoke the RTC, HPET and periodic-timer migration helpers for @v. */
void hvm_migrate_timers(struct vcpu *v)
{
    rtc_migrate_timers(v);
    hpet_migrate_timers(v);
    pt_migrate(v);
}

/*
 * Resume path for @v: restore its periodic timers, then loop until the
 * vcpu's ioreq slot is back in STATE_IOREQ_NONE. An unrecognised state
 * crashes the domain.
 */
void hvm_do_resume(struct vcpu *v)
{
    ioreq_t *p;

    pt_restore_timer(v);

    /* NB. Optimised for common case (p->state == STATE_IOREQ_NONE). */
    p = &get_ioreq(v)->vp_ioreq;
    while ( p->state != STATE_IOREQ_NONE )
    {
        switch ( p->state )
        {
        case STATE_IORESP_READY: /* IORESP_READY -> NONE */
            hvm_io_assist();
            break;
        case STATE_IOREQ_READY:  /* IOREQ_{READY,INPROCESS} -> IORESP_READY */
        case STATE_IOREQ_INPROCESS:
            wait_on_xen_event_channel(v->arch.hvm_vcpu.xen_port,
                                      (p->state != STATE_IOREQ_READY) &&
                                      (p->state != STATE_IOREQ_INPROCESS));
            break;
        default:
            gdprintk(XENLOG_ERR, "Weird HVM iorequest state %d.\n", p->state);
            domain_crash_synchronous();
        }
    }
}

/*
 * Zero @iorp, initialise its lock and take one pause reference on @d.
 * The matching domain_unpause() is issued by hvm_set_ioreq_page() once a
 * page has been installed successfully.
 */
static void hvm_init_ioreq_page(
    struct domain *d, struct hvm_ioreq_page *iorp)
{
    memset(iorp, 0, sizeof(*iorp));
    spin_lock_init(&iorp->lock);
    domain_pause(d);
}

/*
 * Under the page lock, unmap and release the page held by @iorp (if any).
 * Asserts that @d is dying.
 */
static void hvm_destroy_ioreq_page(
    struct domain *d, struct hvm_ioreq_page *iorp)
{
    spin_lock(&iorp->lock);

    ASSERT(d->is_dying);

    if ( iorp->va != NULL )
    {
        unmap_domain_page_global(iorp->va);
        put_page_and_type(iorp->page);
        iorp->va = NULL;
    }

    spin_unlock(&iorp->lock);
}

/*
 * Install guest frame @gmfn of domain @d as the page backing @iorp.
 *
 * Returns 0 on success (and unpauses @d once), -EINVAL if the frame is not
 * RAM, a writable reference cannot be taken, a page is already installed or
 * the domain is dying, and -ENOMEM if the global mapping fails. On every
 * failure path the page reference and mapping taken here are dropped.
 */
static int hvm_set_ioreq_page(
    struct domain *d, struct hvm_ioreq_page *iorp, unsigned long gmfn)
{
    struct page_info *page;
    p2m_type_t p2mt;
    unsigned long mfn;
    void *va;

    mfn = mfn_x(gfn_to_mfn(d, gmfn, &p2mt));
    if ( !p2m_is_ram(p2mt) )
        return -EINVAL;
    ASSERT(mfn_valid(mfn));

    page = mfn_to_page(mfn);
    if ( !get_page_and_type(page, d, PGT_writable_page) )
        return -EINVAL;

    va = map_domain_page_global(mfn);
    if ( va == NULL )
    {
        put_page_and_type(page);
        return -ENOMEM;
    }

    spin_lock(&iorp->lock);

    if ( (iorp->va != NULL) || d->is_dying )
    {
        spin_unlock(&iorp->lock);
        unmap_domain_page_global(va);
        put_page_and_type(mfn_to_page(mfn));
        return -EINVAL;
    }

    iorp->va = va;
    iorp->page = page;

    spin_unlock(&iorp->lock);

    domain_unpause(d);

    return 0;
}

/*
 * Per-domain HVM initialisation.
 *
 * Returns 0 on success, -EINVAL if HVM is not enabled, or the error code of
 * the first failing initialisation step (after undoing the earlier ones via
 * the fail labels).
 */
int hvm_domain_initialise(struct domain *d)
{
    int rc;

    if ( !hvm_enabled )
    {
        gdprintk(XENLOG_WARNING, "Attempt to create a HVM guest "
                 "on a non-VT/AMDV platform.\n");
        return -EINVAL;
    }

    spin_lock_init(&d->arch.hvm_domain.pbuf_lock);
    spin_lock_init(&d->arch.hvm_domain.irq_lock);
    spin_lock_init(&d->arch.hvm_domain.uc_lock);

    hvm_init_cacheattr_region_list(d);

    rc = paging_enable(d, PG_refcounts|PG_translate|PG_external);
    if ( rc != 0 )
        goto fail1;

    vpic_init(d);

    rc = vioapic_init(d);
    if ( rc != 0 )
        goto fail1;

    stdvga_init(d);

    hvm_init_ioreq_page(d, &d->arch.hvm_domain.ioreq);
    hvm_init_ioreq_page(d, &d->arch.hvm_domain.buf_ioreq);

    rc = hvm_funcs.domain_initialise(d);
    if ( rc != 0 )
        goto fail2;

    return 0;

 fail2:
    vioapic_deinit(d);
 fail1:
    hvm_destroy_cacheattr_region_list(d);
    return rc;
}

/*
 * Release both ioreq pages of @d and de-initialise its PIT, RTC, PM timer,
 * HPET and standard-VGA emulation state.
 */
void hvm_domain_relinquish_resources(struct domain *d)
{
    hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.ioreq);
    hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.buf_ioreq);

    pit_deinit(d);
    rtc_deinit(d);
    pmtimer_deinit(d);
    hpet_deinit(d);
    stdvga_deinit(d);
}

/*
 * Final per-domain teardown: vendor-specific destroy hook, virtual IOAPIC
 * and cache-attribute region list.
 */
void hvm_domain_destroy(struct domain *d)
{
    hvm_funcs.domain_destroy(d);
    vioapic_deinit(d);
    hvm_destroy_cacheattr_region_list(d);
}

/*
 * Save handler for the CPU record: emit one struct hvm_hw_cpu per vcpu of
 * @d that is not marked down.
 *
 * Returns 0 on success, 1 if writing a record into @h fails.
 */
static int hvm_save_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
{
    struct vcpu *v;
    struct hvm_hw_cpu ctxt;
    struct vcpu_guest_context *vc;

    for_each_vcpu(d, v)
    {
        /* We don't need to save state for a vcpu that is down; the restore
         * code will leave it down if there is nothing saved. */
        if ( test_bit(_VPF_down, &v->pause_flags) )
            continue;

        /*
         * Start from a zeroed record: not every field is assigned below
         * (e.g. r8-r15 are only filled in on x86_64 builds), and anything
         * left unset would otherwise carry stale stack contents or the
         * previous vcpu's values into the save image.
         */
        memset(&ctxt, 0, sizeof(ctxt));

        /* Architecture-specific vmcs/vmcb bits */
        hvm_funcs.save_cpu_ctxt(v, &ctxt);

        /* Other vcpu register state */
        vc = &v->arch.guest_context;

        if ( v->fpu_initialised )
            memcpy(ctxt.fpu_regs, &vc->fpu_ctxt, sizeof(ctxt.fpu_regs));
        else
            memset(ctxt.fpu_regs, 0, sizeof(ctxt.fpu_regs));

        ctxt.rax = vc->user_regs.eax;
        ctxt.rbx = vc->user_regs.ebx;
        ctxt.rcx = vc->user_regs.ecx;
        ctxt.rdx = vc->user_regs.edx;
        ctxt.rbp = vc->user_regs.ebp;
        ctxt.rsi = vc->user_regs.esi;
        ctxt.rdi = vc->user_regs.edi;
        ctxt.rsp = vc->user_regs.esp;
        ctxt.rip = vc->user_regs.eip;
        ctxt.rflags = vc->user_regs.eflags;
#ifdef __x86_64__
        ctxt.r8  = vc->user_regs.r8;
        ctxt.r9  = vc->user_regs.r9;
        ctxt.r10 = vc->user_regs.r10;
        ctxt.r11 = vc->user_regs.r11;
        ctxt.r12 = vc->user_regs.r12;
        ctxt.r13 = vc->user_regs.r13;
        ctxt.r14 = vc->user_regs.r14;
        ctxt.r15 = vc->user_regs.r15;
#endif
        ctxt.dr0 = vc->debugreg[0];
        ctxt.dr1 = vc->debugreg[1];
        ctxt.dr2 = vc->debugreg[2];
        ctxt.dr3 = vc->debugreg[3];
        ctxt.dr6 = vc->debugreg[6];
        ctxt.dr7 = vc->debugreg[7];

        if ( hvm_save_entry(CPU, v->vcpu_id, h, &ctxt) != 0 )
            return 1;
    }
    return 0;
}

/*
 * Load handler for the CPU record: restore one vcpu of @d from @h.
 *
 * Returns 0 on success, -EINVAL if the record names a vcpu the domain does
 * not have, cannot be read, or fails the CR0/CR4/EFER sanity checks, or the
 * error returned by boot_vcpu() if the vcpu had to be initialised first.
 */
static int hvm_load_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
{
    int vcpuid, rc;
    struct vcpu *v;
    struct hvm_hw_cpu ctxt;
    struct vcpu_guest_context *vc;

    /* Which vcpu is this? */
    vcpuid = hvm_load_instance(h);
    /* Valid indices into d->vcpu[] are 0 .. MAX_VIRT_CPUS-1. */
    if ( vcpuid < 0 || vcpuid >= MAX_VIRT_CPUS ||
         (v = d->vcpu[vcpuid]) == NULL )
    {
        gdprintk(XENLOG_ERR, "HVM restore: domain has no vcpu %d\n", vcpuid);
        return -EINVAL;
    }
    vc = &v->arch.guest_context;

    /* Need to init this vcpu before loading its contents */
    LOCK_BIGLOCK(d);
    rc = v->is_initialised ? 0 : boot_vcpu(d, vcpuid, vc);
    /* Always drop the lock before acting on the result. */
    UNLOCK_BIGLOCK(d);
    if ( rc != 0 )
        return rc;

    if ( hvm_load_entry(CPU, h, &ctxt) != 0 )
        return -EINVAL;

    /* Sanity check some control registers. */
    if ( (ctxt.cr0 & HVM_CR0_GUEST_RESERVED_BITS) ||
         !(ctxt.cr0 & X86_CR0_ET) ||
         ((ctxt.cr0 & (X86_CR0_PE|X86_CR0_PG)) == X86_CR0_PG) )
    {
        gdprintk(XENLOG_ERR, "HVM restore: bad CR0 0x%"PRIx64"\n",
                 ctxt.cr0);
        return -EINVAL;
    }

    if ( ctxt.cr4 & HVM_CR4_GUEST_RESERVED_BITS )
    {
        gdprintk(XENLOG_ERR, "HVM restore: bad CR4 0x%"PRIx64"\n",
                 ctxt.cr4);
        return -EINVAL;
    }

    if ( (ctxt.msr_efer & ~(EFER_FFXSE | EFER_LME | EFER_LMA |
                            EFER_NX | EFER_SCE)) ||
         ((sizeof(long) != 8) && (ctxt.msr_efer & EFER_LME)) ||
         (!cpu_has_nx && (ctxt.msr_efer & EFER_NX)) ||
         (!cpu_has_syscall && (ctxt.msr_efer & EFER_SCE)) ||
         (!cpu_has_ffxsr && (ctxt.msr_efer & EFER_FFXSE)) ||
         ((ctxt.msr_efer & (EFER_LME|EFER_LMA)) == EFER_LMA) )
    {
        gdprintk(XENLOG_ERR, "HVM restore: bad EFER 0x%"PRIx64"\n",
                 ctxt.msr_efer);
        return -EINVAL;
    }

    /* Architecture-specific vmcs/vmcb bits */
    if ( hvm_funcs.load_cpu_ctxt(v, &ctxt) < 0 )
        return -EINVAL;

    /* Other vcpu register state */
    memcpy(&vc->fpu_ctxt, ctxt.fpu_regs, sizeof(ctxt.fpu_regs));
    vc->user_regs.eax = ctxt.rax;
    vc->user_regs.ebx = ctxt.rbx;
    vc->user_regs.ecx = ctxt.rcx;
    vc->user_regs.edx = ctxt.rdx;
    vc->user_regs.ebp = ctxt.rbp;
    vc->user_regs.esi = ctxt.rsi;
    vc->user_regs.edi = ctxt.rdi;
    vc->user_regs.esp = ctxt.rsp;
    vc->user_regs.eip = ctxt.rip;
    vc->user_regs.eflags = ctxt.rflags | 2;
#ifdef __x86_64__
    vc->user_regs.r8  = ctxt.r8;
    vc->user_regs.r9  = ctxt.r9;
    vc->user_regs.r10 = ctxt.r10;
    vc->user_regs.r11 = ctxt.r11;
    vc->user_regs.r12 = ctxt.r12;
    vc->user_regs.r13 = ctxt.r13;
    vc->user_regs.r14 = ctxt.r14;
    vc->user_regs.r15 = ctxt.r15;
#endif
    vc->debugreg[0] = ctxt.dr0;
    vc->debugreg[1] = ctxt.dr1;
    vc->debugreg[2] = ctxt.dr2;
    vc->debugreg[3] = ctxt.dr3;
    vc->debugreg[6] = ctxt.dr6;
    vc->debugreg[7] = ctxt.dr7;

    vc->flags = VGCF_online;
    v->fpu_initialised = 1;

    /* Auxiliary processors should be woken immediately. */
    if ( test_and_clear_bit(_VPF_down, &v->pause_flags) )
        vcpu_wake(v);

    return 0;
}

HVM_REGISTER_SAVE_RESTORE(CPU, hvm_save_cpu_ctxt, hvm_load_cpu_ctxt,
                          1, HVMSR_PER_VCPU);

extern int reset_vmsr(struct mtrr_state *m, u64 *p);

/*
 * Per-vcpu HVM initialisation.
 *
 * Returns 0 on success or the error code of the first failing step; the
 * fail labels unwind the vendor-specific and vlapic state set up earlier.
 */
int hvm_vcpu_initialise(struct vcpu *v)
{
    int rc;

    if ( (rc = vlapic_init(v)) != 0 )
        goto fail1;

    if ( (rc = hvm_funcs.vcpu_initialise(v)) != 0 )
        goto fail2;

    /* Create ioreq event channel. */
    rc = alloc_unbound_xen_event_channel(v, 0);
    if ( rc < 0 )
        goto fail3;

    /* Register ioreq event channel. */
    v->arch.hvm_vcpu.xen_port = rc;
    spin_lock(&v->domain->arch.hvm_domain.ioreq.lock);
    if ( v->domain->arch.hvm_domain.ioreq.va != NULL )
        get_ioreq(v)->vp_eport = v->arch.hvm_vcpu.xen_port;
    spin_unlock(&v->domain->arch.hvm_domain.ioreq.lock);

    spin_lock_init(&v->arch.hvm_vcpu.tm_lock);
    INIT_LIST_HEAD(&v->arch.hvm_vcpu.tm_list);

    rc = reset_vmsr(&v->arch.hvm_vcpu.mtrr, &v->arch.hvm_vcpu.pat_cr);
    if ( rc != 0 )
        goto fail3;

    v->arch.guest_context.user_regs.eflags = 2;

    if ( v->vcpu_id == 0 )
    {
        /* NB. All these really belong in hvm_domain_initialise(). */
        pit_init(v, cpu_khz);
        rtc_init(v, RTC_PORT(0));
        pmtimer_init(v);
        hpet_init(v);

        /* Init guest TSC to start from zero. */
        hvm_set_guest_time(v, 0);

        /* Can start up without SIPI-SIPI or setvcpucontext domctl. */
        v->is_initialised = 1;
        clear_bit(_VPF_down, &v->pause_flags);
    }

    return 0;

 fail3:
    hvm_funcs.vcpu_destroy(v);
 fail2:
    vlapic_destroy(v);
 fail1:
    return rc;
}

/*
 * Per-vcpu HVM teardown: free the variable-range MTRR array, then destroy
 * the vlapic and the vendor-specific vcpu state.
 */
void hvm_vcpu_destroy(struct vcpu *v)
{
    xfree(v->arch.hvm_vcpu.mtrr.var_ranges);

    vlapic_destroy(v);
    hvm_funcs.vcpu_destroy(v);

    /* Event channel is already freed by evtchn_destroy(). */
    /*free_xen_event_channel(v, v->arch.hvm_vcpu.xen_port);*/
}

void hvm_vcpu_reset(struct vcpu *v)
{
    vcpu_pause(v);

    vlapic_reset(vcpu_vlapic(v));

    hvm_funcs.vcpu_initialise(v);

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?