/* hvm.c — extracted from the Xen source tree (viewer page 1 of 5; full file is 2,304 lines). */
/*
 * hvm.c: Common hardware virtual machine abstractions.
 *
 * Copyright (c) 2004, Intel Corporation.
 * Copyright (c) 2005, International Business Machines Corporation.
 * Copyright (c) 2008, Citrix Systems, Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/trace.h>
#include <xen/sched.h>
#include <xen/irq.h>
#include <xen/softirq.h>
#include <xen/domain.h>
#include <xen/domain_page.h>
#include <xen/hypercall.h>
#include <xen/guest_access.h>
#include <xen/event.h>
#include <asm/current.h>
#include <asm/e820.h>
#include <asm/io.h>
#include <asm/paging.h>
#include <asm/regs.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/types.h>
#include <asm/msr.h>
#include <asm/mc146818rtc.h>
#include <asm/spinlock.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/vpt.h>
#include <asm/hvm/support.h>
#include <asm/hvm/cacheattr.h>
#include <asm/hvm/trace.h>
#include <public/sched.h>
#include <public/hvm/ioreq.h>
#include <public/version.h>
#include <public/memory.h>

/* Non-zero once an HVM implementation has registered via hvm_enable(). */
int hvm_enabled __read_mostly;

/* HVM debug verbosity, set with the "hvm_debug" boot parameter. */
unsigned int opt_hvm_debug_level __read_mostly;
integer_param("hvm_debug", opt_hvm_debug_level);

/*
 * "softtsc" boot parameter: when set, hvm_get_guest_tsc() derives the TSC
 * from guest time rather than the raw host TSC.
 */
int opt_softtsc;
boolean_param("softtsc", opt_softtsc);

/* Function table copied from the active implementation in hvm_enable(). */
struct hvm_function_table hvm_funcs __read_mostly;

/* I/O permission bitmap is globally shared by all HVM guests.
 */
unsigned long __attribute__ ((__section__ (".bss.page_aligned")))
    hvm_io_bitmap[3*PAGE_SIZE/BYTES_PER_LONG];

/*
 * Register an HVM implementation (called once at boot by VMX or SVM code):
 * copy its function table into hvm_funcs and set up the shared I/O bitmap.
 */
void hvm_enable(struct hvm_function_table *fns)
{
    BUG_ON(hvm_enabled);

    printk("HVM: %s enabled\n", fns->name);

    /*
     * Allow direct access to the PC debug port (it is often used for I/O
     * delays, but the vmexits simply slow things down).
     */
    /* All ports intercepted by default (every bitmap bit set) ... */
    memset(hvm_io_bitmap, ~0, sizeof(hvm_io_bitmap));
    /* ... except port 0x80, the PC debug port, which is passed through. */
    __clear_bit(0x80, hvm_io_bitmap);

    hvm_funcs   = *fns;
    hvm_enabled = 1;

    if ( hvm_funcs.hap_supported )
        printk("HVM: Hardware Assisted Paging detected.\n");
}

/*
 * Need to re-inject a given event? We avoid re-injecting software exceptions
 * and interrupts because the faulting/trapping instruction can simply be
 * re-executed (neither VMX nor SVM update RIP when they VMEXIT during
 * INT3/INTO/INTn).
 */
int hvm_event_needs_reinjection(uint8_t type, uint8_t vector)
{
    switch ( type )
    {
    case X86_EVENTTYPE_EXT_INTR:
    case X86_EVENTTYPE_NMI:
        /* External interrupts and NMIs are lost if not re-injected. */
        return 1;
    case X86_EVENTTYPE_HW_EXCEPTION:
        /*
         * SVM uses type 3 ("HW Exception") for #OF and #BP. We explicitly
         * check for these vectors, as they are really SW Exceptions. SVM has
         * not updated RIP to point after the trapping instruction (INT3/INTO).
         */
        return (vector != 3) && (vector != 4);
    default:
        /* Software exceptions/interrupts can be re-executed (e.g., INT n). */
        break;
    }
    return 0;
}

/*
 * Combine two hardware exceptions: @vec2 was raised during delivery of @vec1.
 * This means we can assume that @vec2 is contributory or a page fault.
 * Returns the vector to deliver instead (may raise a triple fault as a
 * side effect when @vec1 was already a double fault).
 */
uint8_t hvm_combine_hw_exceptions(uint8_t vec1, uint8_t vec2)
{
    /* Exception during double-fault delivery always causes a triple fault. */
    if ( vec1 == TRAP_double_fault )
    {
        hvm_triple_fault();
        return TRAP_double_fault; /* dummy return */
    }

    /* Exception during page-fault delivery always causes a double fault.
 */
    if ( vec1 == TRAP_page_fault )
        return TRAP_double_fault;

    /*
     * Discard the first exception if it's benign or if we now have a #PF.
     * Mask 0x7c01 covers the contributory vectors: bits 0 (#DE) and
     * 10-13 (#TS, #NP, #SS, #GP).
     */
    if ( !((1u << vec1) & 0x7c01u) || (vec2 == TRAP_page_fault) )
        return vec2;

    /* Cannot combine the exceptions: double fault. */
    return TRAP_double_fault;
}

/*
 * Set the guest's TSC to @guest_tsc: cache the guest-vs-host delta and push
 * it to the implementation-specific TSC offset (VMCS/VMCB).
 */
void hvm_set_guest_tsc(struct vcpu *v, u64 guest_tsc)
{
    u64 host_tsc;

    rdtscll(host_tsc);

    v->arch.hvm_vcpu.cache_tsc_offset = guest_tsc - host_tsc;
    hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset);
}

/*
 * Current guest TSC: the cached offset applied to either guest time
 * ("softtsc" mode) or the raw host TSC.
 */
u64 hvm_get_guest_tsc(struct vcpu *v)
{
    u64 host_tsc;

    if ( opt_softtsc )
        host_tsc = hvm_get_guest_time(v);
    else
        rdtscll(host_tsc);

    return host_tsc + v->arch.hvm_vcpu.cache_tsc_offset;
}

/* Re-home this vcpu's virtual timers (RTC, HPET, periodic timers). */
void hvm_migrate_timers(struct vcpu *v)
{
    rtc_migrate_timers(v);
    hpet_migrate_timers(v);
    pt_migrate(v);
}

/*
 * Per-vcpu work on re-entry to the guest: restore timers and drain any
 * outstanding I/O request, blocking on the event channel until the external
 * emulator has produced a response.
 */
void hvm_do_resume(struct vcpu *v)
{
    ioreq_t *p;

    pt_restore_timer(v);

    /* NB. Optimised for common case (p->state == STATE_IOREQ_NONE). */
    p = &get_ioreq(v)->vp_ioreq;
    while ( p->state != STATE_IOREQ_NONE )
    {
        switch ( p->state )
        {
        case STATE_IORESP_READY: /* IORESP_READY -> NONE */
            hvm_io_assist();
            break;
        case STATE_IOREQ_READY:  /* IOREQ_{READY,INPROCESS} -> IORESP_READY */
        case STATE_IOREQ_INPROCESS:
            /* Sleep until the emulator advances p->state past these two. */
            wait_on_xen_event_channel(v->arch.hvm_vcpu.xen_port,
                                      (p->state != STATE_IOREQ_READY) &&
                                      (p->state != STATE_IOREQ_INPROCESS));
            break;
        default:
            gdprintk(XENLOG_ERR, "Weird HVM iorequest state %d.\n", p->state);
            domain_crash(v->domain);
            return; /* bail */
        }
    }
}

/*
 * Prepare an ioreq-page slot. NB: pauses the domain; the matching
 * domain_unpause() happens when a page is installed by hvm_set_ioreq_page().
 */
static void hvm_init_ioreq_page(
    struct domain *d, struct hvm_ioreq_page *iorp)
{
    memset(iorp, 0, sizeof(*iorp));
    spin_lock_init(&iorp->lock);
    domain_pause(d);
}

/* Unmap and release an ioreq page; only legal while the domain is dying. */
static void hvm_destroy_ioreq_page(
    struct domain *d, struct hvm_ioreq_page *iorp)
{
    spin_lock(&iorp->lock);

    ASSERT(d->is_dying);

    if ( iorp->va != NULL )
    {
        unmap_domain_page_global(iorp->va);
        put_page_and_type(iorp->page);
        iorp->va = NULL;
    }

    spin_unlock(&iorp->lock);
}

/*
 * Map guest frame @gmfn as an ioreq page. Takes a writable type-count on the
 * page and maps it globally; on success, drops the domain pause taken by
 * hvm_init_ioreq_page(). Returns 0, or -EINVAL/-ENOMEM on failure.
 */
static int hvm_set_ioreq_page(
    struct domain *d, struct hvm_ioreq_page *iorp, unsigned long gmfn)
{
    struct page_info *page;
    p2m_type_t p2mt;
    unsigned long mfn;
    void *va;

    /* The frame must be ordinary guest RAM. */
    mfn = mfn_x(gfn_to_mfn(d, gmfn, &p2mt));
    if ( !p2m_is_ram(p2mt) )
        return -EINVAL;
    ASSERT(mfn_valid(mfn));

    page = mfn_to_page(mfn);
    if ( !get_page_and_type(page, d, PGT_writable_page) )
        return -EINVAL;

    va = map_domain_page_global(mfn);
    if ( va == NULL )
    {
        put_page_and_type(page);
        return -ENOMEM;
    }

    spin_lock(&iorp->lock);

    /* Refuse a second page, or any page once the domain is being torn down. */
    if ( (iorp->va != NULL) || d->is_dying )
    {
        spin_unlock(&iorp->lock);
        unmap_domain_page_global(va);
        put_page_and_type(mfn_to_page(mfn));
        return -EINVAL;
    }

    iorp->va = va;
    iorp->page = page;

    spin_unlock(&iorp->lock);

    /* Balances the domain_pause() in hvm_init_ioreq_page(). */
    domain_unpause(d);

    return 0;
}

/*
 * Port-I/O handler for the 0xe9 "debug console" port: accumulate single-byte
 * writes into the per-domain buffer and flush a line to the Xen console on
 * '\n' or when the buffer is nearly full (room is kept for '\n' and '\0').
 * @dir and @port are part of the portio handler signature but unused here.
 */
static int hvm_print_line(
    int dir, uint32_t port, uint32_t bytes, uint32_t *val)
{
    struct vcpu *curr = current;
    struct hvm_domain *hd = &curr->domain->arch.hvm_domain;
    char c = *val;

    BUG_ON(bytes != 1);

    spin_lock(&hd->pbuf_lock);
    hd->pbuf[hd->pbuf_idx++] = c;
    if ( (hd->pbuf_idx == (sizeof(hd->pbuf) - 2)) || (c == '\n') )
    {
        if ( c != '\n' )
            hd->pbuf[hd->pbuf_idx++] = '\n';
        hd->pbuf[hd->pbuf_idx] = '\0';
        printk(XENLOG_G_DEBUG "HVM%u: %s", curr->domain->domain_id, hd->pbuf);
        hd->pbuf_idx = 0;
    }
    spin_unlock(&hd->pbuf_lock);

    return X86EMUL_OKAY;
}

/*
 * Domain-level HVM setup: locks, virtual platform devices (PIC, IOAPIC,
 * stdvga, RTC), ioreq pages, the 0xe9 debug-port handler, and finally the
 * implementation-specific initialisation. On failure, unwinds in reverse
 * order via the fail2/fail1 labels. Returns 0 or a negative errno.
 */
int hvm_domain_initialise(struct domain *d)
{
    int rc;

    if ( !hvm_enabled )
    {
        gdprintk(XENLOG_WARNING, "Attempt to create a HVM guest "
                 "on a non-VT/AMDV platform.\n");
        return -EINVAL;
    }

    spin_lock_init(&d->arch.hvm_domain.pbuf_lock);
    spin_lock_init(&d->arch.hvm_domain.irq_lock);
    spin_lock_init(&d->arch.hvm_domain.uc_lock);

    hvm_init_guest_time(d);

    d->arch.hvm_domain.params[HVM_PARAM_HPET_ENABLED] = 1;

    hvm_init_cacheattr_region_list(d);

    rc = paging_enable(d, PG_refcounts|PG_translate|PG_external);
    if ( rc != 0 )
        goto fail1;

    vpic_init(d);

    rc = vioapic_init(d);
    if ( rc != 0 )
        goto fail1;

    stdvga_init(d);

    rtc_init(d);

    hvm_init_ioreq_page(d, &d->arch.hvm_domain.ioreq);
    hvm_init_ioreq_page(d, &d->arch.hvm_domain.buf_ioreq);

    register_portio_handler(d, 0xe9, 1, hvm_print_line);

    rc = hvm_funcs.domain_initialise(d);
    if ( rc != 0 )
        goto fail2;

    return 0;

 fail2:
    rtc_deinit(d);
    stdvga_deinit(d);
    vioapic_deinit(d);
 fail1:
    hvm_destroy_cacheattr_region_list(d);
    return rc;
}

/*
 * Early teardown while the domain is dying: release ioreq pages and stop all
 * asynchronous timer actions so nothing fires during destruction.
 */
void hvm_domain_relinquish_resources(struct domain *d)
{
    hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.ioreq);
    hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.buf_ioreq);

    /* Stop all asynchronous timer actions. */
    rtc_deinit(d);
    if ( d->vcpu[0] != NULL )
    {
        pit_deinit(d);
        pmtimer_deinit(d);
        hpet_deinit(d);
    }
}

/* Final teardown: implementation hook first, then the virtual platform. */
void hvm_domain_destroy(struct domain *d)
{
    hvm_funcs.domain_destroy(d);
    rtc_deinit(d);
    stdvga_deinit(d);
    vioapic_deinit(d);
    hvm_destroy_cacheattr_region_list(d);
}

/*
 * Save-record callback: serialise per-vcpu CPU state (segments, GP registers,
 * FPU, plus implementation-specific VMCS/VMCB bits) into @h.
 * NB: this function continues beyond this extract.
 */
static int hvm_save_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
{
    struct vcpu *v;
    struct hvm_hw_cpu ctxt;
    struct segment_register seg;
    struct vcpu_guest_context *vc;

    for_each_vcpu ( d, v )
    {
        /* We don't need to save state for a vcpu that is down; the restore
         * code will leave it down if there is nothing saved.
*/        if ( test_bit(_VPF_down, &v->pause_flags) )             continue;        /* Architecture-specific vmcs/vmcb bits */        hvm_funcs.save_cpu_ctxt(v, &ctxt);        hvm_get_segment_register(v, x86_seg_idtr, &seg);        ctxt.idtr_limit = seg.limit;        ctxt.idtr_base = seg.base;        hvm_get_segment_register(v, x86_seg_gdtr, &seg);        ctxt.gdtr_limit = seg.limit;        ctxt.gdtr_base = seg.base;        hvm_get_segment_register(v, x86_seg_cs, &seg);        ctxt.cs_sel = seg.sel;        ctxt.cs_limit = seg.limit;        ctxt.cs_base = seg.base;        ctxt.cs_arbytes = seg.attr.bytes;        hvm_get_segment_register(v, x86_seg_ds, &seg);        ctxt.ds_sel = seg.sel;        ctxt.ds_limit = seg.limit;        ctxt.ds_base = seg.base;        ctxt.ds_arbytes = seg.attr.bytes;        hvm_get_segment_register(v, x86_seg_es, &seg);        ctxt.es_sel = seg.sel;        ctxt.es_limit = seg.limit;        ctxt.es_base = seg.base;        ctxt.es_arbytes = seg.attr.bytes;        hvm_get_segment_register(v, x86_seg_ss, &seg);        ctxt.ss_sel = seg.sel;        ctxt.ss_limit = seg.limit;        ctxt.ss_base = seg.base;        ctxt.ss_arbytes = seg.attr.bytes;        hvm_get_segment_register(v, x86_seg_fs, &seg);        ctxt.fs_sel = seg.sel;        ctxt.fs_limit = seg.limit;        ctxt.fs_base = seg.base;        ctxt.fs_arbytes = seg.attr.bytes;        hvm_get_segment_register(v, x86_seg_gs, &seg);        ctxt.gs_sel = seg.sel;        ctxt.gs_limit = seg.limit;        ctxt.gs_base = seg.base;        ctxt.gs_arbytes = seg.attr.bytes;        hvm_get_segment_register(v, x86_seg_tr, &seg);        ctxt.tr_sel = seg.sel;        ctxt.tr_limit = seg.limit;        ctxt.tr_base = seg.base;        ctxt.tr_arbytes = seg.attr.bytes;        hvm_get_segment_register(v, x86_seg_ldtr, &seg);        ctxt.ldtr_sel = seg.sel;        ctxt.ldtr_limit = seg.limit;        ctxt.ldtr_base = seg.base;        ctxt.ldtr_arbytes = seg.attr.bytes;        vc = &v->arch.guest_context;       
 if ( v->fpu_initialised )            memcpy(ctxt.fpu_regs, &vc->fpu_ctxt, sizeof(ctxt.fpu_regs));        else             memset(ctxt.fpu_regs, 0, sizeof(ctxt.fpu_regs));        ctxt.rax = vc->user_regs.eax;        ctxt.rbx = vc->user_regs.ebx;        ctxt.rcx = vc->user_regs.ecx;        ctxt.rdx = vc->user_regs.edx;        ctxt.rbp = vc->user_regs.ebp;        ctxt.rsi = vc->user_regs.esi;        ctxt.rdi = vc->user_regs.edi;        ctxt.rsp = vc->user_regs.esp;        ctxt.rip = vc->user_regs.eip;        ctxt.rflags = vc->user_regs.eflags;#ifdef __x86_64__        ctxt.r8  = vc->user_regs.r8;

/* (End of extracted page 1 of 5 — hvm_save_cpu_ctxt continues on the next page.) */