hvm.c
From the Xen 3.2.2 source tree · C · 2,070 lines · page 1/4
/*
 * hvm.c: Common hardware virtual machine abstractions.
 *
 * Copyright (c) 2004, Intel Corporation.
 * Copyright (c) 2005, International Business Machines Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/trace.h>
#include <xen/sched.h>
#include <xen/irq.h>
#include <xen/softirq.h>
#include <xen/domain.h>
#include <xen/domain_page.h>
#include <xen/hypercall.h>
#include <xen/guest_access.h>
#include <xen/event.h>
#include <asm/current.h>
#include <asm/e820.h>
#include <asm/io.h>
#include <asm/paging.h>
#include <asm/regs.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/types.h>
#include <asm/msr.h>
#include <asm/mc146818rtc.h>
#include <asm/spinlock.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/vpt.h>
#include <asm/hvm/support.h>
#include <asm/hvm/cacheattr.h>
#include <public/sched.h>
#include <public/hvm/ioreq.h>
#include <public/version.h>
#include <public/memory.h>

/*
 * Xen command-line option to allow/disallow hardware-assisted paging.
 * Since the phys-to-machine table of AMD NPT is in host format, 32-bit Xen
 * can only support guests using NPT with up to a 4GB memory map. Therefore
 * we disallow HAP by default on PAE Xen (by default we want to support an
 * 8GB pseudophysical memory map for HVM guests on a PAE host).
 */
static int opt_hap_permitted = (CONFIG_PAGING_LEVELS != 3);
boolean_param("hap", opt_hap_permitted);

int hvm_enabled __read_mostly;

unsigned int opt_hvm_debug_level __read_mostly;
integer_param("hvm_debug", opt_hvm_debug_level);

struct hvm_function_table hvm_funcs __read_mostly;

/* I/O permission bitmap is globally shared by all HVM guests. */
char __attribute__ ((__section__ (".bss.page_aligned")))
    hvm_io_bitmap[3*PAGE_SIZE];

void hvm_enable(struct hvm_function_table *fns)
{
    BUG_ON(hvm_enabled);
    printk("HVM: %s enabled\n", fns->name);

    /*
     * Allow direct access to the PC debug port (it is often used for I/O
     * delays, but the vmexits simply slow things down).
     */
    memset(hvm_io_bitmap, ~0, sizeof(hvm_io_bitmap));
    __clear_bit(0x80, hvm_io_bitmap);

    hvm_funcs   = *fns;
    hvm_enabled = 1;

    if ( hvm_funcs.hap_supported )
    {
        if ( !opt_hap_permitted )
            hvm_funcs.hap_supported = 0;
        printk("HVM: Hardware Assisted Paging detected %s.\n",
               hvm_funcs.hap_supported ? "and enabled" : "but disabled");
    }
}

void hvm_set_guest_tsc(struct vcpu *v, u64 guest_tsc)
{
    u64 host_tsc;

    rdtscll(host_tsc);

    v->arch.hvm_vcpu.cache_tsc_offset = guest_tsc - host_tsc;
    hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset);
}

u64 hvm_get_guest_tsc(struct vcpu *v)
{
    u64 host_tsc;

    rdtscll(host_tsc);
    return host_tsc + v->arch.hvm_vcpu.cache_tsc_offset;
}

void hvm_migrate_timers(struct vcpu *v)
{
    rtc_migrate_timers(v);
    hpet_migrate_timers(v);
    pt_migrate(v);
}
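/*
 * Editorial note on the function below: before a vcpu re-enters guest
 * context, it restores its periodic timers and drains any I/O request
 * still outstanding against the device model, sleeping on the vcpu's
 * ioreq event channel until the emulator has posted a response.
 */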
void hvm_do_resume(struct vcpu *v)
{
    ioreq_t *p;

    pt_restore_timer(v);

    /* NB. Optimised for common case (p->state == STATE_IOREQ_NONE). */
    p = &get_ioreq(v)->vp_ioreq;
    while ( p->state != STATE_IOREQ_NONE )
    {
        switch ( p->state )
        {
        case STATE_IORESP_READY: /* IORESP_READY -> NONE */
            hvm_io_assist();
            break;
        case STATE_IOREQ_READY:  /* IOREQ_{READY,INPROCESS} -> IORESP_READY */
        case STATE_IOREQ_INPROCESS:
            wait_on_xen_event_channel(v->arch.hvm_vcpu.xen_port,
                                      (p->state != STATE_IOREQ_READY) &&
                                      (p->state != STATE_IOREQ_INPROCESS));
            break;
        default:
            gdprintk(XENLOG_ERR, "Weird HVM iorequest state %d.\n", p->state);
            domain_crash_synchronous();
        }
    }
}

static void hvm_init_ioreq_page(
    struct domain *d, struct hvm_ioreq_page *iorp)
{
    memset(iorp, 0, sizeof(*iorp));
    spin_lock_init(&iorp->lock);
    domain_pause(d);
}

static void hvm_destroy_ioreq_page(
    struct domain *d, struct hvm_ioreq_page *iorp)
{
    spin_lock(&iorp->lock);

    ASSERT(d->is_dying);

    if ( iorp->va != NULL )
    {
        unmap_domain_page_global(iorp->va);
        put_page_and_type(iorp->page);
        iorp->va = NULL;
    }

    spin_unlock(&iorp->lock);
}

static int hvm_set_ioreq_page(
    struct domain *d, struct hvm_ioreq_page *iorp, unsigned long gmfn)
{
    struct page_info *page;
    p2m_type_t p2mt;
    unsigned long mfn;
    void *va;

    mfn = mfn_x(gfn_to_mfn(d, gmfn, &p2mt));
    if ( !p2m_is_ram(p2mt) )
        return -EINVAL;
    ASSERT(mfn_valid(mfn));

    page = mfn_to_page(mfn);
    if ( !get_page_and_type(page, d, PGT_writable_page) )
        return -EINVAL;

    va = map_domain_page_global(mfn);
    if ( va == NULL )
    {
        put_page_and_type(page);
        return -ENOMEM;
    }

    spin_lock(&iorp->lock);

    if ( (iorp->va != NULL) || d->is_dying )
    {
        spin_unlock(&iorp->lock);
        unmap_domain_page_global(va);
        put_page_and_type(mfn_to_page(mfn));
        return -EINVAL;
    }

    iorp->va = va;
    iorp->page = page;

    spin_unlock(&iorp->lock);

    domain_unpause(d);

    return 0;
}

int hvm_domain_initialise(struct domain *d)
{
    int rc;

    if ( !hvm_enabled )
    {
        gdprintk(XENLOG_WARNING, "Attempt to create a HVM guest "
                 "on a non-VT/AMDV platform.\n");
        return -EINVAL;
    }

    spin_lock_init(&d->arch.hvm_domain.pbuf_lock);
    spin_lock_init(&d->arch.hvm_domain.irq_lock);
    spin_lock_init(&d->arch.hvm_domain.uc_lock);

    hvm_init_cacheattr_region_list(d);

    rc = paging_enable(d, PG_refcounts|PG_translate|PG_external);
    if ( rc != 0 )
        goto fail1;

    vpic_init(d);

    rc = vioapic_init(d);
    if ( rc != 0 )
        goto fail1;

    stdvga_init(d);

    hvm_init_ioreq_page(d, &d->arch.hvm_domain.ioreq);
    hvm_init_ioreq_page(d, &d->arch.hvm_domain.buf_ioreq);

    rc = hvm_funcs.domain_initialise(d);
    if ( rc != 0 )
        goto fail2;

    return 0;

 fail2:
    vioapic_deinit(d);
 fail1:
    hvm_destroy_cacheattr_region_list(d);
    return rc;
}

void hvm_domain_relinquish_resources(struct domain *d)
{
    hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.ioreq);
    hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.buf_ioreq);

    pit_deinit(d);
    rtc_deinit(d);
    pmtimer_deinit(d);
    hpet_deinit(d);
    stdvga_deinit(d);
}

void hvm_domain_destroy(struct domain *d)
{
    hvm_funcs.domain_destroy(d);
    vioapic_deinit(d);
    hvm_destroy_cacheattr_region_list(d);
}
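/*
 * Editorial note: the next two functions are the per-vcpu save/restore
 * handlers registered below via HVM_REGISTER_SAVE_RESTORE(CPU, ...).
 * The save side serialises each online vcpu's FPU, general-purpose and
 * debug register state into the save record; the load side validates
 * the record and reinstates that state on restore.
 */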
static int hvm_save_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
{
    struct vcpu *v;
    struct hvm_hw_cpu ctxt;
    struct vcpu_guest_context *vc;

    for_each_vcpu(d, v)
    {
        /* We don't need to save state for a vcpu that is down; the restore
         * code will leave it down if there is nothing saved. */
        if ( test_bit(_VPF_down, &v->pause_flags) )
            continue;

        /* Architecture-specific vmcs/vmcb bits */
        hvm_funcs.save_cpu_ctxt(v, &ctxt);

        /* Other vcpu register state */
        vc = &v->arch.guest_context;
        if ( v->fpu_initialised )
            memcpy(ctxt.fpu_regs, &vc->fpu_ctxt, sizeof(ctxt.fpu_regs));
        else
            memset(ctxt.fpu_regs, 0, sizeof(ctxt.fpu_regs));
        ctxt.rax = vc->user_regs.eax;
        ctxt.rbx = vc->user_regs.ebx;
        ctxt.rcx = vc->user_regs.ecx;
        ctxt.rdx = vc->user_regs.edx;
        ctxt.rbp = vc->user_regs.ebp;
        ctxt.rsi = vc->user_regs.esi;
        ctxt.rdi = vc->user_regs.edi;
        ctxt.rsp = vc->user_regs.esp;
        ctxt.rip = vc->user_regs.eip;
        ctxt.rflags = vc->user_regs.eflags;
#ifdef __x86_64__
        ctxt.r8  = vc->user_regs.r8;
        ctxt.r9  = vc->user_regs.r9;
        ctxt.r10 = vc->user_regs.r10;
        ctxt.r11 = vc->user_regs.r11;
        ctxt.r12 = vc->user_regs.r12;
        ctxt.r13 = vc->user_regs.r13;
        ctxt.r14 = vc->user_regs.r14;
        ctxt.r15 = vc->user_regs.r15;
#endif
        ctxt.dr0 = vc->debugreg[0];
        ctxt.dr1 = vc->debugreg[1];
        ctxt.dr2 = vc->debugreg[2];
        ctxt.dr3 = vc->debugreg[3];
        ctxt.dr6 = vc->debugreg[6];
        ctxt.dr7 = vc->debugreg[7];

        if ( hvm_save_entry(CPU, v->vcpu_id, h, &ctxt) != 0 )
            return 1;
    }
    return 0;
}

static int hvm_load_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
{
    int vcpuid, rc;
    struct vcpu *v;
    struct hvm_hw_cpu ctxt;
    struct vcpu_guest_context *vc;

    /* Which vcpu is this? (vcpuid indexes d->vcpu[], so >= is out of range) */
    vcpuid = hvm_load_instance(h);
    if ( vcpuid >= MAX_VIRT_CPUS || (v = d->vcpu[vcpuid]) == NULL )
    {
        gdprintk(XENLOG_ERR, "HVM restore: domain has no vcpu %u\n", vcpuid);
        return -EINVAL;
    }
    vc = &v->arch.guest_context;

    /* Need to init this vcpu before loading its contents */
    LOCK_BIGLOCK(d);
    if ( !v->is_initialised &&
         ((rc = boot_vcpu(d, vcpuid, vc)) != 0) )
    {
        UNLOCK_BIGLOCK(d);
        return rc;
    }
    UNLOCK_BIGLOCK(d);

    if ( hvm_load_entry(CPU, h, &ctxt) != 0 )
        return -EINVAL;
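    /*
     * Editorial note: the record being loaded comes from a save image the
     * hypervisor cannot assume is well-formed, so the checks below reject
     * reserved CR0/CR4 bits and any EFER setting the host cannot honour
     * (NX/SCE/FFXSE without the matching CPU feature, LME on a 32-bit
     * hypervisor, or LMA set without LME).
     */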
    /* Sanity check some control registers. */
    if ( (ctxt.cr0 & HVM_CR0_GUEST_RESERVED_BITS) ||
         !(ctxt.cr0 & X86_CR0_ET) ||
         ((ctxt.cr0 & (X86_CR0_PE|X86_CR0_PG)) == X86_CR0_PG) )
    {
        gdprintk(XENLOG_ERR, "HVM restore: bad CR0 0x%"PRIx64"\n",
                 ctxt.cr0);
        return -EINVAL;
    }

    if ( ctxt.cr4 & HVM_CR4_GUEST_RESERVED_BITS )
    {
        gdprintk(XENLOG_ERR, "HVM restore: bad CR4 0x%"PRIx64"\n",
                 ctxt.cr4);
        return -EINVAL;
    }

    if ( (ctxt.msr_efer & ~(EFER_FFXSE | EFER_LME | EFER_LMA |
                            EFER_NX | EFER_SCE)) ||
         ((sizeof(long) != 8) && (ctxt.msr_efer & EFER_LME)) ||
         (!cpu_has_nx && (ctxt.msr_efer & EFER_NX)) ||
         (!cpu_has_syscall && (ctxt.msr_efer & EFER_SCE)) ||
         (!cpu_has_ffxsr && (ctxt.msr_efer & EFER_FFXSE)) ||
         ((ctxt.msr_efer & (EFER_LME|EFER_LMA)) == EFER_LMA) )
    {
        gdprintk(XENLOG_ERR, "HVM restore: bad EFER 0x%"PRIx64"\n",
                 ctxt.msr_efer);
        return -EINVAL;
    }

    /* Architecture-specific vmcs/vmcb bits */
    if ( hvm_funcs.load_cpu_ctxt(v, &ctxt) < 0 )
        return -EINVAL;

    /* Other vcpu register state */
    memcpy(&vc->fpu_ctxt, ctxt.fpu_regs, sizeof(ctxt.fpu_regs));
    vc->user_regs.eax = ctxt.rax;
    vc->user_regs.ebx = ctxt.rbx;
    vc->user_regs.ecx = ctxt.rcx;
    vc->user_regs.edx = ctxt.rdx;
    vc->user_regs.ebp = ctxt.rbp;
    vc->user_regs.esi = ctxt.rsi;
    vc->user_regs.edi = ctxt.rdi;
    vc->user_regs.esp = ctxt.rsp;
    vc->user_regs.eip = ctxt.rip;
    vc->user_regs.eflags = ctxt.rflags | 2;
#ifdef __x86_64__
    vc->user_regs.r8  = ctxt.r8;
    vc->user_regs.r9  = ctxt.r9;
    vc->user_regs.r10 = ctxt.r10;
    vc->user_regs.r11 = ctxt.r11;
    vc->user_regs.r12 = ctxt.r12;
    vc->user_regs.r13 = ctxt.r13;
    vc->user_regs.r14 = ctxt.r14;
    vc->user_regs.r15 = ctxt.r15;
#endif
    vc->debugreg[0] = ctxt.dr0;
    vc->debugreg[1] = ctxt.dr1;
    vc->debugreg[2] = ctxt.dr2;
    vc->debugreg[3] = ctxt.dr3;
    vc->debugreg[6] = ctxt.dr6;
    vc->debugreg[7] = ctxt.dr7;

    vc->flags = VGCF_online;
    v->fpu_initialised = 1;

    /* Auxiliary processors should be woken immediately. */
    if ( test_and_clear_bit(_VPF_down, &v->pause_flags) )
        vcpu_wake(v);

    return 0;
}

HVM_REGISTER_SAVE_RESTORE(CPU, hvm_save_cpu_ctxt, hvm_load_cpu_ctxt,
                          1, HVMSR_PER_VCPU);

extern int reset_vmsr(struct mtrr_state *m, u64 *p);

int hvm_vcpu_initialise(struct vcpu *v)
{
    int rc;

    if ( (rc = vlapic_init(v)) != 0 )
        goto fail1;

    if ( (rc = hvm_funcs.vcpu_initialise(v)) != 0 )
        goto fail2;

    /* Create ioreq event channel. */
    rc = alloc_unbound_xen_event_channel(v, 0);
    if ( rc < 0 )
        goto fail3;

    /* Register ioreq event channel. */
    v->arch.hvm_vcpu.xen_port = rc;
    spin_lock(&v->domain->arch.hvm_domain.ioreq.lock);
    if ( v->domain->arch.hvm_domain.ioreq.va != NULL )
        get_ioreq(v)->vp_eport = v->arch.hvm_vcpu.xen_port;
    spin_unlock(&v->domain->arch.hvm_domain.ioreq.lock);

    spin_lock_init(&v->arch.hvm_vcpu.tm_lock);
    INIT_LIST_HEAD(&v->arch.hvm_vcpu.tm_list);

    rc = reset_vmsr(&v->arch.hvm_vcpu.mtrr, &v->arch.hvm_vcpu.pat_cr);
    if ( rc != 0 )
        goto fail3;

    v->arch.guest_context.user_regs.eflags = 2;

    if ( v->vcpu_id == 0 )
    {
        /* NB. All these really belong in hvm_domain_initialise(). */
        pit_init(v, cpu_khz);
        rtc_init(v, RTC_PORT(0));
        pmtimer_init(v);
        hpet_init(v);

        /* Init guest TSC to start from zero. */
        hvm_set_guest_time(v, 0);

        /* Can start up without SIPI-SIPI or setvcpucontext domctl. */
        v->is_initialised = 1;
        clear_bit(_VPF_down, &v->pause_flags);
    }

    return 0;

 fail3:
    hvm_funcs.vcpu_destroy(v);
 fail2:
    vlapic_destroy(v);
 fail1:
    return rc;
}
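/*
 * Editorial note: teardown mirrors hvm_vcpu_initialise(): the MTRR
 * variable-range array, the vlapic and the arch-specific (VMX/SVM) state
 * are freed here, while the ioreq event channel is torn down with the
 * rest of the domain's channels by evtchn_destroy().
 */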
void hvm_vcpu_destroy(struct vcpu *v)
{
    xfree(v->arch.hvm_vcpu.mtrr.var_ranges);
    vlapic_destroy(v);
    hvm_funcs.vcpu_destroy(v);

    /* Event channel is already freed by evtchn_destroy(). */
    /*free_xen_event_channel(v, v->arch.hvm_vcpu.xen_port);*/
}

void hvm_vcpu_reset(struct vcpu *v)
{
    vcpu_pause(v);

    vlapic_reset(vcpu_vlapic(v));

    hvm_funcs.vcpu_initialise(v);