hvm.c
From the "xen virtual machine source package" · C source code · 2,304 lines total · page 1/5
/*
 * hvm.c: Common hardware virtual machine abstractions.
 *
 * Copyright (c) 2004, Intel Corporation.
 * Copyright (c) 2005, International Business Machines Corporation.
 * Copyright (c) 2008, Citrix Systems, Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/trace.h>
#include <xen/sched.h>
#include <xen/irq.h>
#include <xen/softirq.h>
#include <xen/domain.h>
#include <xen/domain_page.h>
#include <xen/hypercall.h>
#include <xen/guest_access.h>
#include <xen/event.h>
#include <asm/current.h>
#include <asm/e820.h>
#include <asm/io.h>
#include <asm/paging.h>
#include <asm/regs.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/types.h>
#include <asm/msr.h>
#include <asm/mc146818rtc.h>
#include <asm/spinlock.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/vpt.h>
#include <asm/hvm/support.h>
#include <asm/hvm/cacheattr.h>
#include <asm/hvm/trace.h>
#include <public/sched.h>
#include <public/hvm/ioreq.h>
#include <public/version.h>
#include <public/memory.h>

int hvm_enabled __read_mostly;

unsigned int opt_hvm_debug_level __read_mostly;
integer_param("hvm_debug", opt_hvm_debug_level);

int opt_softtsc;
boolean_param("softtsc", opt_softtsc);

struct hvm_function_table hvm_funcs __read_mostly;

/* I/O permission bitmap is globally shared by all HVM guests. */
unsigned long __attribute__ ((__section__ (".bss.page_aligned")))
    hvm_io_bitmap[3*PAGE_SIZE/BYTES_PER_LONG];

void hvm_enable(struct hvm_function_table *fns)
{
    BUG_ON(hvm_enabled);
    printk("HVM: %s enabled\n", fns->name);

    /*
     * Allow direct access to the PC debug port (it is often used for I/O
     * delays, but the vmexits simply slow things down).
     */
    memset(hvm_io_bitmap, ~0, sizeof(hvm_io_bitmap));
    __clear_bit(0x80, hvm_io_bitmap);

    hvm_funcs   = *fns;
    hvm_enabled = 1;

    if ( hvm_funcs.hap_supported )
        printk("HVM: Hardware Assisted Paging detected.\n");
}

/*
 * Need to re-inject a given event? We avoid re-injecting software exceptions
 * and interrupts because the faulting/trapping instruction can simply be
 * re-executed (neither VMX nor SVM update RIP when they VMEXIT during
 * INT3/INTO/INTn).
 */
int hvm_event_needs_reinjection(uint8_t type, uint8_t vector)
{
    switch ( type )
    {
    case X86_EVENTTYPE_EXT_INTR:
    case X86_EVENTTYPE_NMI:
        return 1;
    case X86_EVENTTYPE_HW_EXCEPTION:
        /*
         * SVM uses type 3 ("HW Exception") for #OF and #BP. We explicitly
         * check for these vectors, as they are really SW Exceptions. SVM has
         * not updated RIP to point after the trapping instruction (INT3/INTO).
         */
        return (vector != 3) && (vector != 4);
    default:
        /* Software exceptions/interrupts can be re-executed (e.g., INT n). */
        break;
    }
    return 0;
}
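/*
 * Note on hvm_combine_hw_exceptions() below: the mask 0x7c01u has bits 0
 * and 10-14 set, i.e. it selects #DE(0), #TS(10), #NP(11), #SS(12),
 * #GP(13) and #PF(14), the vectors x86 treats as contributory for
 * double-fault purposes. Any vector outside this set is benign and can
 * simply be discarded in favour of @vec2.
 */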
/*
 * Combine two hardware exceptions: @vec2 was raised during delivery of @vec1.
 * This means we can assume that @vec2 is contributory or a page fault.
 */
uint8_t hvm_combine_hw_exceptions(uint8_t vec1, uint8_t vec2)
{
    /* Exception during double-fault delivery always causes a triple fault. */
    if ( vec1 == TRAP_double_fault )
    {
        hvm_triple_fault();
        return TRAP_double_fault; /* dummy return */
    }

    /* Exception during page-fault delivery always causes a double fault. */
    if ( vec1 == TRAP_page_fault )
        return TRAP_double_fault;

    /* Discard the first exception if it's benign or if we now have a #PF. */
    if ( !((1u << vec1) & 0x7c01u) || (vec2 == TRAP_page_fault) )
        return vec2;

    /* Cannot combine the exceptions: double fault. */
    return TRAP_double_fault;
}

void hvm_set_guest_tsc(struct vcpu *v, u64 guest_tsc)
{
    u64 host_tsc;

    rdtscll(host_tsc);

    v->arch.hvm_vcpu.cache_tsc_offset = guest_tsc - host_tsc;
    hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset);
}

u64 hvm_get_guest_tsc(struct vcpu *v)
{
    u64 host_tsc;

    if ( opt_softtsc )
        host_tsc = hvm_get_guest_time(v);
    else
        rdtscll(host_tsc);

    return host_tsc + v->arch.hvm_vcpu.cache_tsc_offset;
}

void hvm_migrate_timers(struct vcpu *v)
{
    rtc_migrate_timers(v);
    hpet_migrate_timers(v);
    pt_migrate(v);
}

void hvm_do_resume(struct vcpu *v)
{
    ioreq_t *p;

    pt_restore_timer(v);

    /* NB. Optimised for common case (p->state == STATE_IOREQ_NONE). */
    p = &get_ioreq(v)->vp_ioreq;
    while ( p->state != STATE_IOREQ_NONE )
    {
        switch ( p->state )
        {
        case STATE_IORESP_READY: /* IORESP_READY -> NONE */
            hvm_io_assist();
            break;
        case STATE_IOREQ_READY:  /* IOREQ_{READY,INPROCESS} -> IORESP_READY */
        case STATE_IOREQ_INPROCESS:
            wait_on_xen_event_channel(v->arch.hvm_vcpu.xen_port,
                                      (p->state != STATE_IOREQ_READY) &&
                                      (p->state != STATE_IOREQ_INPROCESS));
            break;
        default:
            gdprintk(XENLOG_ERR, "Weird HVM iorequest state %d.\n", p->state);
            domain_crash(v->domain);
            return; /* bail */
        }
    }
}

static void hvm_init_ioreq_page(
    struct domain *d, struct hvm_ioreq_page *iorp)
{
    memset(iorp, 0, sizeof(*iorp));
    spin_lock_init(&iorp->lock);
    domain_pause(d);
}

static void hvm_destroy_ioreq_page(
    struct domain *d, struct hvm_ioreq_page *iorp)
{
    spin_lock(&iorp->lock);
    ASSERT(d->is_dying);
    if ( iorp->va != NULL )
    {
        unmap_domain_page_global(iorp->va);
        put_page_and_type(iorp->page);
        iorp->va = NULL;
    }
    spin_unlock(&iorp->lock);
}

static int hvm_set_ioreq_page(
    struct domain *d, struct hvm_ioreq_page *iorp, unsigned long gmfn)
{
    struct page_info *page;
    p2m_type_t p2mt;
    unsigned long mfn;
    void *va;

    mfn = mfn_x(gfn_to_mfn(d, gmfn, &p2mt));
    if ( !p2m_is_ram(p2mt) )
        return -EINVAL;
    ASSERT(mfn_valid(mfn));

    page = mfn_to_page(mfn);
    if ( !get_page_and_type(page, d, PGT_writable_page) )
        return -EINVAL;

    va = map_domain_page_global(mfn);
    if ( va == NULL )
    {
        put_page_and_type(page);
        return -ENOMEM;
    }

    spin_lock(&iorp->lock);

    if ( (iorp->va != NULL) || d->is_dying )
    {
        spin_unlock(&iorp->lock);
        unmap_domain_page_global(va);
        put_page_and_type(mfn_to_page(mfn));
        return -EINVAL;
    }

    iorp->va = va;
    iorp->page = page;

    spin_unlock(&iorp->lock);

    domain_unpause(d);

    return 0;
}

static int hvm_print_line(
    int dir, uint32_t port, uint32_t bytes, uint32_t *val)
{
    struct vcpu *curr = current;
    struct hvm_domain *hd = &curr->domain->arch.hvm_domain;
    char c = *val;

    BUG_ON(bytes != 1);

    spin_lock(&hd->pbuf_lock);
    hd->pbuf[hd->pbuf_idx++] = c;
    if ( (hd->pbuf_idx == (sizeof(hd->pbuf) - 2)) || (c == '\n') )
    {
        if ( c != '\n' )
            hd->pbuf[hd->pbuf_idx++] = '\n';
        hd->pbuf[hd->pbuf_idx] = '\0';
        printk(XENLOG_G_DEBUG "HVM%u: %s", curr->domain->domain_id, hd->pbuf);
        hd->pbuf_idx = 0;
    }
    spin_unlock(&hd->pbuf_lock);

    return X86EMUL_OKAY;
}
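/*
 * Per-domain HVM setup: initialise the domain's locks and guest time,
 * enable refcounted/translated/external paging, create the emulated
 * platform devices (PIC, IOAPIC, stdvga, RTC), prepare the ioreq pages
 * shared with the device model, hook port 0xe9 up to hvm_print_line()
 * for guest debug output, and finally run the VMX/SVM-specific
 * domain_initialise hook.
 */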
int hvm_domain_initialise(struct domain *d)
{
    int rc;

    if ( !hvm_enabled )
    {
        gdprintk(XENLOG_WARNING, "Attempt to create a HVM guest "
                 "on a non-VT/AMDV platform.\n");
        return -EINVAL;
    }

    spin_lock_init(&d->arch.hvm_domain.pbuf_lock);
    spin_lock_init(&d->arch.hvm_domain.irq_lock);
    spin_lock_init(&d->arch.hvm_domain.uc_lock);

    hvm_init_guest_time(d);

    d->arch.hvm_domain.params[HVM_PARAM_HPET_ENABLED] = 1;

    hvm_init_cacheattr_region_list(d);

    rc = paging_enable(d, PG_refcounts|PG_translate|PG_external);
    if ( rc != 0 )
        goto fail1;

    vpic_init(d);

    rc = vioapic_init(d);
    if ( rc != 0 )
        goto fail1;

    stdvga_init(d);

    rtc_init(d);

    hvm_init_ioreq_page(d, &d->arch.hvm_domain.ioreq);
    hvm_init_ioreq_page(d, &d->arch.hvm_domain.buf_ioreq);

    register_portio_handler(d, 0xe9, 1, hvm_print_line);

    rc = hvm_funcs.domain_initialise(d);
    if ( rc != 0 )
        goto fail2;

    return 0;

 fail2:
    rtc_deinit(d);
    stdvga_deinit(d);
    vioapic_deinit(d);
 fail1:
    hvm_destroy_cacheattr_region_list(d);
    return rc;
}

void hvm_domain_relinquish_resources(struct domain *d)
{
    hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.ioreq);
    hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.buf_ioreq);

    /* Stop all asynchronous timer actions. */
    rtc_deinit(d);
    if ( d->vcpu[0] != NULL )
    {
        pit_deinit(d);
        pmtimer_deinit(d);
        hpet_deinit(d);
    }
}

void hvm_domain_destroy(struct domain *d)
{
    hvm_funcs.domain_destroy(d);
    rtc_deinit(d);
    stdvga_deinit(d);
    vioapic_deinit(d);
    hvm_destroy_cacheattr_region_list(d);
}
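/*
 * Save each online vcpu's state for save/restore and migration: the
 * VMX/SVM-specific VMCS/VMCB bits, all segment registers, the FPU image
 * and the general-purpose registers. Vcpus marked _VPF_down are skipped,
 * since the restore side simply leaves them down.
 */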
static int hvm_save_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
{
    struct vcpu *v;
    struct hvm_hw_cpu ctxt;
    struct segment_register seg;
    struct vcpu_guest_context *vc;

    for_each_vcpu ( d, v )
    {
        /* We don't need to save state for a vcpu that is down; the restore
         * code will leave it down if there is nothing saved. */
        if ( test_bit(_VPF_down, &v->pause_flags) )
            continue;

        /* Architecture-specific vmcs/vmcb bits */
        hvm_funcs.save_cpu_ctxt(v, &ctxt);

        hvm_get_segment_register(v, x86_seg_idtr, &seg);
        ctxt.idtr_limit = seg.limit;
        ctxt.idtr_base = seg.base;

        hvm_get_segment_register(v, x86_seg_gdtr, &seg);
        ctxt.gdtr_limit = seg.limit;
        ctxt.gdtr_base = seg.base;

        hvm_get_segment_register(v, x86_seg_cs, &seg);
        ctxt.cs_sel = seg.sel;
        ctxt.cs_limit = seg.limit;
        ctxt.cs_base = seg.base;
        ctxt.cs_arbytes = seg.attr.bytes;

        hvm_get_segment_register(v, x86_seg_ds, &seg);
        ctxt.ds_sel = seg.sel;
        ctxt.ds_limit = seg.limit;
        ctxt.ds_base = seg.base;
        ctxt.ds_arbytes = seg.attr.bytes;

        hvm_get_segment_register(v, x86_seg_es, &seg);
        ctxt.es_sel = seg.sel;
        ctxt.es_limit = seg.limit;
        ctxt.es_base = seg.base;
        ctxt.es_arbytes = seg.attr.bytes;

        hvm_get_segment_register(v, x86_seg_ss, &seg);
        ctxt.ss_sel = seg.sel;
        ctxt.ss_limit = seg.limit;
        ctxt.ss_base = seg.base;
        ctxt.ss_arbytes = seg.attr.bytes;

        hvm_get_segment_register(v, x86_seg_fs, &seg);
        ctxt.fs_sel = seg.sel;
        ctxt.fs_limit = seg.limit;
        ctxt.fs_base = seg.base;
        ctxt.fs_arbytes = seg.attr.bytes;

        hvm_get_segment_register(v, x86_seg_gs, &seg);
        ctxt.gs_sel = seg.sel;
        ctxt.gs_limit = seg.limit;
        ctxt.gs_base = seg.base;
        ctxt.gs_arbytes = seg.attr.bytes;

        hvm_get_segment_register(v, x86_seg_tr, &seg);
        ctxt.tr_sel = seg.sel;
        ctxt.tr_limit = seg.limit;
        ctxt.tr_base = seg.base;
        ctxt.tr_arbytes = seg.attr.bytes;

        hvm_get_segment_register(v, x86_seg_ldtr, &seg);
        ctxt.ldtr_sel = seg.sel;
        ctxt.ldtr_limit = seg.limit;
        ctxt.ldtr_base = seg.base;
        ctxt.ldtr_arbytes = seg.attr.bytes;

        vc = &v->arch.guest_context;

        if ( v->fpu_initialised )
            memcpy(ctxt.fpu_regs, &vc->fpu_ctxt, sizeof(ctxt.fpu_regs));
        else
            memset(ctxt.fpu_regs, 0, sizeof(ctxt.fpu_regs));

        ctxt.rax = vc->user_regs.eax;
        ctxt.rbx = vc->user_regs.ebx;
        ctxt.rcx = vc->user_regs.ecx;
        ctxt.rdx = vc->user_regs.edx;
        ctxt.rbp = vc->user_regs.ebp;
        ctxt.rsi = vc->user_regs.esi;
        ctxt.rdi = vc->user_regs.edi;
        ctxt.rsp = vc->user_regs.esp;
        ctxt.rip = vc->user_regs.eip;
        ctxt.rflags = vc->user_regs.eflags;

#ifdef __x86_64__
        ctxt.r8  = vc->user_regs.r8;