📄 domain.c
/******************************************************************************
 * arch/x86/domain.c
 *
 * x86-specific domain handling (e.g., register setup and context switching).
 */

/*
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *  Gareth Hughes <gareth@valinux.com>, May 2000
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/sched.h>
#include <xen/domain.h>
#include <xen/smp.h>
#include <xen/delay.h>
#include <xen/softirq.h>
#include <xen/grant_table.h>
#include <xen/iocap.h>
#include <xen/kernel.h>
#include <xen/multicall.h>
#include <xen/irq.h>
#include <xen/event.h>
#include <xen/console.h>
#include <xen/percpu.h>
#include <xen/compat.h>
#include <xen/acpi.h>
#include <asm/regs.h>
#include <asm/mc146818rtc.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/desc.h>
#include <asm/i387.h>
#include <asm/mpspec.h>
#include <asm/ldt.h>
#include <asm/paging.h>
#include <asm/hypercall.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/support.h>
#include <asm/debugreg.h>
#include <asm/msr.h>
#include <asm/nmi.h>
#include <asm/iommu.h>
#ifdef CONFIG_COMPAT
#include <compat/vcpu.h>
#endif

DEFINE_PER_CPU(struct vcpu *, curr_vcpu);
DEFINE_PER_CPU(u64, efer);
DEFINE_PER_CPU(unsigned long, cr4);

static void paravirt_ctxt_switch_from(struct vcpu *v);
static void paravirt_ctxt_switch_to(struct vcpu *v);

static void vcpu_destroy_pagetables(struct vcpu *v);

static void continue_idle_domain(struct vcpu *v)
{
    reset_stack_and_jump(idle_loop);
}

static void continue_nonidle_domain(struct vcpu *v)
{
    reset_stack_and_jump(ret_from_intr);
}

static void default_idle(void)
{
    local_irq_disable();
    if ( !softirq_pending(smp_processor_id()) )
        safe_halt();
    else
        local_irq_enable();
}

static void play_dead(void)
{
    __cpu_disable();
    /* This must be done before dead CPU ack */
    cpu_exit_clear();
    hvm_cpu_down();
    wbinvd();
    mb();
    /* Ack it */
    __get_cpu_var(cpu_state) = CPU_DEAD;

    /* With physical CPU hotplug, we should halt the cpu. */
    local_irq_disable();
    for ( ; ; )
        halt();
}

void idle_loop(void)
{
    for ( ; ; )
    {
        if (cpu_is_offline(smp_processor_id()))
            play_dead();
        page_scrub_schedule_work();
        default_idle();
        do_softirq();
    }
}

void startup_cpu_idle_loop(void)
{
    struct vcpu *v = current;

    ASSERT(is_idle_vcpu(v));
    cpu_set(smp_processor_id(), v->domain->domain_dirty_cpumask);
    cpu_set(smp_processor_id(), v->vcpu_dirty_cpumask);

    reset_stack_and_jump(idle_loop);
}

void dump_pageframe_info(struct domain *d)
{
    struct page_info *page;

    printk("Memory pages belonging to domain %u:\n", d->domain_id);

    if ( d->tot_pages >= 10 )
    {
        printk("    DomPage list too long to display\n");
    }
    else
    {
        list_for_each_entry ( page, &d->page_list, list )
        {
            printk("    DomPage %p: caf=%08x, taf=%" PRtype_info "\n",
                   _p(page_to_mfn(page)),
                   page->count_info, page->u.inuse.type_info);
        }
    }

    list_for_each_entry ( page, &d->xenpage_list, list )
    {
        printk("    XenPage %p: caf=%08x, taf=%" PRtype_info "\n",
               _p(page_to_mfn(page)),
               page->count_info, page->u.inuse.type_info);
    }
}

struct vcpu *alloc_vcpu_struct(void)
{
    struct vcpu *v;
    if ( (v = xmalloc(struct vcpu)) != NULL )
        memset(v, 0, sizeof(*v));
    return v;
}

void free_vcpu_struct(struct vcpu *v)
{
    xfree(v);
}

#ifdef CONFIG_COMPAT

int setup_arg_xlat_area(struct vcpu *v, l4_pgentry_t *l4tab)
{
    struct domain *d = v->domain;
    unsigned i;
    struct page_info *pg;

    if ( !d->arch.mm_arg_xlat_l3 )
    {
        pg = alloc_domheap_page(NULL);
        if ( !pg )
            return -ENOMEM;
        d->arch.mm_arg_xlat_l3 = page_to_virt(pg);
        clear_page(d->arch.mm_arg_xlat_l3);
    }

    l4tab[l4_table_offset(COMPAT_ARG_XLAT_VIRT_BASE)] =
        l4e_from_paddr(__pa(d->arch.mm_arg_xlat_l3), __PAGE_HYPERVISOR);

    for ( i = 0; i < COMPAT_ARG_XLAT_PAGES; ++i )
    {
        unsigned long va = COMPAT_ARG_XLAT_VIRT_START(v->vcpu_id) + i * PAGE_SIZE;
        l2_pgentry_t *l2tab;
        l1_pgentry_t *l1tab;

        if ( !l3e_get_intpte(d->arch.mm_arg_xlat_l3[l3_table_offset(va)]) )
        {
            pg = alloc_domheap_page(NULL);
            if ( !pg )
                return -ENOMEM;
            clear_page(page_to_virt(pg));
            d->arch.mm_arg_xlat_l3[l3_table_offset(va)] =
                l3e_from_page(pg, __PAGE_HYPERVISOR);
        }
        l2tab = l3e_to_l2e(d->arch.mm_arg_xlat_l3[l3_table_offset(va)]);
        if ( !l2e_get_intpte(l2tab[l2_table_offset(va)]) )
        {
            pg = alloc_domheap_page(NULL);
            if ( !pg )
                return -ENOMEM;
            clear_page(page_to_virt(pg));
            l2tab[l2_table_offset(va)] = l2e_from_page(pg, __PAGE_HYPERVISOR);
        }
        l1tab = l2e_to_l1e(l2tab[l2_table_offset(va)]);
        BUG_ON(l1e_get_intpte(l1tab[l1_table_offset(va)]));
        pg = alloc_domheap_page(NULL);
        if ( !pg )
            return -ENOMEM;
        l1tab[l1_table_offset(va)] = l1e_from_page(pg, PAGE_HYPERVISOR);
    }

    return 0;
}

static void release_arg_xlat_area(struct domain *d)
{
    if ( d->arch.mm_arg_xlat_l3 )
    {
        unsigned l3;

        for ( l3 = 0; l3 < L3_PAGETABLE_ENTRIES; ++l3 )
        {
            if ( l3e_get_intpte(d->arch.mm_arg_xlat_l3[l3]) )
            {
                l2_pgentry_t *l2tab = l3e_to_l2e(d->arch.mm_arg_xlat_l3[l3]);
                unsigned l2;

                for ( l2 = 0; l2 < L2_PAGETABLE_ENTRIES; ++l2 )
                {
                    if ( l2e_get_intpte(l2tab[l2]) )
                    {
                        l1_pgentry_t *l1tab = l2e_to_l1e(l2tab[l2]);
                        unsigned l1;

                        for ( l1 = 0; l1 < L1_PAGETABLE_ENTRIES; ++l1 )
                        {
                            if ( l1e_get_intpte(l1tab[l1]) )
                                free_domheap_page(l1e_get_page(l1tab[l1]));
                        }
                        free_domheap_page(l2e_get_page(l2tab[l2]));
                    }
                }
                free_domheap_page(l3e_get_page(d->arch.mm_arg_xlat_l3[l3]));
            }
        }
        free_domheap_page(virt_to_page(d->arch.mm_arg_xlat_l3));
    }
}

static int setup_compat_l4(struct vcpu *v)
{
    struct page_info *pg = alloc_domheap_page(NULL);
    l4_pgentry_t *l4tab;
    int rc;

    if ( pg == NULL )
        return -ENOMEM;

    /* This page needs to look like a pagetable so that it can be shadowed */
    pg->u.inuse.type_info = PGT_l4_page_table|PGT_validated;

    l4tab = copy_page(page_to_virt(pg), idle_pg_table);
    l4tab[0] = l4e_empty();
    l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] =
        l4e_from_page(pg, __PAGE_HYPERVISOR);
    l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] =
        l4e_from_paddr(__pa(v->domain->arch.mm_perdomain_l3),
                       __PAGE_HYPERVISOR);

    if ( (rc = setup_arg_xlat_area(v, l4tab)) < 0 )
    {
        free_domheap_page(pg);
        return rc;
    }

    v->arch.guest_table = pagetable_from_page(pg);
    v->arch.guest_table_user = v->arch.guest_table;

    return 0;
}

static void release_compat_l4(struct vcpu *v)
{
    free_domheap_page(pagetable_get_page(v->arch.guest_table));
    v->arch.guest_table = pagetable_null();
    v->arch.guest_table_user = pagetable_null();
}

static inline int may_switch_mode(struct domain *d)
{
    return (!is_hvm_domain(d) && (d->tot_pages == 0));
}

int switch_native(struct domain *d)
{
    l1_pgentry_t gdt_l1e;
    unsigned int vcpuid;

    if ( d == NULL )
        return -EINVAL;
    if ( !may_switch_mode(d) )
        return -EACCES;
    if ( !is_pv_32on64_domain(d) )
        return 0;

    d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 0;
    release_arg_xlat_area(d);

    /* switch gdt */
    gdt_l1e = l1e_from_page(virt_to_page(gdt_table), PAGE_HYPERVISOR);
    for ( vcpuid = 0; vcpuid < MAX_VIRT_CPUS; vcpuid++ )
    {
        d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) +
                                 FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
        if (d->vcpu[vcpuid])
            release_compat_l4(d->vcpu[vcpuid]);
    }

    return 0;
}

int switch_compat(struct domain *d)
{
    l1_pgentry_t gdt_l1e;
    unsigned int vcpuid;

    if ( d == NULL )
        return -EINVAL;
    if ( !may_switch_mode(d) )
        return -EACCES;
    if ( is_pv_32on64_domain(d) )
        return 0;

    d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 1;

    /* switch gdt */
    gdt_l1e = l1e_from_page(virt_to_page(compat_gdt_table), PAGE_HYPERVISOR);
    for ( vcpuid = 0; vcpuid < MAX_VIRT_CPUS; vcpuid++ )
    {
        if ( (d->vcpu[vcpuid] != NULL) &&
             (setup_compat_l4(d->vcpu[vcpuid]) != 0) )
            goto undo_and_fail;
        d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) +
                                 FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
    }

    domain_set_alloc_bitsize(d);

    return 0;

 undo_and_fail:
    d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 0;
    release_arg_xlat_area(d);
    gdt_l1e = l1e_from_page(virt_to_page(gdt_table), PAGE_HYPERVISOR);
    while ( vcpuid-- != 0 )
    {
        if ( d->vcpu[vcpuid] != NULL )
            release_compat_l4(d->vcpu[vcpuid]);
        d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) +
                                 FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
    }

    return -ENOMEM;
}

#else
#define release_arg_xlat_area(d) ((void)0)
#define setup_compat_l4(v) 0
#define release_compat_l4(v) ((void)0)
#endif

int vcpu_initialise(struct vcpu *v)
{
    struct domain *d = v->domain;
    int rc;

    v->arch.vcpu_info_mfn = INVALID_MFN;

    v->arch.flags = TF_kernel_mode;

#if defined(__i386__)
    mapcache_vcpu_init(v);
#endif

    pae_l3_cache_init(&v->arch.pae_l3_cache);

    paging_vcpu_init(v);

    if ( is_hvm_domain(d) )
    {
        if ( (rc = hvm_vcpu_initialise(v)) != 0 )
            return rc;
    }
    else
    {
        /* PV guests by default have a 100Hz ticker. */
        v->periodic_period = MILLISECS(10);

        /* PV guests get an emulated PIT too for video BIOSes to use. */
        if ( !is_idle_domain(d) && (v->vcpu_id == 0) )
            pit_init(v, cpu_khz);

        v->arch.schedule_tail = continue_nonidle_domain;
        v->arch.ctxt_switch_from = paravirt_ctxt_switch_from;
        v->arch.ctxt_switch_to = paravirt_ctxt_switch_to;

        if ( is_idle_domain(d) )
        {
            v->arch.schedule_tail = continue_idle_domain;
            v->arch.cr3 = __pa(idle_pg_table);
        }

        v->arch.guest_context.ctrlreg[4] =
            real_cr4_to_pv_guest_cr4(mmu_cr4_features);
    }

    v->arch.perdomain_ptes =
        d->arch.mm_perdomain_pt + (v->vcpu_id << GDT_LDT_VCPU_SHIFT);

    return (is_pv_32on64_vcpu(v) ? setup_compat_l4(v) : 0);
}

void vcpu_destroy(struct vcpu *v)
{
    if ( is_pv_32on64_vcpu(v) )
        release_compat_l4(v);

    if ( is_hvm_vcpu(v) )
        hvm_vcpu_destroy(v);
}

int arch_domain_create(struct domain *d)
{
#ifdef __x86_64__
    struct page_info *pg;
    int i;
#endif
    l1_pgentry_t gdt_l1e;
    int vcpuid, pdpt_order, paging_initialised = 0;
    int rc = -ENOMEM;

    d->arch.relmem = RELMEM_not_started;
    INIT_LIST_HEAD(&d->arch.relmem_list);

    pdpt_order = get_order_from_bytes(PDPT_L1_ENTRIES * sizeof(l1_pgentry_t));
    d->arch.mm_perdomain_pt = alloc_xenheap_pages(pdpt_order);
    if ( d->arch.mm_perdomain_pt == NULL )
        goto fail;
    memset(d->arch.mm_perdomain_pt, 0, PAGE_SIZE << pdpt_order);

    /*
     * Map Xen segments into every VCPU's GDT, irrespective of whether every
     * VCPU will actually be used. This avoids an NMI race during context
     * switch: if we take an interrupt after switching CR3 but before switching
     * GDT, and the old VCPU# is invalid in the new domain, we would otherwise
     * try to load CS from an invalid table.
     */
    gdt_l1e = l1e_from_page(virt_to_page(gdt_table), PAGE_HYPERVISOR);
    for ( vcpuid = 0; vcpuid < MAX_VIRT_CPUS; vcpuid++ )
        d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) +
                                 FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;

#if defined(__i386__)

    mapcache_domain_init(d);

#else /* __x86_64__ */

    if ( (pg = alloc_domheap_page(NULL)) == NULL )
        goto fail;
    d->arch.mm_perdomain_l2 = page_to_virt(pg);
    clear_page(d->arch.mm_perdomain_l2);
    for ( i = 0; i < (1 << pdpt_order); i++ )
        d->arch.mm_perdomain_l2[l2_table_offset(PERDOMAIN_VIRT_START)+i] =
            l2e_from_page(virt_to_page(d->arch.mm_perdomain_pt)+i,
                          __PAGE_HYPERVISOR);