domain.c

来自「xen虚拟机源代码安装包」· C语言 代码 · 共 1,879 行 · 第 1/4 页

C
1,879
字号
/******************************************************************************
 * arch/x86/domain.c
 *
 * x86-specific domain handling (e.g., register setup and context switching).
 */

/*
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *  Gareth Hughes <gareth@valinux.com>, May 2000
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/sched.h>
#include <xen/domain.h>
#include <xen/smp.h>
#include <xen/delay.h>
#include <xen/softirq.h>
#include <xen/grant_table.h>
#include <xen/iocap.h>
#include <xen/kernel.h>
#include <xen/multicall.h>
#include <xen/irq.h>
#include <xen/event.h>
#include <xen/console.h>
#include <xen/percpu.h>
#include <xen/compat.h>
#include <xen/acpi.h>
#include <xen/pci.h>
#include <asm/regs.h>
#include <asm/mc146818rtc.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/desc.h>
#include <asm/i387.h>
#include <asm/mpspec.h>
#include <asm/ldt.h>
#include <asm/paging.h>
#include <asm/hypercall.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/support.h>
#include <asm/debugreg.h>
#include <asm/msr.h>
#include <asm/nmi.h>
#include <xen/numa.h>
#include <xen/iommu.h>
#ifdef CONFIG_COMPAT
#include <compat/vcpu.h>
#endif

/* Per-CPU pointer to the vcpu whose state is currently loaded on this CPU. */
DEFINE_PER_CPU(struct vcpu *, curr_vcpu);
/* Per-CPU shadow of the EFER MSR. */
DEFINE_PER_CPU(u64, efer);
/* Per-CPU shadow of control register CR4. */
DEFINE_PER_CPU(unsigned long, cr4);

static void default_idle(void);
/* Idle-policy hook: called from idle_loop(); defaults to default_idle(). */
void (*pm_idle) (void) = default_idle;

static void paravirt_ctxt_switch_from(struct vcpu *v);
static void paravirt_ctxt_switch_to(struct vcpu *v);

static void vcpu_destroy_pagetables(struct vcpu *v);

/* schedule_tail for the idle vcpu: restart the idle loop on a fresh stack. */
static void continue_idle_domain(struct vcpu *v)
{
    reset_stack_and_jump(idle_loop);
}

/*
 * schedule_tail for PV guest vcpus: resume the guest via the interrupt
 * return path on a fresh stack.
 */
static void continue_nonidle_domain(struct vcpu *v)
{
    reset_stack_and_jump(ret_from_intr);
}

/*
 * Default idle routine: halt with interrupts atomically re-enabled unless
 * softirq work is already pending for this CPU, in which case just
 * re-enable interrupts and return so the caller can process it.
 */
static void default_idle(void)
{
    local_irq_disable();
    if ( !softirq_pending(smp_processor_id()) )
        safe_halt();
    else
        local_irq_enable();
}

/*
 * Take this (offlined) CPU out of service: flush caches, publish CPU_DEAD,
 * then halt forever with interrupts off.  Never returns.
 */
static void play_dead(void)
{
    /* This must be done before dead CPU ack */
    cpu_exit_clear();
    hvm_cpu_down();

    /*
     * Flush caches to memory before acking death; after the ack another
     * agent may assume this CPU no longer writes to memory.
     */
    wbinvd();
    mb();
    /* Ack it */
    __get_cpu_var(cpu_state) = CPU_DEAD;

    /* With physical CPU hotplug, we should halt the cpu. */
    local_irq_disable();
    for ( ; ; )
        halt();
}

/*
 * Per-CPU idle loop: dies if the CPU has been offlined, otherwise
 * schedules page-scrub work, invokes the pm_idle policy hook, and runs
 * pending softirqs.  Never returns.
 */
void idle_loop(void)
{
    for ( ; ; )
    {
        if ( cpu_is_offline(smp_processor_id()) )
            play_dead();
        page_scrub_schedule_work();
        (*pm_idle)();
        do_softirq();
    }
}

/*
 * Entry point for a CPU into its idle loop: marks this CPU dirty for the
 * idle vcpu/domain and jumps to idle_loop() on a fresh stack.
 * Must be called on an idle vcpu; never returns.
 */
void startup_cpu_idle_loop(void)
{
    struct vcpu *v = current;

    ASSERT(is_idle_vcpu(v));
    cpu_set(smp_processor_id(), v->domain->domain_dirty_cpumask);
    cpu_set(smp_processor_id(), v->vcpu_dirty_cpumask);

    reset_stack_and_jump(idle_loop);
}

/*
 * Debug helper: print the domain's page lists (count/type info per page).
 * The DomPage list is elided when the domain holds 10 or more pages to
 * keep console output bounded; XenPage entries are always printed.
 */
void dump_pageframe_info(struct domain *d)
{
    struct page_info *page;

    printk("Memory pages belonging to domain %u:\n", d->domain_id);

    if ( d->tot_pages >= 10 )
    {
        printk("    DomPage list too long to display\n");
    }
    else
    {
        list_for_each_entry ( page, &d->page_list, list )
        {
            printk("    DomPage %p: caf=%08x, taf=%" PRtype_info "\n",
                   _p(page_to_mfn(page)),
                   page->count_info, page->u.inuse.type_info);
        }
    }

    list_for_each_entry ( page, &d->xenpage_list, list )
    {
        printk("    XenPage %p: caf=%08x, taf=%" PRtype_info "\n",
               _p(page_to_mfn(page)),
               page->count_info, page->u.inuse.type_info);
    }
}

/*
 * Allocate and zero a struct vcpu.  Returns NULL on allocation failure.
 * Caller owns the result and frees it with free_vcpu_struct().
 */
struct vcpu *alloc_vcpu_struct(void)
{
    struct vcpu *v;
    if ( (v = xmalloc(struct vcpu)) != NULL )
        memset(v, 0, sizeof(*v));
    return v;
}

/* Release a struct vcpu previously obtained from alloc_vcpu_struct(). */
void free_vcpu_struct(struct vcpu *v)
{
    xfree(v);
}

#ifdef CONFIG_COMPAT

/*
 * Build the per-vcpu monitor L4 page table used when running a 32-on-64
 * compat PV guest: a copy of the idle L4 with slot 0 cleared, a
 * linear-pagetable self-mapping, and the domain's per-domain L3 mapped in.
 * Returns 0 on success, -ENOMEM if the page allocation fails.
 */
static int setup_compat_l4(struct vcpu *v)
{
    struct page_info *pg = alloc_domheap_page(NULL, 0);
    l4_pgentry_t *l4tab;

    if ( pg == NULL )
        return -ENOMEM;

    /* This page needs to look like a pagetable so that it can be shadowed */
    pg->u.inuse.type_info = PGT_l4_page_table|PGT_validated|1;

    l4tab = copy_page(page_to_virt(pg), idle_pg_table);
    l4tab[0] = l4e_empty();
    /* Self-reference so the pagetables are visible through the linear map. */
    l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] =
        l4e_from_page(pg, __PAGE_HYPERVISOR);
    l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] =
        l4e_from_paddr(__pa(v->domain->arch.mm_perdomain_l3),
                       __PAGE_HYPERVISOR);

    /* Kernel and user modes share the same top-level table for compat. */
    v->arch.guest_table = pagetable_from_page(pg);
    v->arch.guest_table_user = v->arch.guest_table;

    return 0;
}

/* Undo setup_compat_l4(): free the compat L4 and null both table refs. */
static void release_compat_l4(struct vcpu *v)
{
    free_domheap_page(pagetable_get_page(v->arch.guest_table));
    v->arch.guest_table = pagetable_null();
    v->arch.guest_table_user = pagetable_null();
}

/*
 * A PV domain may switch between native and compat mode only while it
 * still owns no memory pages; HVM domains may never switch.
 */
static inline int may_switch_mode(struct domain *d)
{
    return (!is_hvm_domain(d) && (d->tot_pages == 0));
}

/*
 * Switch a 32-on-64 PV domain back to native (64-bit) mode: point every
 * vcpu's GDT mapping at the native gdt_table and release compat L4s.
 * Returns 0 on success (also when already native), -EINVAL for a NULL
 * domain, -EACCES if the domain is not in a switchable state.
 */
int switch_native(struct domain *d)
{
    l1_pgentry_t gdt_l1e;
    unsigned int vcpuid;

    if ( d == NULL )
        return -EINVAL;
    if ( !may_switch_mode(d) )
        return -EACCES;
    if ( !is_pv_32on64_domain(d) )
        return 0;

    d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 0;

    /* switch gdt */
    gdt_l1e = l1e_from_page(virt_to_page(gdt_table), PAGE_HYPERVISOR);
    for ( vcpuid = 0; vcpuid < MAX_VIRT_CPUS; vcpuid++ )
    {
        d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) +
                                 FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
        if (d->vcpu[vcpuid])
            release_compat_l4(d->vcpu[vcpuid]);
    }

    return 0;
}

/*
 * Switch a native PV domain to 32-on-64 compat mode: build a compat L4
 * for every existing vcpu and remap each vcpu's GDT slot to the compat
 * GDT.  On allocation failure the partial conversion is fully unwound.
 * Returns 0 on success (also when already compat), -EINVAL for a NULL
 * domain, -EACCES if not switchable, -ENOMEM on allocation failure.
 */
int switch_compat(struct domain *d)
{
    l1_pgentry_t gdt_l1e;
    unsigned int vcpuid;

    if ( d == NULL )
        return -EINVAL;
    if ( !may_switch_mode(d) )
        return -EACCES;
    if ( is_pv_32on64_domain(d) )
        return 0;

    d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 1;

    /* switch gdt */
    gdt_l1e = l1e_from_page(virt_to_page(compat_gdt_table), PAGE_HYPERVISOR);
    for ( vcpuid = 0; vcpuid < MAX_VIRT_CPUS; vcpuid++ )
    {
        if ( (d->vcpu[vcpuid] != NULL) &&
             (setup_compat_l4(d->vcpu[vcpuid]) != 0) )
            goto undo_and_fail;
        d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) +
                                 FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
    }

    domain_set_alloc_bitsize(d);

    return 0;

 undo_and_fail:
    /* Roll back every vcpu already converted (indices below 'vcpuid'). */
    d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 0;
    gdt_l1e = l1e_from_page(virt_to_page(gdt_table), PAGE_HYPERVISOR);
    while ( vcpuid-- != 0 )
    {
        if ( d->vcpu[vcpuid] != NULL )
            release_compat_l4(d->vcpu[vcpuid]);
        d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) +
                                 FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
    }
    return -ENOMEM;
}

#else
/* No compat support: these degenerate to no-ops. */
#define setup_compat_l4(v) 0
#define release_compat_l4(v) ((void)0)
#endif

/*
 * Arch-specific per-vcpu initialisation, called after the generic vcpu
 * has been allocated.  HVM vcpus delegate to hvm_vcpu_initialise(); PV
 * vcpus get a periodic timer, context-switch hooks, and (for vcpu 0) an
 * emulated PIT.  Returns 0 on success or a negative errno.
 */
int vcpu_initialise(struct vcpu *v)
{
    struct domain *d = v->domain;
    int rc;

    v->arch.vcpu_info_mfn = INVALID_MFN;

    v->arch.flags = TF_kernel_mode;

#if defined(__i386__)
    mapcache_vcpu_init(v);
#endif

    pae_l3_cache_init(&v->arch.pae_l3_cache);

    paging_vcpu_init(v);

    if ( is_hvm_domain(d) )
    {
        if ( (rc = hvm_vcpu_initialise(v)) != 0 )
            return rc;
    }
    else
    {
        /* PV guests by default have a 100Hz ticker. */
        v->periodic_period = MILLISECS(10);

        /* PV guests get an emulated PIT too for video BIOSes to use. */
        if ( !is_idle_domain(d) && (v->vcpu_id == 0) )
            pit_init(v, cpu_khz);

        v->arch.schedule_tail = continue_nonidle_domain;
        v->arch.ctxt_switch_from = paravirt_ctxt_switch_from;
        v->arch.ctxt_switch_to   = paravirt_ctxt_switch_to;

        /* Idle vcpus run on the idle pagetable and re-enter idle_loop(). */
        if ( is_idle_domain(d) )
        {
            v->arch.schedule_tail = continue_idle_domain;
            v->arch.cr3           = __pa(idle_pg_table);
        }

        v->arch.guest_context.ctrlreg[4] =
            real_cr4_to_pv_guest_cr4(mmu_cr4_features);
    }

    /* This vcpu's slice of the domain's per-domain (GDT/LDT) mappings. */
    v->arch.perdomain_ptes =
        d->arch.mm_perdomain_pt + (v->vcpu_id << GDT_LDT_VCPU_SHIFT);

    return (is_pv_32on64_vcpu(v) ? setup_compat_l4(v) : 0);
}

/* Arch-specific per-vcpu teardown: release compat L4 and HVM state. */
void vcpu_destroy(struct vcpu *v)
{
    if ( is_pv_32on64_vcpu(v) )
        release_compat_l4(v);

    if ( is_hvm_vcpu(v) )
        hvm_vcpu_destroy(v);
}

/*
 * Arch-specific domain construction: per-domain mapping tables, paging,
 * shared_info page, I/O port capabilities, IOMMU and HVM state.  All
 * allocations are unwound through the 'fail' path on error.
 * Returns 0 on success or a negative errno.
 */
int arch_domain_create(struct domain *d, unsigned int domcr_flags)
{
#ifdef __x86_64__
    struct page_info *pg;
#endif
    l1_pgentry_t gdt_l1e;
    int i, vcpuid, pdpt_order, paging_initialised = 0;
    int rc = -ENOMEM;

    /* HAP is used only when HVM, hardware-supported, and requested. */
    d->arch.hvm_domain.hap_enabled =
        is_hvm_domain(d) &&
        hvm_funcs.hap_supported &&
        (domcr_flags & DOMCRF_hap);

    INIT_LIST_HEAD(&d->arch.pdev_list);
    d->arch.relmem = RELMEM_not_started;
    INIT_LIST_HEAD(&d->arch.relmem_list);

    pdpt_order = get_order_from_bytes(PDPT_L1_ENTRIES * sizeof(l1_pgentry_t));
    d->arch.mm_perdomain_pt = alloc_xenheap_pages(pdpt_order);
    if ( d->arch.mm_perdomain_pt == NULL )
        goto fail;
    memset(d->arch.mm_perdomain_pt, 0, PAGE_SIZE << pdpt_order);

    /*
     * Map Xen segments into every VCPU's GDT, irrespective of whether every
     * VCPU will actually be used. This avoids an NMI race during context
     * switch: if we take an interrupt after switching CR3 but before switching
     * GDT, and the old VCPU# is invalid in the new domain, we would otherwise
     * try to load CS from an invalid table.
     */
    gdt_l1e = l1e_from_page(virt_to_page(gdt_table), PAGE_HYPERVISOR);
    for ( vcpuid = 0; vcpuid < MAX_VIRT_CPUS; vcpuid++ )
        d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) +
                                 FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;

#if defined(__i386__)

    mapcache_domain_init(d);

#else /* __x86_64__ */

    /* Build the L2 covering the per-domain mappings. */
    pg = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d)));
    if ( pg == NULL )
        goto fail;
    d->arch.mm_perdomain_l2 = page_to_virt(pg);
    clear_page(d->arch.mm_perdomain_l2);
    for ( i = 0; i < (1 << pdpt_order); i++ )
        d->arch.mm_perdomain_l2[l2_table_offset(PERDOMAIN_VIRT_START)+i] =
            l2e_from_page(virt_to_page(d->arch.mm_perdomain_pt)+i,
                          __PAGE_HYPERVISOR);

    /* And the L3 that points at that L2. */
    pg = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d)));
    if ( pg == NULL )
        goto fail;
    d->arch.mm_perdomain_l3 = page_to_virt(pg);
    clear_page(d->arch.mm_perdomain_l3);
    d->arch.mm_perdomain_l3[l3_table_offset(PERDOMAIN_VIRT_START)] =
        l3e_from_page(virt_to_page(d->arch.mm_perdomain_l2),
                            __PAGE_HYPERVISOR);

#endif /* __x86_64__ */

#ifdef CONFIG_COMPAT
    HYPERVISOR_COMPAT_VIRT_START(d) = __HYPERVISOR_COMPAT_VIRT_START;
#endif

    if ( (rc = paging_domain_init(d)) != 0 )
        goto fail;
    paging_initialised = 1;

    if ( !is_idle_domain(d) )
    {
        d->arch.ioport_caps = 
            rangeset_new(d, "I/O Ports", RANGESETF_prettyprint_hex);
        rc = -ENOMEM;
        if ( d->arch.ioport_caps == NULL )
            goto fail;

        if ( (d->shared_info = alloc_xenheap_page()) == NULL )
            goto fail;

        clear_page(d->shared_info);
        share_xen_page_with_guest(
            virt_to_page(d->shared_info), d, XENSHARE_writable);

        if ( (rc = iommu_domain_init(d)) != 0 )
            goto fail;
    }

    spin_lock_init(&d->arch.irq_lock);

    if ( is_hvm_domain(d) )
    {
        if ( (rc = hvm_domain_initialise(d)) != 0 )
        {
            /* iommu state was set up above and is not freed by 'fail'. */
            iommu_domain_destroy(d);
            goto fail;
        }
    }
    else
    {
        /* 32-bit PV guest by default only if Xen is not 64-bit. */
        d->arch.is_32bit_pv = d->arch.has_32bit_shinfo =
            (CONFIG_PAGING_LEVELS != 4);
    }

    /* Start with all CPUID override leaves marked unused. */
    memset(d->arch.cpuids, 0, sizeof(d->arch.cpuids));
    for ( i = 0; i < MAX_CPUID_INPUT; i++ )
    {
        d->arch.cpuids[i].input[0] = XEN_CPUID_INPUT_UNUSED;
        d->arch.cpuids[i].input[1] = XEN_CPUID_INPUT_UNUSED;
    }

    return 0;

 fail:
    d->is_dying = DOMDYING_dead;
    free_xenheap_page(d->shared_info);
    if ( paging_initialised )
        paging_final_teardown(d);
#ifdef __x86_64__
    if ( d->arch.mm_perdomain_l2 )
        free_domheap_page(virt_to_page(d->arch.mm_perdomain_l2));
    if ( d->arch.mm_perdomain_l3 )
        free_domheap_page(virt_to_page(d->arch.mm_perdomain_l3));
#endif
    free_xenheap_pages(d->arch.mm_perdomain_pt, pdpt_order);
    return rc;
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?