
mm.c

xen 3.2.2 source code (C)
Page 1 of 5
/*
 *  Copyright (C) 2005 Intel Co
 *	Kun Tian (Kevin Tian) <kevin.tian@intel.com>
 *
 * 05/04/29 Kun Tian (Kevin Tian) <kevin.tian@intel.com> Add VTI domain support
 *
 * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
 *                    VA Linux Systems Japan K.K.
 *                    dom0 vp model support
 */

/*
 * NOTES on SMP
 *
 * * shared structures
 * There are some structures which are accessed by CPUs concurrently.
 * Here is the list of shared structures and the operations which
 * read/write them.
 *
 * - struct page_info
 *   This is a xen global resource. This structure is accessed by
 *   any CPU.
 *
 *   operations on this structure:
 *   - get_page() and its variants
 *   - put_page() and its variants
 *
 * - vTLB
 *   vcpu->arch.{d, i}tlb: Software tlb cache. These are per VCPU data.
 *   DEFINE_PER_CPU (unsigned long, vhpt_paddr): VHPT table per physical CPU.
 *
 *   domain_flush_vtlb_range() and domain_flush_vtlb_all()
 *   write vcpu->arch.{d, i}tlb and the VHPT table of a vcpu which isn't
 *   current, so there are potential races when reading/writing the VHPT
 *   and vcpu->arch.{d, i}tlb.
 *   Please note that reading the VHPT is done by the hardware page table
 *   walker.
 *
 *   operations on this structure:
 *   - global tlb purge
 *     vcpu_ptc_g(), vcpu_ptc_ga() and domain_page_flush_and_put(),
 *     i.e. the callers of domain_flush_vtlb_range() and
 *     domain_flush_vtlb_all().
 *     These functions invalidate the VHPT entry and vcpu->arch.{i, d}tlb.
 *
 *   - tlb insert and fc
 *     vcpu_itc_i()
 *     vcpu_itc_d()
 *     ia64_do_page_fault()
 *     vcpu_fc()
 *     These functions set the VHPT entry and vcpu->arch.{i, d}tlb.
 *     Actually vcpu_itc_no_srlz() does.
 *
 * - the P2M table
 *   domain->mm and the pgd, pud, pmd, pte table pages.
 *   This structure is used to convert a domain pseudo physical address
 *   to a machine address. This is a per domain resource.
 *
 *   operations on this structure:
 *   - populate the P2M table tree
 *     lookup_alloc_domain_pte() and its variants.
 *   - set p2m entry
 *     assign_new_domain_page() and its variants.
 *     assign_domain_page() and its variants.
 *   - xchg p2m entry
 *     assign_domain_page_replace()
 *   - cmpxchg p2m entry
 *     assign_domain_page_cmpxchg_rel()
 *     replace_grant_host_mapping()
 *     steal_page()
 *     zap_domain_page_one()
 *   - read p2m entry
 *     lookup_alloc_domain_pte() and its variants.
 *
 * - the M2P table
 *   mpt_table (or machine_to_phys_mapping)
 *   This is a table which converts a machine address to a pseudo physical
 *   address. This is a global structure.
 *
 *   operations on this structure:
 *   - set m2p entry
 *     set_gpfn_from_mfn()
 *   - zap m2p entry
 *     set_gpfn_from_mfn(INVALID_P2M_ENTRY)
 *   - get m2p entry
 *     get_gpfn_from_mfn()
 *
 *
 * * avoiding races
 * The resources which are shared by CPUs must be accessed carefully
 * to avoid races.
 * IA64 has weak memory ordering, so attention must be paid when accessing
 * shared structures. [SDM vol2 PartII chap. 2]
 *
 * - struct page_info memory ordering
 *   get_page() has acquire semantics.
 *   put_page() has release semantics.
 *
 * - populating the p2m table
 *   pgd, pud, pmd are append only.
 *
 * - races when updating the P2M tables and the M2P table
 *   The P2M entries are shared by more than one vcpu, so they are accessed
 *   with atomic operations, i.e. xchg or cmpxchg must be used to update a
 *   p2m entry.
 *   NOTE: When creating/destructing a domain, we don't need to take care of
 *         this race.
 *
 *   The M2P table is the inverse of the P2M table,
 *   i.e. P2M(M2P(p)) = p and M2P(P2M(m)) = m.
 *   The M2P table and P2M table must be updated consistently.
 *   Here is the update sequence:
 *
 *   xchg or cmpxchg case
 *   - set_gpfn_from_mfn(new_mfn, gpfn)
 *   - memory barrier
 *   - atomic update of the p2m entry (xchg or cmpxchg the p2m entry),
 *     which yields the old_mfn entry as a result
 *   - memory barrier
 *   - set_gpfn_from_mfn(old_mfn, INVALID_P2M_ENTRY)
 *
 *   Here the memory barriers can be achieved by release semantics.
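 *
 *   As a concrete illustration of this ordering (a minimal sketch only:
 *   p2m_xchg() is a hypothetical stand-in for the atomic xchg/cmpxchg on
 *   the p2m entry, it is not a function defined in this file):
 *
 *     set_gpfn_from_mfn(new_mfn, gpfn);          // new M2P entry first
 *     wmb();                                     // or use release semantics
 *     old_pte = p2m_xchg(ptep, new_pte);         // atomic P2M update
 *     wmb();
 *     set_gpfn_from_mfn(pte_pfn(old_pte), INVALID_P2M_ENTRY); // zap old M2P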
 *
 * - races between global tlb purge and tlb insert
 *   This is a race between reading/writing vcpu->arch.{d, i}tlb or a VHPT
 *   entry.
 *   When a vcpu is about to insert a tlb entry, another vcpu may purge the
 *   tlb cache globally. Inserting a tlb entry (vcpu_itc_no_srlz()) or a
 *   global tlb purge (domain_flush_vtlb_range() and domain_flush_vtlb_all())
 *   can't update vcpu->arch.{d, i}tlb, the VHPT and the mTLB atomically,
 *   so there is a race here.
 *
 *   Here we check the vcpu->arch.{d, i}tlb.p bit:
 *   after inserting a tlb entry, check the p bit and retry the insert if it
 *   was cleared.
 *   This means that when a global tlb purge and a tlb insert are issued
 *   simultaneously, the global tlb purge always happens after the tlb insert.
 *
 * - races between p2m entry update and tlb insert
 *   This is a race between reading/writing the p2m entry.
 *   reader: vcpu_itc_i(), vcpu_itc_d(), ia64_do_page_fault(), vcpu_fc()
 *   writer: assign_domain_page_cmpxchg_rel(), replace_grant_host_mapping(),
 *           steal_page(), zap_domain_page_one()
 *
 *   For example, vcpu_itc_i() is about to insert a tlb entry by calling
 *   vcpu_itc_no_srlz() after reading the p2m entry.
 *   At the same time, the p2m entry is replaced by xchg or cmpxchg and the
 *   tlb cache of the page is flushed.
 *   There is a possibility that the p2m entry no longer points to the old
 *   page, but the tlb cache still points to the old page.
 *   This can be detected in a way similar to a sequence lock, using the p2m
 *   entry itself: the reader remembers the p2m entry value it read, inserts
 *   the tlb entry, and then reads the p2m entry again. If the new p2m entry
 *   value is different from the value that was used, retry.
 *
 * - races between referencing page and p2m entry update
 *   This is a race between reading/writing the p2m entry.
 *   reader: vcpu_get_domain_bundle(), vmx_get_domain_bundle(),
 *           efi_emulate_get_time()
 *   writer: assign_domain_page_cmpxchg_rel(), replace_grant_host_mapping(),
 *           steal_page(), zap_domain_page_one()
 *
 *   A page which is assigned to a domain can be de-assigned by another vcpu,
 *   so before reading/writing a domain page, the page's reference count
 *   must be incremented.
 *   vcpu_get_domain_bundle(), vmx_get_domain_bundle() and
 *   efi_emulate_get_time() do this.
 */

#include <xen/config.h>
#include <xen/sched.h>
#include <xen/domain.h>
#include <asm/xentypes.h>
#include <xen/mm.h>
#include <xen/errno.h>
#include <asm/pgalloc.h>
#include <asm/vhpt.h>
#include <asm/vcpu.h>
#include <asm/shadow.h>
#include <asm/p2m_entry.h>
#include <asm/tlb_track.h>
#include <linux/efi.h>
#include <linux/sort.h>
#include <xen/guest_access.h>
#include <asm/page.h>
#include <asm/dom_fw_common.h>
#include <public/memory.h>
#include <asm/event.h>
#include <asm/debugger.h>

static void domain_page_flush_and_put(struct domain* d, unsigned long mpaddr,
                                      volatile pte_t* ptep, pte_t old_pte,
                                      struct page_info* page);

extern unsigned long ia64_iobase;

static struct domain *dom_xen, *dom_io;
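/*
 * Illustrative sketch of the "sequence lock"-like check described in the
 * header comment (races between p2m entry update and tlb insert): remember
 * the p2m entry that was used, insert the tlb entry, then re-read the p2m
 * entry and retry if it changed.  This is a minimal sketch only:
 * sketch_lookup_p2m() and sketch_insert_tlb() are hypothetical placeholders
 * for the real lookup and vcpu_itc_no_srlz() paths, so the block is not
 * compiled.
 */
#if 0
static void
sketch_tlb_insert_with_p2m_check(struct vcpu* vcpu, u64 vaddr)
{
    volatile pte_t* ptep = sketch_lookup_p2m(vcpu->domain, vaddr);
    pte_t used_pte, cur_pte;

 again:
    used_pte = *ptep;                 // the p2m entry the insert is based on
    if (!pte_present(used_pte))
        return;

    sketch_insert_tlb(vcpu, vaddr, used_pte);   // vcpu_itc_no_srlz() path

    cur_pte = *ptep;                  // re-read the p2m entry
    if (pte_val(cur_pte) != pte_val(used_pte))
        goto again;                   // the entry changed under us: retry
}
#endif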
/*
 * This number is bigger than DOMID_SELF, DOMID_XEN and DOMID_IO.
 * If more reserved domain ids are introduced, this might be increased.
 */
#define DOMID_P2M       (0x7FF8U)
static struct domain *dom_p2m;

// The following is taken from arch_init_memory() @ xen/arch/x86/mm.c
void
alloc_dom_xen_and_dom_io(void)
{
    /*
     * Initialise our DOMID_XEN domain.
     * Any Xen-heap pages that we will allow to be mapped will have
     * their domain field set to dom_xen.
     */
    dom_xen = alloc_domain(DOMID_XEN);
    BUG_ON(dom_xen == NULL);

    /*
     * Initialise our DOMID_IO domain.
     * This domain owns I/O pages that are within the range of the page_info
     * array. Mappings occur at the priv of the caller.
     */
    dom_io = alloc_domain(DOMID_IO);
    BUG_ON(dom_io == NULL);
}

static int
mm_teardown_can_skip(struct domain* d, unsigned long offset)
{
    return d->arch.mm_teardown_offset > offset;
}

static void
mm_teardown_update_offset(struct domain* d, unsigned long offset)
{
    d->arch.mm_teardown_offset = offset;
}
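/*
 * Illustrative sketch of how the two helpers above support a resumable
 * teardown: mm_teardown() (defined below) records its progress with
 * mm_teardown_update_offset() and returns -EAGAIN when
 * hypercall_preempt_check() fires; a caller simply invokes it again later,
 * and mm_teardown_can_skip() fast-forwards past the work already done.
 * This is a minimal sketch only; the real caller lives in the domain
 * destruction path outside this file, so the block is not compiled.
 */
#if 0
static int
sketch_drive_mm_teardown(struct domain* d)
{
    int ret;

    for (;;) {
        ret = mm_teardown(d);
        if (ret != -EAGAIN)
            return ret;
        // Let pending work run before resuming; a real caller would
        // instead report -EAGAIN so the hypercall can be continued later.
        do_softirq();
    }
}
#endif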
static void
mm_teardown_pte(struct domain* d, volatile pte_t* pte, unsigned long offset)
{
    pte_t old_pte;
    unsigned long mfn;
    struct page_info* page;

    old_pte = ptep_get_and_clear(&d->arch.mm, offset, pte);// acquire semantics

    // vmx domain use bit[58:56] to distinguish io region from memory.
    // see vmx_build_physmap_table() in vmx_init.c
    if (!pte_mem(old_pte))
        return;

    // domain might map IO space or acpi table pages. check it.
    mfn = pte_pfn(old_pte);
    if (!mfn_valid(mfn))
        return;
    page = mfn_to_page(mfn);
    BUG_ON(page_get_owner(page) == NULL);

    // struct page_info corresponding to mfn may exist or not depending
    // on CONFIG_VIRTUAL_FRAME_TABLE.
    // The above check is too easy.
    // The right way is to check whether this page is of io area or acpi pages
    if (pte_pgc_allocated(old_pte)) {
        BUG_ON(page_get_owner(page) != d);
        BUG_ON(get_gpfn_from_mfn(mfn) == INVALID_M2P_ENTRY);
        set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
        if (test_and_clear_bit(_PGC_allocated, &page->count_info))
            put_page(page);
    } else {
        put_page(page);
    }
}

static int
mm_teardown_pmd(struct domain* d, volatile pmd_t* pmd, unsigned long offset)
{
    unsigned long i;
    volatile pte_t* pte = pte_offset_map(pmd, offset);

    for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
        unsigned long cur_offset = offset + (i << PAGE_SHIFT);
        if (mm_teardown_can_skip(d, cur_offset + PAGE_SIZE))
            continue;
        if (!pte_present(*pte)) { // acquire semantics
            mm_teardown_update_offset(d, cur_offset);
            continue;
        }
        mm_teardown_update_offset(d, cur_offset);
        mm_teardown_pte(d, pte, cur_offset);
        if (hypercall_preempt_check())
            return -EAGAIN;
    }
    return 0;
}

static int
mm_teardown_pud(struct domain* d, volatile pud_t *pud, unsigned long offset)
{
    unsigned long i;
    volatile pmd_t *pmd = pmd_offset(pud, offset);

    for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
        unsigned long cur_offset = offset + (i << PMD_SHIFT);
        if (mm_teardown_can_skip(d, cur_offset + PMD_SIZE))
            continue;
        if (!pmd_present(*pmd)) { // acquire semantics
            mm_teardown_update_offset(d, cur_offset);
            continue;
        }
        if (mm_teardown_pmd(d, pmd, cur_offset))
            return -EAGAIN;
    }
    return 0;
}

static int
mm_teardown_pgd(struct domain* d, volatile pgd_t *pgd, unsigned long offset)
{
    unsigned long i;
    volatile pud_t *pud = pud_offset(pgd, offset);

    for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
        unsigned long cur_offset = offset + (i << PUD_SHIFT);
#ifndef __PAGETABLE_PUD_FOLDED
        if (mm_teardown_can_skip(d, cur_offset + PUD_SIZE))
            continue;
#endif
        if (!pud_present(*pud)) { // acquire semantics
#ifndef __PAGETABLE_PUD_FOLDED
            mm_teardown_update_offset(d, cur_offset);
#endif
            continue;
        }
        if (mm_teardown_pud(d, pud, cur_offset))
            return -EAGAIN;
    }
    return 0;
}

int
mm_teardown(struct domain* d)
{
    struct mm_struct* mm = &d->arch.mm;
    unsigned long i;
    volatile pgd_t* pgd;

    if (mm->pgd == NULL)
        return 0;

    pgd = pgd_offset(mm, 0);
    for (i = 0; i < PTRS_PER_PGD; i++, pgd++) {
        unsigned long cur_offset = i << PGDIR_SHIFT;

        if (mm_teardown_can_skip(d, cur_offset + PGDIR_SIZE))
            continue;
        if (!pgd_present(*pgd)) { // acquire semantics
            mm_teardown_update_offset(d, cur_offset);
            continue;
        }
        if (mm_teardown_pgd(d, pgd, cur_offset))
            return -EAGAIN;
    }

    foreign_p2m_destroy(d);
    return 0;
}

static void
mm_p2m_teardown_pmd(struct domain* d, volatile pmd_t* pmd,
                    unsigned long offset)
{
    pte_free_kernel(pte_offset_map(pmd, offset));
}

static void
mm_p2m_teardown_pud(struct domain* d, volatile pud_t *pud,
                    unsigned long offset)
{
    unsigned long i;
    volatile pmd_t *pmd = pmd_offset(pud, offset);

    for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
        if (!pmd_present(*pmd))
            continue;
        mm_p2m_teardown_pmd(d, pmd, offset + (i << PMD_SHIFT));
    }
    pmd_free(pmd_offset(pud, offset));
}

static void
mm_p2m_teardown_pgd(struct domain* d, volatile pgd_t *pgd,
                    unsigned long offset)
{
    unsigned long i;
    volatile pud_t *pud = pud_offset(pgd, offset);

    for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
        if (!pud_present(*pud))
            continue;
        mm_p2m_teardown_pud(d, pud, offset + (i << PUD_SHIFT));
    }
    pud_free(pud_offset(pgd, offset));
}

static void
mm_p2m_teardown(struct domain* d)
{
    struct mm_struct* mm = &d->arch.mm;
    unsigned long i;
    volatile pgd_t* pgd;

    BUG_ON(mm->pgd == NULL);
    pgd = pgd_offset(mm, 0);
    for (i = 0; i < PTRS_PER_PGD; i++, pgd++) {
        if (!pgd_present(*pgd))
            continue;
        mm_p2m_teardown_pgd(d, pgd, i << PGDIR_SHIFT);
    }
    pgd_free(mm->pgd);
    mm->pgd = NULL;
}

void
mm_final_teardown(struct domain* d)
{
    if (d->arch.shadow_bitmap != NULL) {
        xfree(d->arch.shadow_bitmap);
        d->arch.shadow_bitmap = NULL;
    }
