/******************************************************************************
 * arch/x86/mm.c
 *
 * Copyright (c) 2002-2005 K A Fraser
 * Copyright (c) 2004 Christian Limpach
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/*
 * A description of the x86 page table API:
 *
 * Domains trap to do_mmu_update with a list of update requests.
 * This is a list of (ptr, val) pairs, where the requested operation
 * is *ptr = val.
 *
 * Reference counting of pages:
 * ----------------------------
 * Each page has two refcounts: tot_count and type_count.
 *
 * TOT_COUNT is the obvious reference count. It counts all uses of a
 * physical page frame by a domain, including uses as a page directory,
 * a page table, or simple mappings via a PTE. This count prevents a
 * domain from releasing a frame back to the free pool when it still holds
 * a reference to it.
 *
 * TYPE_COUNT is more subtle. A frame can be put to one of three
 * mutually-exclusive uses: it might be used as a page directory, or a
 * page table, or it may be mapped writable by the domain [of course, a
 * frame may not be used in any of these three ways!].
 * So, type_count is a count of the number of times a frame is being
 * referred to in its current incarnation. Therefore, a page can only
 * change its type when its type count is zero.
 *
 * Pinning the page type:
 * ----------------------
 * The type of a page can be pinned/unpinned with the commands
 * MMUEXT_[UN]PIN_L?_TABLE. Each page can be pinned exactly once (that is,
 * pinning is not reference counted, so it can't be nested).
 * This is useful to prevent a page's type count falling to zero, at which
 * point safety checks would need to be carried out next time the count
 * is increased again.
 *
 * A further note on writable page mappings:
 * -----------------------------------------
 * For simplicity, the count of writable mappings for a page may not
 * correspond to reality. The 'writable count' is incremented for every
 * PTE which maps the page with the _PAGE_RW flag set. However, for
 * write access to be possible the page directory entry must also have
 * its _PAGE_RW bit set. We do not check this as it complicates the
 * reference counting considerably [consider the case of multiple
 * directory entries referencing a single page table, some with the RW
 * bit set, others not -- it starts getting a bit messy].
 * In normal use, this simplification shouldn't be a problem.
 * However, the logic can be added if required.
 *
 * One more note on read-only page mappings:
 * -----------------------------------------
 * We want domains to be able to map pages for read-only access. The
 * main reason is that page tables and directories should be readable
 * by a domain, but it would not be safe for them to be writable.
 * However, domains have free access to rings 1 & 2 of the Intel
 * privilege model. In terms of page protection, these are considered
 * to be part of 'supervisor mode'. The WP bit in CR0 controls whether
 * read-only restrictions are respected in supervisor mode -- if the
 * bit is clear then any mapped page is writable.
 *
 * We get round this by always setting the WP bit and disallowing
 * updates to it. This is very unlikely to cause a problem for guest
 * OS's, which will generally use the WP bit to simplify copy-on-write
 * implementation (in that case, OS wants a fault when it writes to
 * an application-supplied buffer).
 */
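/*
 * Illustrative guest-side sketch of the API described above (not code from
 * this file): one batched PTE update through do_mmu_update, then pinning an
 * L1 table.  It assumes the public interface from <public/xen.h> and the
 * usual HYPERVISOR_* hypercall wrappers a guest provides; 'pte_maddr',
 * 'new_pte', 'l1_mfn' and 'fail()' are hypothetical guest-supplied values
 * and error handling.
 *
 *     mmu_update_t u;
 *     u.ptr = pte_maddr | MMU_NORMAL_PT_UPDATE;   (machine address of PTE)
 *     u.val = new_pte;                            (requested *ptr = val)
 *     if ( HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) != 0 )
 *         fail();
 *
 *     struct mmuext_op op;
 *     op.cmd = MMUEXT_PIN_L1_TABLE;               (pin the frame's type)
 *     op.arg1.mfn = l1_mfn;
 *     if ( HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) != 0 )
 *         fail();
 */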
#include <xen/config.h>
#include <xen/init.h>
#include <xen/kernel.h>
#include <xen/lib.h>
#include <xen/mm.h>
#include <xen/domain.h>
#include <xen/sched.h>
#include <xen/errno.h>
#include <xen/perfc.h>
#include <xen/irq.h>
#include <xen/softirq.h>
#include <xen/domain_page.h>
#include <xen/event.h>
#include <xen/iocap.h>
#include <xen/guest_access.h>
#include <asm/paging.h>
#include <asm/shadow.h>
#include <asm/page.h>
#include <asm/flushtlb.h>
#include <asm/io.h>
#include <asm/ldt.h>
#include <asm/x86_emulate.h>
#include <asm/e820.h>
#include <asm/hypercall.h>
#include <asm/shared.h>
#include <public/memory.h>
#include <xsm/xsm.h>
#include <xen/trace.h>

#define MEM_LOG(_f, _a...) gdprintk(XENLOG_WARNING , _f "\n" , ## _a)

/*
 * PTE updates can be done with ordinary writes except:
 *  1. Debug builds get extra checking by using CMPXCHG[8B].
 *  2. PAE builds perform an atomic 8-byte store with CMPXCHG8B.
 */
#if !defined(NDEBUG) || defined(CONFIG_X86_PAE)
#define PTE_UPDATE_WITH_CMPXCHG
#endif

/* Used to defer flushing of memory structures. */
struct percpu_mm_info {
#define DOP_FLUSH_TLB      (1<<0) /* Flush the local TLB.                    */
#define DOP_FLUSH_ALL_TLBS (1<<1) /* Flush TLBs of all VCPUs of current dom. */
#define DOP_RELOAD_LDT     (1<<2) /* Reload the LDT shadow mapping.          */
    unsigned int   deferred_ops;
    /* If non-NULL, specifies a foreign subject domain for some operations. */
    struct domain *foreign;
};
static DEFINE_PER_CPU(struct percpu_mm_info, percpu_mm_info);

/*
 * Returns the current foreign domain; defaults to the currently-executing
 * domain if a foreign override hasn't been specified.
 */
#define FOREIGNDOM (this_cpu(percpu_mm_info).foreign ?: current->domain)

/* Private domain structs for DOMID_XEN and DOMID_IO. */
static struct domain *dom_xen, *dom_io;

/* Frame table and its size in pages. */
struct page_info *frame_table;
unsigned long max_page;
unsigned long total_pages;

#define PAGE_CACHE_ATTRS (_PAGE_PAT|_PAGE_PCD|_PAGE_PWT)

#define l1_disallow_mask(d)                                     \
    ((d != dom_io) &&                                           \
     (rangeset_is_empty((d)->iomem_caps) &&                     \
      rangeset_is_empty((d)->arch.ioport_caps)) ?               \
     L1_DISALLOW_MASK : (L1_DISALLOW_MASK & ~PAGE_CACHE_ATTRS))

#ifdef CONFIG_COMPAT
l2_pgentry_t *compat_idle_pg_table_l2 = NULL;
#define l3_disallow_mask(d) (!is_pv_32on64_domain(d) ?  \
                             L3_DISALLOW_MASK :         \
                             COMPAT_L3_DISALLOW_MASK)
#else
#define l3_disallow_mask(d) L3_DISALLOW_MASK
#endif
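/*
 * Illustrative sketch (not code from this file) of how a hypercall handler
 * is expected to use the per-CPU state above: install the foreign override
 * consulted by FOREIGNDOM, accumulate flush requests in deferred_ops rather
 * than flushing per update, and drop the override once the whole batch has
 * been processed.  The real producers and consumers live in the mmu-update
 * paths further down this file.
 *
 *     this_cpu(percpu_mm_info).foreign = rd;          (subject domain 'rd')
 *     ... apply the batched page-table updates for FOREIGNDOM ...
 *     this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_TLB;
 *     ... at the end of the batch: flush according to deferred_ops ...
 *     this_cpu(percpu_mm_info).foreign = NULL;
 */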
static void queue_deferred_ops(struct domain *d, unsigned int ops)
{
    ASSERT(d == current->domain);
    this_cpu(percpu_mm_info).deferred_ops |= ops;
}

void __init init_frametable(void)
{
    unsigned long nr_pages, page_step, i, mfn;

    frame_table = (struct page_info *)FRAMETABLE_VIRT_START;

    nr_pages  = PFN_UP(max_page * sizeof(*frame_table));
    page_step = (1 << L2_PAGETABLE_SHIFT) >> PAGE_SHIFT;

    for ( i = 0; i < nr_pages; i += page_step )
    {
        mfn = alloc_boot_pages(min(nr_pages - i, page_step), page_step);
        if ( mfn == 0 )
            panic("Not enough memory for frame table\n");
        map_pages_to_xen(
            FRAMETABLE_VIRT_START + (i << PAGE_SHIFT),
            mfn, page_step, PAGE_HYPERVISOR);
    }

    memset(frame_table, 0, nr_pages << PAGE_SHIFT);
}

void __init arch_init_memory(void)
{
    extern void subarch_init_memory(void);

    unsigned long i, pfn, rstart_pfn, rend_pfn;

    /*
     * Initialise our DOMID_XEN domain.
     * Any Xen-heap pages that we will allow to be mapped will have
     * their domain field set to dom_xen.
     */
    dom_xen = alloc_domain(DOMID_XEN);
    BUG_ON(dom_xen == NULL);

    /*
     * Initialise our DOMID_IO domain.
     * This domain owns I/O pages that are within the range of the page_info
     * array. Mappings occur at the priv of the caller.
     */
    dom_io = alloc_domain(DOMID_IO);
    BUG_ON(dom_io == NULL);

    /* First 1MB of RAM is historically marked as I/O. */
    for ( i = 0; i < 0x100; i++ )
        share_xen_page_with_guest(mfn_to_page(i), dom_io, XENSHARE_writable);

    /* Any areas not specified as RAM by the e820 map are considered I/O. */
    for ( i = 0, pfn = 0; pfn < max_page; i++ )
    {
        while ( (i < e820.nr_map) &&
                (e820.map[i].type != E820_RAM) &&
                (e820.map[i].type != E820_UNUSABLE) )
            i++;

        if ( i >= e820.nr_map )
        {
            /* No more RAM regions: mark as I/O right to end of memory map. */
            rstart_pfn = rend_pfn = max_page;
        }
        else
        {
            /* Mark as I/O just up as far as next RAM region. */
            rstart_pfn = min_t(unsigned long, max_page,
                               PFN_UP(e820.map[i].addr));
            rend_pfn   = max_t(unsigned long, rstart_pfn,
                               PFN_DOWN(e820.map[i].addr + e820.map[i].size));
        }

        /* Mark as I/O up to next RAM region. */
        for ( ; pfn < rstart_pfn; pfn++ )
        {
            BUG_ON(!mfn_valid(pfn));
            share_xen_page_with_guest(
                mfn_to_page(pfn), dom_io, XENSHARE_writable);
        }

        /* Skip the RAM region. */
        pfn = rend_pfn;
    }

    subarch_init_memory();
}

int memory_is_conventional_ram(paddr_t p)
{
    int i;

    for ( i = 0; i < e820.nr_map; i++ )
    {
        /* The region contains p iff addr <= p < addr + size. */
        if ( (e820.map[i].type == E820_RAM) &&
             (e820.map[i].addr <= p) &&
             ((e820.map[i].addr + e820.map[i].size) > p) )
            return 1;
    }

    return 0;
}

unsigned long domain_get_maximum_gpfn(struct domain *d)
{
    if ( is_hvm_domain(d) )
        return d->arch.p2m.max_mapped_pfn;
    /* NB. PV guests specify nr_pfns rather than max_pfn so we adjust here. */
    return arch_get_max_pfn(d) - 1;
}

void share_xen_page_with_guest(
    struct page_info *page, struct domain *d, int readonly)
{
    if ( page_get_owner(page) == d )
        return;

    set_gpfn_from_mfn(page_to_mfn(page), INVALID_M2P_ENTRY);

    spin_lock(&d->page_alloc_lock);

    /* The incremented type count pins as writable or read-only. */
    page->u.inuse.type_info  = (readonly ? PGT_none : PGT_writable_page);
    page->u.inuse.type_info |= PGT_validated | 1;

    page_set_owner(page, d);
    wmb(); /* install valid domain ptr before updating refcnt. */
    ASSERT(page->count_info == 0);

    /* Only add to the allocation list if the domain isn't dying. */
    if ( !d->is_dying )
    {
        page->count_info |= PGC_allocated | 1;
        if ( unlikely(d->xenheap_pages++ == 0) )
            get_knownalive_domain(d);
        list_add_tail(&page->list, &d->xenpage_list);
    }

    spin_unlock(&d->page_alloc_lock);
}
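/*
 * Typical use of the above (a sketch based on callers elsewhere in the tree,
 * such as the domain-creation path, not on code in this file): Xen-heap
 * pages that a guest is allowed to map, e.g. its shared_info page, are
 * handed over like this:
 *
 *     share_xen_page_with_guest(
 *         virt_to_page(d->shared_info), d, XENSHARE_writable);
 *
 * Read-only sharing passes XENSHARE_readonly instead, which pins the type
 * as PGT_none rather than PGT_writable_page.
 */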
void share_xen_page_with_privileged_guests(
    struct page_info *page, int readonly)
{
    share_xen_page_with_guest(page, dom_xen, readonly);
}

#if defined(CONFIG_X86_PAE)

#ifdef NDEBUG
/* Only PDPTs above 4GB boundary need to be shadowed in low memory. */
#define l3tab_needs_shadow(mfn) ((mfn) >= 0x100000)
#else
/*
 * In debug builds we shadow a selection of <4GB PDPTs to exercise code paths.
 * We cannot safely shadow the idle page table, nor shadow (v1) page tables
 * (detected by lack of an owning domain). As required for correctness, we
 * always shadow PDPTs above 4GB.
 */
#define l3tab_needs_shadow(mfn)                         \
    (((((mfn) << PAGE_SHIFT) != __pa(idle_pg_table)) && \
      (page_get_owner(mfn_to_page(mfn)) != NULL) &&     \
      ((mfn) & 1)) || /* odd MFNs are shadowed */       \
     ((mfn) >= 0x100000))
#endif

static l1_pgentry_t *fix_pae_highmem_pl1e;

/* Cache the address of PAE high-memory fixmap page tables. */
static int __init cache_pae_fixmap_address(void)
{
    unsigned long fixmap_base = fix_to_virt(FIX_PAE_HIGHMEM_0);
    l2_pgentry_t *pl2e = virt_to_xen_l2e(fixmap_base);
    fix_pae_highmem_pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(fixmap_base);
    return 0;
}
__initcall(cache_pae_fixmap_address);

static DEFINE_PER_CPU(u32, make_cr3_timestamp);

void make_cr3(struct vcpu *v, unsigned long mfn)
/* Takes the MFN of a PAE l3 table, copies the contents to below 4GB if
 * necessary, and sets v->arch.cr3 to the value to load in CR3. */
{
    l3_pgentry_t *highmem_l3tab, *lowmem_l3tab;
    struct pae_l3_cache *cache = &v->arch.pae_l3_cache;
    unsigned int cpu = smp_processor_id();

    /* Fast path: does this mfn need a shadow at all? */
    if ( !l3tab_needs_shadow(mfn) )
    {
        v->arch.cr3 = mfn << PAGE_SHIFT;
        /* Cache is no longer in use or valid */
        cache->high_mfn = 0;
        return;
    }

    /* Caching logic is not interrupt safe. */
    ASSERT(!in_irq());

    /* Protects against pae_flush_pgd(). */
    spin_lock(&cache->lock);

    cache->inuse_idx ^= 1;
    cache->high_mfn   = mfn;

    /* Map the guest L3 table and copy to the chosen low-memory cache. */
    l1e_write(fix_pae_highmem_pl1e-cpu, l1e_from_pfn(mfn, __PAGE_HYPERVISOR));
    /* First check the previous high mapping can't be in the TLB.
     * (i.e. have we loaded CR3 since we last did this?) */
    if ( unlikely(this_cpu(make_cr3_timestamp) == this_cpu(tlbflush_time)) )
        flush_tlb_one_local(fix_to_virt(FIX_PAE_HIGHMEM_0 + cpu));
    highmem_l3tab = (l3_pgentry_t *)fix_to_virt(FIX_PAE_HIGHMEM_0 + cpu);
    lowmem_l3tab  = cache->table[cache->inuse_idx];
    memcpy(lowmem_l3tab, highmem_l3tab, sizeof(cache->table[0]));
    l1e_write(fix_pae_highmem_pl1e-cpu, l1e_empty());
    this_cpu(make_cr3_timestamp) = this_cpu(tlbflush_time);

    v->arch.cr3 = __pa(lowmem_l3tab);

    spin_unlock(&cache->lock);
}

#else /* !CONFIG_X86_PAE */

void make_cr3(struct vcpu *v, unsigned long mfn)
{
    v->arch.cr3 = mfn << PAGE_SHIFT;
}

#endif /* !CONFIG_X86_PAE */

void write_ptbase(struct vcpu *v)
{
    write_cr3(v->arch.cr3);
}

/*
 * Should be called after CR3 is updated.
 *
 * Uses values found in vcpu->arch.(guest_table and guest_table_user), and
 * for HVM guests, arch.monitor_table and hvm's guest CR3.
 *
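/*
 * Illustrative sketch (not code from this file) of how the helpers above are
 * driven; pagetable_get_pfn() comes from the wider tree, not this excerpt.
 * When a vcpu's top-level table changes, the new MFN is folded into
 * v->arch.cr3 (shadowing a PAE L3 below 4GB if l3tab_needs_shadow() says so),
 * and the context-switch path later loads it on that vcpu's CPU:
 *
 *     make_cr3(v, pagetable_get_pfn(v->arch.guest_table));
 *     ...
 *     write_ptbase(v);
 */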