📄 multi.c
/******************************************************************************
 * arch/x86/mm/shadow/multi.c
 *
 * Simple, mostly-synchronous shadow page tables.
 * Parts of this code are Copyright (c) 2006 by XenSource Inc.
 * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
 * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <xen/config.h>
#include <xen/types.h>
#include <xen/mm.h>
#include <xen/trace.h>
#include <xen/sched.h>
#include <xen/perfc.h>
#include <xen/domain_page.h>
#include <asm/page.h>
#include <asm/current.h>
#include <asm/shadow.h>
#include <asm/flushtlb.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/cacheattr.h>
#include <asm/mtrr.h>
#include "private.h"
#include "types.h"

/* THINGS TO DO LATER:
 *
 * TEARDOWN HEURISTICS
 * Also: have a heuristic for when to destroy a previous paging-mode's
 * shadows.  When a guest is done with its start-of-day 32-bit tables
 * and reuses the memory we want to drop those shadows.  Start with
 * shadows in a page in two modes as a hint, but beware of clever tricks
 * like reusing a pagetable for both PAE and 64-bit during boot...
 *
 * PAE LINEAR MAPS
 * Rework shadow_get_l*e() to have the option of using map_domain_page()
 * instead of linear maps.  Add appropriate unmap_l*e calls in the users.
 * Then we can test the speed difference made by linear maps.  If the
 * map_domain_page() version is OK on PAE, we could maybe allow a lightweight
 * l3-and-l2h-only shadow mode for PAE PV guests that would allow them
 * to share l2h pages again.
 *
 * GUEST_WALK_TABLES TLB FLUSH COALESCE
 * guest_walk_tables can do up to three remote TLB flushes as it walks to
 * the first l1 of a new pagetable.  Should coalesce the flushes to the end,
 * and if we do flush, re-do the walk.  If anything has changed, then
 * pause all the other vcpus and do the walk *again*.
 *
 * PSE disabled / PSE36
 * We don't support any modes other than PSE enabled, PSE36 disabled.
 * Neither of those would be hard to change, but we'd need to be able to
 * deal with shadows made in one mode and used in another.
 */

#define FETCH_TYPE_PREFETCH 1
#define FETCH_TYPE_DEMAND   2
#define FETCH_TYPE_WRITE    4
typedef enum {
    ft_prefetch     = FETCH_TYPE_PREFETCH,
    ft_demand_read  = FETCH_TYPE_DEMAND,
    ft_demand_write = FETCH_TYPE_DEMAND | FETCH_TYPE_WRITE,
} fetch_type_t;

#ifdef DEBUG_TRACE_DUMP
static char *fetch_type_names[] = {
    [ft_prefetch]     "prefetch",
    [ft_demand_read]  "demand read",
    [ft_demand_write] "demand write",
};
#endif

/**************************************************************************/
/* Hash table mapping from guest pagetables to shadows
 *
 * Normal case: maps the mfn of a guest page to the mfn of its shadow page.
 * FL1's:       maps the *gfn* of the start of a superpage to the mfn of a
 *              shadow L1 which maps its "splinters".
 */
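/* (For reference: an FL1 shadow splinters a guest superpage -- 4MB with
 * 2-level paging, 2MB with PAE or long mode -- into an l1's worth of 4k
 * entries.  There is no guest l1 page behind it to key the hash on, which
 * is why FL1s are keyed on the gfn of the superpage's first frame instead
 * of on a guest mfn.) */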
static inline mfn_t
get_fl1_shadow_status(struct vcpu *v, gfn_t gfn)
/* Look for FL1 shadows in the hash table */
{
    mfn_t smfn = shadow_hash_lookup(v, gfn_x(gfn), SH_type_fl1_shadow);
    return smfn;
}

static inline mfn_t
get_shadow_status(struct vcpu *v, mfn_t gmfn, u32 shadow_type)
/* Look for shadows in the hash table */
{
    mfn_t smfn = shadow_hash_lookup(v, mfn_x(gmfn), shadow_type);
    perfc_incr(shadow_get_shadow_status);
    return smfn;
}

static inline void
set_fl1_shadow_status(struct vcpu *v, gfn_t gfn, mfn_t smfn)
/* Put an FL1 shadow into the hash table */
{
    SHADOW_PRINTK("gfn=%"SH_PRI_gfn", type=%08x, smfn=%05lx\n",
                  gfn_x(gfn), SH_type_fl1_shadow, mfn_x(smfn));

    shadow_hash_insert(v, gfn_x(gfn), SH_type_fl1_shadow, smfn);
}

static inline void
set_shadow_status(struct vcpu *v, mfn_t gmfn, u32 shadow_type, mfn_t smfn)
/* Put a shadow into the hash table */
{
    struct domain *d = v->domain;
    int res;

    SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx, type=%08x, smfn=%05lx\n",
                  d->domain_id, v->vcpu_id, mfn_x(gmfn),
                  shadow_type, mfn_x(smfn));

    /* 32-on-64 PV guests don't own their l4 pages so can't get_page them */
    if ( !is_pv_32on64_vcpu(v) || shadow_type != SH_type_l4_64_shadow )
    {
        res = get_page(mfn_to_page(gmfn), d);
        ASSERT(res == 1);
    }

    shadow_hash_insert(v, mfn_x(gmfn), shadow_type, smfn);
}

static inline void
delete_fl1_shadow_status(struct vcpu *v, gfn_t gfn, mfn_t smfn)
/* Remove a shadow from the hash table */
{
    SHADOW_PRINTK("gfn=%"SH_PRI_gfn", type=%08x, smfn=%05lx\n",
                  gfn_x(gfn), SH_type_fl1_shadow, mfn_x(smfn));
    shadow_hash_delete(v, gfn_x(gfn), SH_type_fl1_shadow, smfn);
}

static inline void
delete_shadow_status(struct vcpu *v, mfn_t gmfn, u32 shadow_type, mfn_t smfn)
/* Remove a shadow from the hash table */
{
    SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx, type=%08x, smfn=%05lx\n",
                  v->domain->domain_id, v->vcpu_id,
                  mfn_x(gmfn), shadow_type, mfn_x(smfn));
    shadow_hash_delete(v, mfn_x(gmfn), shadow_type, smfn);
    /* 32-on-64 PV guests don't own their l4 pages; see set_shadow_status */
    if ( !is_pv_32on64_vcpu(v) || shadow_type != SH_type_l4_64_shadow )
        put_page(mfn_to_page(gmfn));
}
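/* (Note that set_shadow_status() and delete_shadow_status() are a strict
 * pair: the get_page() reference taken when a shadow is hashed keeps the
 * guest frame from being freed for as long as the shadow exists, and is
 * dropped again when the shadow is removed from the hash.) */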
/**************************************************************************/
/* CPU feature support querying */

static inline int
guest_supports_superpages(struct vcpu *v)
{
    /* The _PAGE_PSE bit must be honoured in HVM guests, whenever
     * CR4.PSE is set or the guest is in PAE or long mode.
     * It's also used in the dummy PT for vcpus with CR4.PG cleared. */
    return (is_hvm_vcpu(v) &&
            (GUEST_PAGING_LEVELS != 2
             || !hvm_paging_enabled(v)
             || (v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_PSE)));
}

static inline int
guest_supports_nx(struct vcpu *v)
{
    if ( GUEST_PAGING_LEVELS == 2 || !cpu_has_nx )
        return 0;
    if ( !is_hvm_vcpu(v) )
        return cpu_has_nx;
    return hvm_nx_enabled(v);
}


/**************************************************************************/
/* Functions for walking the guest page tables */

/* Flags that are needed in a pagetable entry, with the sense of NX inverted */
static uint32_t mandatory_flags(struct vcpu *v, uint32_t pfec)
{
    static uint32_t flags[] = {
        /* I/F  -  Usr Wr */
        /* 0   0   0   0 */ _PAGE_PRESENT,
        /* 0   0   0   1 */ _PAGE_PRESENT|_PAGE_RW,
        /* 0   0   1   0 */ _PAGE_PRESENT|_PAGE_USER,
        /* 0   0   1   1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER,
        /* 0   1   0   0 */ _PAGE_PRESENT,
        /* 0   1   0   1 */ _PAGE_PRESENT|_PAGE_RW,
        /* 0   1   1   0 */ _PAGE_PRESENT|_PAGE_USER,
        /* 0   1   1   1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER,
        /* 1   0   0   0 */ _PAGE_PRESENT|_PAGE_NX_BIT,
        /* 1   0   0   1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_NX_BIT,
        /* 1   0   1   0 */ _PAGE_PRESENT|_PAGE_USER|_PAGE_NX_BIT,
        /* 1   0   1   1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_NX_BIT,
        /* 1   1   0   0 */ _PAGE_PRESENT|_PAGE_NX_BIT,
        /* 1   1   0   1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_NX_BIT,
        /* 1   1   1   0 */ _PAGE_PRESENT|_PAGE_USER|_PAGE_NX_BIT,
        /* 1   1   1   1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_NX_BIT,
    };

    /* Don't demand not-NX if the CPU wouldn't enforce it. */
    if ( !guest_supports_nx(v) )
        pfec &= ~PFEC_insn_fetch;

    /* Don't demand R/W if the CPU wouldn't enforce it. */
    if ( is_hvm_vcpu(v) && unlikely(!hvm_wp_enabled(v))
         && !(pfec & PFEC_user_mode) )
        pfec &= ~PFEC_write_access;

    return flags[(pfec & 0x1f) >> 1];
}
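/* (Worked example of the lookup above: a user-mode write fault has
 * pfec = PFEC_user_mode|PFEC_write_access = 0x6, so we return
 * flags[(0x6 & 0x1f) >> 1] = flags[3] = _PAGE_PRESENT|_PAGE_RW|_PAGE_USER;
 * the >> 1 simply discards PFEC_page_present, which never changes the
 * flags an access requires.) */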
/* Modify a guest pagetable entry to set the Accessed and Dirty bits.
 * Returns non-zero if it actually writes to guest memory. */
static uint32_t set_ad_bits(void *guest_p, void *walk_p, int set_dirty)
{
    guest_intpte_t old, new;

    old = *(guest_intpte_t *)walk_p;
    new = old | _PAGE_ACCESSED | (set_dirty ? _PAGE_DIRTY : 0);
    if ( old != new )
    {
        /* Write the new entry into the walk, and try to write it back
         * into the guest table as well.  If the guest table has changed
         * under our feet then leave it alone. */
        *(guest_intpte_t *)walk_p = new;
        if ( cmpxchg(((guest_intpte_t *)guest_p), old, new) == old )
            return 1;
    }
    return 0;
}

/* Walk the guest pagetables, after the manner of a hardware walker.
 *
 * Inputs: a vcpu, a virtual address, a walk_t to fill, a
 *         pointer to a pagefault code, and a flag "shadow_op".
 *
 * We walk the vcpu's guest pagetables, filling the walk_t with what we
 * see and adding any Accessed and Dirty bits that are needed in the
 * guest entries.  Using the pagefault code, we check the permissions as
 * we go.  For the purposes of reading pagetables we treat all non-RAM
 * memory as containing zeroes.
 *
 * If "shadow_op" is non-zero, we are serving a genuine guest memory access,
 * and must (a) be under the shadow lock, and (b) remove write access
 * from any guest PT pages we see, as we will be shadowing them soon
 * and will rely on the contents' not having changed.
 *
 * Returns 0 for success, or the set of permission bits that we failed on
 * if the walk did not complete.
 * N.B. This is different from the old return code but almost no callers
 * checked the old return code anyway.
 */
static uint32_t
guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw,
                  uint32_t pfec, int shadow_op)
{
    struct domain *d = v->domain;
    p2m_type_t p2mt;
    guest_l1e_t *l1p = NULL;
    guest_l2e_t *l2p = NULL;
#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
    guest_l3e_t *l3p = NULL;
    guest_l4e_t *l4p;
#endif
    uint32_t gflags, mflags, rc = 0;
    int pse;

    ASSERT(!shadow_op || shadow_locked_by_me(d));

    perfc_incr(shadow_guest_walk);
    memset(gw, 0, sizeof(*gw));
    gw->va = va;

    /* Mandatory bits that must be set in every entry.  We invert NX, to
     * calculate as if there were an "X" bit that allowed access.
     * We will accumulate, in rc, the set of flags that are missing. */
    mflags = mandatory_flags(v, pfec);

#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */

    /* Get the l4e from the top level table and check its flags */
    gw->l4mfn = pagetable_get_mfn(v->arch.guest_table);
    l4p = ((guest_l4e_t *)v->arch.paging.shadow.guest_vtable);
    gw->l4e = l4p[guest_l4_table_offset(va)];
    gflags = guest_l4e_get_flags(gw->l4e) ^ _PAGE_NX_BIT;
    rc |= ((gflags & mflags) ^ mflags);
    if ( rc & _PAGE_PRESENT ) goto out;

    /* Map the l3 table */
    gw->l3mfn = gfn_to_mfn(d, guest_l4e_get_gfn(gw->l4e), &p2mt);
    if ( !p2m_is_ram(p2mt) )
    {
        rc |= _PAGE_PRESENT;
        goto out;
    }
    ASSERT(mfn_valid(gw->l3mfn));

    /* This mfn is a pagetable: make sure the guest can't write to it. */
    if ( shadow_op && sh_remove_write_access(v, gw->l3mfn, 3, va) != 0 )
        flush_tlb_mask(d->domain_dirty_cpumask);

    /* Get the l3e and check its flags */
    l3p = sh_map_domain_page(gw->l3mfn);
    gw->l3e = l3p[guest_l3_table_offset(va)];
    gflags = guest_l3e_get_flags(gw->l3e) ^ _PAGE_NX_BIT;
    rc |= ((gflags & mflags) ^ mflags);
    if ( rc & _PAGE_PRESENT )
        goto out;

#else /* PAE only... */

    /* Get l3e from the cache of the top level table and check its flag */
    gw->l3e = v->arch.paging.shadow.gl3e[guest_l3_table_offset(va)];
    if ( !(guest_l3e_get_flags(gw->l3e) & _PAGE_PRESENT) )
    {
        rc |= _PAGE_PRESENT;
        goto out;
    }

#endif /* PAE or 64... */

    /* Map the l2 table */
    gw->l2mfn = gfn_to_mfn(d, guest_l3e_get_gfn(gw->l3e), &p2mt);
    if ( !p2m_is_ram(p2mt) )
    {
        rc |= _PAGE_PRESENT;
        goto out;