multi.c
/******************************************************************************
 * arch/x86/mm/shadow/multi.c
 *
 * Simple, mostly-synchronous shadow page tables.
 * Parts of this code are Copyright (c) 2006 by XenSource Inc.
 * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
 * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <xen/config.h>
#include <xen/types.h>
#include <xen/mm.h>
#include <xen/trace.h>
#include <xen/sched.h>
#include <xen/perfc.h>
#include <xen/domain_page.h>
#include <asm/page.h>
#include <asm/current.h>
#include <asm/shadow.h>
#include <asm/flushtlb.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/cacheattr.h>
#include <asm/mtrr.h>
#include "private.h"
#include "types.h"

/* THINGS TO DO LATER:
 *
 * TEARDOWN HEURISTICS
 * Also: have a heuristic for when to destroy a previous paging-mode's
 * shadows.  When a guest is done with its start-of-day 32-bit tables
 * and reuses the memory we want to drop those shadows.  Start with
 * shadows in a page in two modes as a hint, but beware of clever tricks
 * like reusing a pagetable for both PAE and 64-bit during boot...
 *
 * PAE LINEAR MAPS
 * Rework shadow_get_l*e() to have the option of using map_domain_page()
 * instead of linear maps.  Add appropriate unmap_l*e calls in the users.
 * Then we can test the speed difference made by linear maps.  If the
 * map_domain_page() version is OK on PAE, we could maybe allow a lightweight
 * l3-and-l2h-only shadow mode for PAE PV guests that would allow them
 * to share l2h pages again.
 *
 * PSE disabled / PSE36
 * We don't support any modes other than PSE enabled, PSE36 disabled.
 * Neither of those would be hard to change, but we'd need to be able to
 * deal with shadows made in one mode and used in another.
 */

#define FETCH_TYPE_PREFETCH 1
#define FETCH_TYPE_DEMAND   2
#define FETCH_TYPE_WRITE    4
typedef enum {
    ft_prefetch     = FETCH_TYPE_PREFETCH,
    ft_demand_read  = FETCH_TYPE_DEMAND,
    ft_demand_write = FETCH_TYPE_DEMAND | FETCH_TYPE_WRITE,
} fetch_type_t;

#ifdef DEBUG_TRACE_DUMP
static char *fetch_type_names[] = {
    [ft_prefetch]     "prefetch",
    [ft_demand_read]  "demand read",
    [ft_demand_write] "demand write",
};
#endif

/**************************************************************************/
/* Hash table mapping from guest pagetables to shadows
 *
 * Normal case: maps the mfn of a guest page to the mfn of its shadow page.
 * FL1's:       maps the *gfn* of the start of a superpage to the mfn of a
 *              shadow L1 which maps its "splinters".
 */
static inline mfn_t
get_fl1_shadow_status(struct vcpu *v, gfn_t gfn)
/* Look for FL1 shadows in the hash table */
{
    mfn_t smfn = shadow_hash_lookup(v, gfn_x(gfn), SH_type_fl1_shadow);
    return smfn;
}

static inline mfn_t
get_shadow_status(struct vcpu *v, mfn_t gmfn, u32 shadow_type)
/* Look for shadows in the hash table */
{
    mfn_t smfn = shadow_hash_lookup(v, mfn_x(gmfn), shadow_type);
    perfc_incr(shadow_get_shadow_status);
    return smfn;
}

static inline void
set_fl1_shadow_status(struct vcpu *v, gfn_t gfn, mfn_t smfn)
/* Put an FL1 shadow into the hash table */
{
    SHADOW_PRINTK("gfn=%"SH_PRI_gfn", type=%08x, smfn=%05lx\n",
                  gfn_x(gfn), SH_type_fl1_shadow, mfn_x(smfn));

    shadow_hash_insert(v, gfn_x(gfn), SH_type_fl1_shadow, smfn);
}

static inline void
set_shadow_status(struct vcpu *v, mfn_t gmfn, u32 shadow_type, mfn_t smfn)
/* Put a shadow into the hash table */
{
    struct domain *d = v->domain;
    int res;

    SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx, type=%08x, smfn=%05lx\n",
                  d->domain_id, v->vcpu_id, mfn_x(gmfn),
                  shadow_type, mfn_x(smfn));

    /* 32-on-64 PV guests don't own their l4 pages so can't get_page them */
    if ( !is_pv_32on64_vcpu(v) || shadow_type != SH_type_l4_64_shadow )
    {
        res = get_page(mfn_to_page(gmfn), d);
        ASSERT(res == 1);
    }

    shadow_hash_insert(v, mfn_x(gmfn), shadow_type, smfn);
}

static inline void
delete_fl1_shadow_status(struct vcpu *v, gfn_t gfn, mfn_t smfn)
/* Remove a shadow from the hash table */
{
    SHADOW_PRINTK("gfn=%"SH_PRI_gfn", type=%08x, smfn=%05lx\n",
                  gfn_x(gfn), SH_type_fl1_shadow, mfn_x(smfn));
    shadow_hash_delete(v, gfn_x(gfn), SH_type_fl1_shadow, smfn);
}

static inline void
delete_shadow_status(struct vcpu *v, mfn_t gmfn, u32 shadow_type, mfn_t smfn)
/* Remove a shadow from the hash table */
{
    SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx, type=%08x, smfn=%05lx\n",
                  v->domain->domain_id, v->vcpu_id,
                  mfn_x(gmfn), shadow_type, mfn_x(smfn));
    shadow_hash_delete(v, mfn_x(gmfn), shadow_type, smfn);
    /* 32-on-64 PV guests don't own their l4 pages; see set_shadow_status */
    if ( !is_pv_32on64_vcpu(v) || shadow_type != SH_type_l4_64_shadow )
        put_page(mfn_to_page(gmfn));
}
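/* Illustrative sketch, not part of the original file: a typical caller of
 * the helpers above looks a shadow up before deciding to build a new one,
 * and registers the new shadow once it exists, so that later lookups and
 * the get_page()/put_page() refcounting in set_shadow_status() and
 * delete_shadow_status() stay consistent.  mfn_valid() is the usual Xen
 * test for "the lookup found something"; alloc_new_shadow() is a
 * hypothetical stand-in for the real shadow-allocation path, which is not
 * shown here.
 *
 *     mfn_t smfn = get_shadow_status(v, gmfn, shadow_type);
 *     if ( !mfn_valid(smfn) )
 *     {
 *         smfn = alloc_new_shadow(v, gmfn, shadow_type);  // hypothetical
 *         set_shadow_status(v, gmfn, shadow_type, smfn);  // register it
 *     }
 *     // ... use smfn; call delete_shadow_status() when the shadow is
 *     // eventually torn down.
 */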
/**************************************************************************/
/* CPU feature support querying */

static inline int
guest_supports_superpages(struct vcpu *v)
{
    /* The _PAGE_PSE bit must be honoured in HVM guests, whenever
     * CR4.PSE is set or the guest is in PAE or long mode.
     * It's also used in the dummy PT for vcpus with CR4.PG cleared. */
    return (is_hvm_vcpu(v) &&
            (GUEST_PAGING_LEVELS != 2
             || !hvm_paging_enabled(v)
             || (v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_PSE)));
}

static inline int
guest_supports_nx(struct vcpu *v)
{
    if ( GUEST_PAGING_LEVELS == 2 || !cpu_has_nx )
        return 0;
    if ( !is_hvm_vcpu(v) )
        return cpu_has_nx;
    return hvm_nx_enabled(v);
}


/**************************************************************************/
/* Functions for walking the guest page tables */

/* Flags that are needed in a pagetable entry, with the sense of NX inverted */
static uint32_t mandatory_flags(struct vcpu *v, uint32_t pfec)
{
    static uint32_t flags[] = {
        /* I/F  -  Usr Wr */
        /* 0   0   0   0  */ _PAGE_PRESENT,
        /* 0   0   0   1  */ _PAGE_PRESENT|_PAGE_RW,
        /* 0   0   1   0  */ _PAGE_PRESENT|_PAGE_USER,
        /* 0   0   1   1  */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER,
        /* 0   1   0   0  */ _PAGE_PRESENT,
        /* 0   1   0   1  */ _PAGE_PRESENT|_PAGE_RW,
        /* 0   1   1   0  */ _PAGE_PRESENT|_PAGE_USER,
        /* 0   1   1   1  */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER,
        /* 1   0   0   0  */ _PAGE_PRESENT|_PAGE_NX_BIT,
        /* 1   0   0   1  */ _PAGE_PRESENT|_PAGE_RW|_PAGE_NX_BIT,
        /* 1   0   1   0  */ _PAGE_PRESENT|_PAGE_USER|_PAGE_NX_BIT,
        /* 1   0   1   1  */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_NX_BIT,
        /* 1   1   0   0  */ _PAGE_PRESENT|_PAGE_NX_BIT,
        /* 1   1   0   1  */ _PAGE_PRESENT|_PAGE_RW|_PAGE_NX_BIT,
        /* 1   1   1   0  */ _PAGE_PRESENT|_PAGE_USER|_PAGE_NX_BIT,
        /* 1   1   1   1  */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_NX_BIT,
    };

    /* Don't demand not-NX if the CPU wouldn't enforce it. */
    if ( !guest_supports_nx(v) )
        pfec &= ~PFEC_insn_fetch;

    /* Don't demand R/W if the CPU wouldn't enforce it. */
    if ( is_hvm_vcpu(v) && unlikely(!hvm_wp_enabled(v))
         && !(pfec & PFEC_user_mode) )
        pfec &= ~PFEC_write_access;

    return flags[(pfec & 0x1f) >> 1];
}
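/* Worked example, not part of the original file: a user-mode write fault
 * arrives with PFEC_write_access and PFEC_user_mode set (plus
 * PFEC_page_present if the mapping existed).  (pfec & 0x1f) >> 1 discards
 * the present bit and gives index 0b0011 ("Usr Wr"), so every level of the
 * walk must carry _PAGE_PRESENT|_PAGE_RW|_PAGE_USER.  An instruction fetch
 * with NX enforced instead gives index 0b1000 and adds _PAGE_NX_BIT; since
 * the table tracks NX with inverted sense, that demands an entry the guest
 * has marked executable.  Without NX support, PFEC_insn_fetch is masked off
 * above and the fetch is treated as a plain read.
 */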
/* Modify a guest pagetable entry to set the Accessed and Dirty bits.
 * Returns non-zero if it actually writes to guest memory. */
static uint32_t set_ad_bits(void *guest_p, void *walk_p, int set_dirty)
{
    guest_intpte_t old, new;

    old = *(guest_intpte_t *)walk_p;
    new = old | _PAGE_ACCESSED | (set_dirty ? _PAGE_DIRTY : 0);
    if ( old != new )
    {
        /* Write the new entry into the walk, and try to write it back
         * into the guest table as well.  If the guest table has changed
         * under our feet then leave it alone. */
        *(guest_intpte_t *)walk_p = new;
        if ( cmpxchg(((guest_intpte_t *)guest_p), old, new) == old )
            return 1;
    }
    return 0;
}

/* This validation is called with the shadow lock held and after write
 * permission has been removed, so the check is atomic: no inconsistent
 * content can be observed before the lock is released.
 *
 * Returns 1 on success and 0 on inconsistency.
 */
static inline uint32_t
shadow_check_gwalk(struct vcpu *v, unsigned long va, walk_t *gw)
{
    struct domain *d = v->domain;
    guest_l1e_t *l1p;
    guest_l2e_t *l2p;
#if GUEST_PAGING_LEVELS >= 4
    guest_l3e_t *l3p;
    guest_l4e_t *l4p;
#endif
    int mismatch = 0;

    ASSERT(shadow_locked_by_me(d));

    if ( gw->version ==
         atomic_read(&d->arch.paging.shadow.gtable_dirty_version) )
        return 1;

    /* We could cache the guest page mappings from the last guest table
     * walk.  However, this check happens relatively infrequently, so
     * bearing the small cost of remapping the guest pages here is better
     * than caching a mapping on every walk.
     *
     * Also, when an inconsistency is found, simply return and let the
     * guest take another fault rather than re-validating the new path;
     * this keeps the logic simple. */
    perfc_incr(shadow_check_gwalk);
#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
    l4p = (guest_l4e_t *)v->arch.paging.shadow.guest_vtable;
    mismatch |= (gw->l4e.l4 != l4p[guest_l4_table_offset(va)].l4);
    l3p = sh_map_domain_page(gw->l3mfn);
    mismatch |= (gw->l3e.l3 != l3p[guest_l3_table_offset(va)].l3);
    sh_unmap_domain_page(l3p);
#else
    mismatch |= (gw->l3e.l3 !=
                 v->arch.paging.shadow.gl3e[guest_l3_table_offset(va)].l3);
#endif
    l2p = sh_map_domain_page(gw->l2mfn);
    mismatch |= (gw->l2e.l2 != l2p[guest_l2_table_offset(va)].l2);
    sh_unmap_domain_page(l2p);
#else
    l2p = (guest_l2e_t *)v->arch.paging.shadow.guest_vtable;
    mismatch |= (gw->l2e.l2 != l2p[guest_l2_table_offset(va)].l2);
#endif

    if ( !(guest_supports_superpages(v) &&
           (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE)) )
    {
        l1p = sh_map_domain_page(gw->l1mfn);
        mismatch |= (gw->l1e.l1 != l1p[guest_l1_table_offset(va)].l1);
        sh_unmap_domain_page(l1p);
    }

    return !mismatch;
}

/* Remove write access permissions from a gwalk_t in a batch, and return
 * an OR-ed result indicating whether a TLB flush is needed and whether
 * the guest pages must be re-walked.
 *
 * Syncing a page removes write access to that page, but it may also give
 * write access to other pages in the path.  If we resync any pages,
 * re-walk from the beginning.
 */
#define GW_RMWR_FLUSHTLB 1
#define GW_RMWR_REWALK   2

static inline uint32_t
gw_remove_write_accesses(struct vcpu *v, unsigned long va, walk_t *gw)
{
    uint32_t rc = 0;

#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
    if ( mfn_is_out_of_sync(gw->l3mfn) )
    {
        sh_resync(v, gw->l3mfn);
        rc = GW_RMWR_REWALK;
    }
    else
#endif /* OOS */
    if ( sh_remove_write_access(v, gw->l3mfn, 3, va) )
        rc = GW_RMWR_FLUSHTLB;
#endif /* GUEST_PAGING_LEVELS >= 4 */

#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
    if ( mfn_is_out_of_sync(gw->l2mfn) )
    {
        sh_resync(v, gw->l2mfn);
        rc |= GW_RMWR_REWALK;
    }
    else
#endif /* OOS */
    if ( sh_remove_write_access(v, gw->l2mfn, 2, va) )
        rc |= GW_RMWR_FLUSHTLB;
#endif /* GUEST_PAGING_LEVELS >= 3 */

    if ( !(guest_supports_superpages(v) &&
           (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE))
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
         && !mfn_is_out_of_sync(gw->l1mfn)
#endif /* OOS */