mmu.c
From the Linux kernel source code · C · 1,499 lines · page 1/3
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This module enables machines with Intel VT-x extensions to run virtual
 * machines without emulation or binary translation.
 *
 * MMU support
 *
 * Copyright (C) 2006 Qumranet, Inc.
 *
 * Authors:
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Avi Kivity   <avi@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */
#include "vmx.h"
#include "kvm.h"

#include <linux/types.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/module.h>
#include <asm/page.h>
#include <asm/cmpxchg.h>

#undef MMU_DEBUG
#undef AUDIT

#ifdef AUDIT
static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg);
#else
static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg) {}
#endif

#ifdef MMU_DEBUG
#define pgprintk(x...) do { if (dbg) printk(x); } while (0)
#define rmap_printk(x...) do { if (dbg) printk(x); } while (0)
#else
#define pgprintk(x...) do { } while (0)
#define rmap_printk(x...) do { } while (0)
#endif

#if defined(MMU_DEBUG) || defined(AUDIT)
static int dbg = 1;
#endif

#ifndef MMU_DEBUG
#define ASSERT(x) do { } while (0)
#else
#define ASSERT(x)                                                       \
        if (!(x)) {                                                     \
                printk(KERN_WARNING "assertion failed %s:%d: %s\n",     \
                       __FILE__, __LINE__, #x);                         \
        }
#endif

#define PT64_PT_BITS 9
#define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS)
#define PT32_PT_BITS 10
#define PT32_ENT_PER_PAGE (1 << PT32_PT_BITS)

#define PT_WRITABLE_SHIFT 1

#define PT_PRESENT_MASK (1ULL << 0)
#define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT)
#define PT_USER_MASK (1ULL << 2)
#define PT_PWT_MASK (1ULL << 3)
#define PT_PCD_MASK (1ULL << 4)
#define PT_ACCESSED_MASK (1ULL << 5)
#define PT_DIRTY_MASK (1ULL << 6)
#define PT_PAGE_SIZE_MASK (1ULL << 7)
#define PT_PAT_MASK (1ULL << 7)
#define PT_GLOBAL_MASK (1ULL << 8)
#define PT64_NX_MASK (1ULL << 63)

#define PT_PAT_SHIFT 7
#define PT_DIR_PAT_SHIFT 12
#define PT_DIR_PAT_MASK (1ULL << PT_DIR_PAT_SHIFT)

#define PT32_DIR_PSE36_SIZE 4
#define PT32_DIR_PSE36_SHIFT 13
#define PT32_DIR_PSE36_MASK (((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT)

#define PT_FIRST_AVAIL_BITS_SHIFT 9
#define PT64_SECOND_AVAIL_BITS_SHIFT 52

#define PT_SHADOW_IO_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)

#define VALID_PAGE(x) ((x) != INVALID_PAGE)

#define PT64_LEVEL_BITS 9

#define PT64_LEVEL_SHIFT(level) \
                ( PAGE_SHIFT + (level - 1) * PT64_LEVEL_BITS )

#define PT64_LEVEL_MASK(level) \
                (((1ULL << PT64_LEVEL_BITS) - 1) << PT64_LEVEL_SHIFT(level))

#define PT64_INDEX(address, level)\
        (((address) >> PT64_LEVEL_SHIFT(level)) & ((1 << PT64_LEVEL_BITS) - 1))

#define PT32_LEVEL_BITS 10

#define PT32_LEVEL_SHIFT(level) \
                ( PAGE_SHIFT + (level - 1) * PT32_LEVEL_BITS )

#define PT32_LEVEL_MASK(level) \
                (((1ULL << PT32_LEVEL_BITS) - 1) << PT32_LEVEL_SHIFT(level))

#define PT32_INDEX(address, level)\
        (((address) >> PT32_LEVEL_SHIFT(level)) & ((1 << PT32_LEVEL_BITS) - 1))

#define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))
#define PT64_DIR_BASE_ADDR_MASK \
        (PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + PT64_LEVEL_BITS)) - 1))

#define PT32_BASE_ADDR_MASK PAGE_MASK
#define PT32_DIR_BASE_ADDR_MASK \
        (PAGE_MASK & ~((1ULL << (PAGE_SHIFT + PT32_LEVEL_BITS)) - 1))

#define PFERR_PRESENT_MASK (1U << 0)
#define PFERR_WRITE_MASK (1U << 1)
#define PFERR_USER_MASK (1U << 2)
#define PFERR_FETCH_MASK (1U << 4)

#define PT64_ROOT_LEVEL 4
#define PT32_ROOT_LEVEL 2
#define PT32E_ROOT_LEVEL 3

#define PT_DIRECTORY_LEVEL 2
#define PT_PAGE_TABLE_LEVEL 1

#define RMAP_EXT 4

struct kvm_rmap_desc {
        u64 *shadow_ptes[RMAP_EXT];
        struct kvm_rmap_desc *more;
};

static struct kmem_cache *pte_chain_cache;
static struct kmem_cache *rmap_desc_cache;
static struct kmem_cache *mmu_page_header_cache;

static int is_write_protection(struct kvm_vcpu *vcpu)
{
        return vcpu->cr0 & X86_CR0_WP;
}

static int is_cpuid_PSE36(void)
{
        return 1;
}

static int is_nx(struct kvm_vcpu *vcpu)
{
        return vcpu->shadow_efer & EFER_NX;
}

static int is_present_pte(unsigned long pte)
{
        return pte & PT_PRESENT_MASK;
}

static int is_writeble_pte(unsigned long pte)
{
        return pte & PT_WRITABLE_MASK;
}

static int is_io_pte(unsigned long pte)
{
        return pte & PT_SHADOW_IO_MARK;
}

static int is_rmap_pte(u64 pte)
{
        return (pte & (PT_WRITABLE_MASK | PT_PRESENT_MASK))
                == (PT_WRITABLE_MASK | PT_PRESENT_MASK);
}

static void set_shadow_pte(u64 *sptep, u64 spte)
{
#ifdef CONFIG_X86_64
        set_64bit((unsigned long *)sptep, spte);
#else
        set_64bit((unsigned long long *)sptep, spte);
#endif
}

static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
                                  struct kmem_cache *base_cache, int min)
{
        void *obj;

        if (cache->nobjs >= min)
                return 0;
        while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
                obj = kmem_cache_zalloc(base_cache, GFP_KERNEL);
                if (!obj)
                        return -ENOMEM;
                cache->objects[cache->nobjs++] = obj;
        }
        return 0;
}

static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
{
        while (mc->nobjs)
                kfree(mc->objects[--mc->nobjs]);
}

static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache,
                                       int min)
{
        struct page *page;

        if (cache->nobjs >= min)
                return 0;
        while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
                page = alloc_page(GFP_KERNEL);
                if (!page)
                        return -ENOMEM;
                set_page_private(page, 0);
                cache->objects[cache->nobjs++] = page_address(page);
        }
        return 0;
}

static void mmu_free_memory_cache_page(struct kvm_mmu_memory_cache *mc)
{
        while (mc->nobjs)
                free_page((unsigned long)mc->objects[--mc->nobjs]);
}

static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
{
        int r;

        kvm_mmu_free_some_pages(vcpu);
        r = mmu_topup_memory_cache(&vcpu->mmu_pte_chain_cache,
                                   pte_chain_cache, 4);
        if (r)
                goto out;
        r = mmu_topup_memory_cache(&vcpu->mmu_rmap_desc_cache,
                                   rmap_desc_cache, 1);
        if (r)
                goto out;
        r = mmu_topup_memory_cache_page(&vcpu->mmu_page_cache, 4);
        if (r)
                goto out;
        r = mmu_topup_memory_cache(&vcpu->mmu_page_header_cache,
                                   mmu_page_header_cache, 4);
out:
        return r;
}

static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
{
        mmu_free_memory_cache(&vcpu->mmu_pte_chain_cache);
        mmu_free_memory_cache(&vcpu->mmu_rmap_desc_cache);
        mmu_free_memory_cache_page(&vcpu->mmu_page_cache);
        mmu_free_memory_cache(&vcpu->mmu_page_header_cache);
}

static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc,
                                    size_t size)
{
        void *p;

        BUG_ON(!mc->nobjs);
        p = mc->objects[--mc->nobjs];
        memset(p, 0, size);
        return p;
}

static struct kvm_pte_chain *mmu_alloc_pte_chain(struct kvm_vcpu *vcpu)
{
        return mmu_memory_cache_alloc(&vcpu->mmu_pte_chain_cache,
                                      sizeof(struct kvm_pte_chain));
}

static void mmu_free_pte_chain(struct kvm_pte_chain *pc)
{
        kfree(pc);
}

static struct kvm_rmap_desc *mmu_alloc_rmap_desc(struct kvm_vcpu *vcpu)
{
        return mmu_memory_cache_alloc(&vcpu->mmu_rmap_desc_cache,
                                      sizeof(struct kvm_rmap_desc));
}

static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd)
{
        kfree(rd);
}

/*
 * Reverse mapping data structures:
 *
 * If page->private bit zero is zero, then page->private points to the
 * shadow page table entry that points to page_address(page).
 *
 * If page->private bit zero is one, (then page->private & ~1) points
 * to a struct kvm_rmap_desc containing more mappings.
 */
static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte)
{
        struct page *page;
        struct kvm_rmap_desc *desc;
        int i;

        if (!is_rmap_pte(*spte))
                return;
        page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
        if (!page_private(page)) {
                rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte);
                set_page_private(page, (unsigned long)spte);
        } else if (!(page_private(page) & 1)) {
                rmap_printk("rmap_add: %p %llx 1->many\n", spte, *spte);
                desc = mmu_alloc_rmap_desc(vcpu);
                desc->shadow_ptes[0] = (u64 *)page_private(page);
                desc->shadow_ptes[1] = spte;
                set_page_private(page, (unsigned long)desc | 1);
        } else {
                rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte);
                desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul);
                while (desc->shadow_ptes[RMAP_EXT-1] && desc->more)
                        desc = desc->more;
                if (desc->shadow_ptes[RMAP_EXT-1]) {
                        desc->more = mmu_alloc_rmap_desc(vcpu);
                        desc = desc->more;
                }
                for (i = 0; desc->shadow_ptes[i]; ++i)
                        ;
                desc->shadow_ptes[i] = spte;
        }
}

static void rmap_desc_remove_entry(struct page *page,
                                   struct kvm_rmap_desc *desc,
                                   int i,
                                   struct kvm_rmap_desc *prev_desc)
{
        int j;

        for (j = RMAP_EXT - 1; !desc->shadow_ptes[j] && j > i; --j)
                ;
        desc->shadow_ptes[i] = desc->shadow_ptes[j];
        desc->shadow_ptes[j] = NULL;
        if (j != 0)
                return;
        if (!prev_desc && !desc->more)
                set_page_private(page, (unsigned long)desc->shadow_ptes[0]);
        else if (prev_desc)
                prev_desc->more = desc->more;
        else
                set_page_private(page, (unsigned long)desc->more | 1);
        mmu_free_rmap_desc(desc);
}

static void rmap_remove(u64 *spte)
{
        struct page *page;
        struct kvm_rmap_desc *desc;
        struct kvm_rmap_desc *prev_desc;
        int i;

        if (!is_rmap_pte(*spte))
                return;
        page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
        if (!page_private(page)) {
                printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
                BUG();
        } else if (!(page_private(page) & 1)) {
                rmap_printk("rmap_remove: %p %llx 1->0\n", spte, *spte);
                if ((u64 *)page_private(page) != spte) {
                        printk(KERN_ERR "rmap_remove: %p %llx 1->BUG\n",
                               spte, *spte);
                        BUG();
                }
                set_page_private(page, 0);
        } else {
                rmap_printk("rmap_remove: %p %llx many->many\n", spte, *spte);
                desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul);
                prev_desc = NULL;
                while (desc) {
                        for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i)
                                if (desc->shadow_ptes[i] == spte) {
                                        rmap_desc_remove_entry(page,
                                                               desc, i,
                                                               prev_desc);
                                        return;
                                }
                        prev_desc = desc;
                        desc = desc->more;
                }
                BUG();
        }
}

static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
{
        struct kvm *kvm = vcpu->kvm;
        struct page *page;
        struct kvm_rmap_desc *desc;
        u64 *spte;

        page = gfn_to_page(kvm, gfn);
        BUG_ON(!page);

        while (page_private(page)) {
                if (!(page_private(page) & 1))
                        spte = (u64 *)page_private(page);
                else {
                        desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul);
                        spte = desc->shadow_ptes[0];
                }
                BUG_ON(!spte);
                BUG_ON((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT
                       != page_to_pfn(page));
                BUG_ON(!(*spte & PT_PRESENT_MASK));
                BUG_ON(!(*spte & PT_WRITABLE_MASK));
                rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
                rmap_remove(spte);
                set_shadow_pte(spte, *spte & ~PT_WRITABLE_MASK);
                kvm_flush_remote_tlbs(vcpu->kvm);
        }
}

#ifdef MMU_DEBUG
static int is_empty_shadow_page(u64 *spt)
{
        u64 *pos;
        u64 *end;

        for (pos = spt, end = pos + PAGE_SIZE / sizeof(u64); pos != end; pos++)
                if (*pos != 0) {
                        printk(KERN_ERR "%s: %p %llx\n", __FUNCTION__,
                               pos, *pos);
                        return 0;
                }
        return 1;
}
#endif

static void kvm_mmu_free_page(struct kvm *kvm,
                              struct kvm_mmu_page *page_head)
{
        ASSERT(is_empty_shadow_page(page_head->spt));
        list_del(&page_head->link);
        __free_page(virt_to_page(page_head->spt));
        kfree(page_head);
        ++kvm->n_free_mmu_pages;
}

static unsigned kvm_page_table_hashfn(gfn_t gfn)
{
        return gfn;
}

static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
                                               u64 *parent_pte)
{
        struct kvm_mmu_page *page;

        if (!vcpu->kvm->n_free_mmu_pages)
                return NULL;

        page = mmu_memory_cache_alloc(&vcpu->mmu_page_header_cache,
                                      sizeof *page);
        page->spt = mmu_memory_cache_alloc(&vcpu->mmu_page_cache, PAGE_SIZE);
        set_page_private(virt_to_page(page->spt), (unsigned long)page);
        list_add(&page->link, &vcpu->kvm->active_mmu_pages);
        ASSERT(is_empty_shadow_page(page->spt));
        page->slot_bitmap = 0;
        page->multimapped = 0;
        page->parent_pte = parent_pte;
        --vcpu->kvm->n_free_mmu_pages;
        return page;
}

static void mmu_page_add_parent_pte(struct kvm_vcpu *vcpu,
                                    struct kvm_mmu_page *page, u64 *parent_pte)
{
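The "Reverse mapping data structures" comment above describes a tagged-pointer encoding: page->private holds either a single shadow-PTE pointer (bit 0 clear) or, with bit 0 set, a pointer to a struct kvm_rmap_desc chain holding up to RMAP_EXT entries per node. The following is a minimal, userspace-only sketch of that idea, not part of mmu.c; the names (sketch_rmap_add, sketch_rmap_count, sketch_rmap_desc) are hypothetical, and it only mirrors the 0->1->many transitions that rmap_add() performs.

/*
 * Illustrative sketch (NOT kernel code): bit 0 of the per-page rmap word
 * selects between "one spte pointer" and "tagged pointer to a desc chain".
 */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

#define SKETCH_RMAP_EXT 4

struct sketch_rmap_desc {
        unsigned long long *shadow_ptes[SKETCH_RMAP_EXT];
        struct sketch_rmap_desc *more;
};

/* Add one spte to a per-page rmap word, mirroring the 0->1->many states. */
static void sketch_rmap_add(unsigned long *rmap, unsigned long long *spte)
{
        struct sketch_rmap_desc *desc;
        int i;

        if (!*rmap) {                           /* 0 -> 1: store the pointer directly */
                *rmap = (unsigned long)spte;
        } else if (!(*rmap & 1)) {              /* 1 -> many: spill into a descriptor */
                desc = calloc(1, sizeof(*desc));
                desc->shadow_ptes[0] = (unsigned long long *)*rmap;
                desc->shadow_ptes[1] = spte;
                *rmap = (unsigned long)desc | 1;
        } else {                                /* many -> many: append, chaining as needed */
                desc = (struct sketch_rmap_desc *)(*rmap & ~1ul);
                while (desc->shadow_ptes[SKETCH_RMAP_EXT - 1] && desc->more)
                        desc = desc->more;
                if (desc->shadow_ptes[SKETCH_RMAP_EXT - 1]) {
                        desc->more = calloc(1, sizeof(*desc));
                        desc = desc->more;
                }
                for (i = 0; desc->shadow_ptes[i]; ++i)
                        ;
                desc->shadow_ptes[i] = spte;
        }
}

/* Walk the encoding and count how many sptes currently map the page. */
static int sketch_rmap_count(unsigned long rmap)
{
        struct sketch_rmap_desc *desc;
        int i, n = 0;

        if (!rmap)
                return 0;
        if (!(rmap & 1))
                return 1;
        for (desc = (struct sketch_rmap_desc *)(rmap & ~1ul); desc; desc = desc->more)
                for (i = 0; i < SKETCH_RMAP_EXT && desc->shadow_ptes[i]; ++i)
                        ++n;
        return n;
}

int main(void)
{
        unsigned long rmap = 0;
        unsigned long long sptes[6];
        int i;

        for (i = 0; i < 6; ++i) {
                sketch_rmap_add(&rmap, &sptes[i]);
                printf("after %d adds: count=%d tagged=%lu\n",
                       i + 1, sketch_rmap_count(rmap), rmap & 1);
        }
        assert(sketch_rmap_count(rmap) == 6);
        return 0;
}

The single-pointer fast path keeps the common case (one shadow PTE per guest page) allocation-free; only when a second mapping appears does the code pay for a descriptor, which is the same trade-off the kernel functions above make via mmu_alloc_rmap_desc().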