/*
 * mmu.c — extracted from a web code viewer.
 * Viewer metadata (translated): "From 'Linux kernel source code' · C code ·
 * 1,499 lines total · page 1 of 3".  Viewer chrome removed.
 */
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This module enables machines with Intel VT-x extensions to run virtual
 * machines without emulation or binary translation.
 *
 * MMU support
 *
 * Copyright (C) 2006 Qumranet, Inc.
 *
 * Authors:
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Avi Kivity   <avi@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "vmx.h"
#include "kvm.h"

#include <linux/types.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/module.h>

#include <asm/page.h>
#include <asm/cmpxchg.h>

#undef MMU_DEBUG
#undef AUDIT

#ifdef AUDIT
static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg);
#else
static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg) {}
#endif

/* Debug printouts compile away entirely unless MMU_DEBUG is defined. */
#ifdef MMU_DEBUG
#define pgprintk(x...) do { if (dbg) printk(x); } while (0)
#define rmap_printk(x...) do { if (dbg) printk(x); } while (0)
#else
#define pgprintk(x...) do { } while (0)
#define rmap_printk(x...) do { } while (0)
#endif

#if defined(MMU_DEBUG) || defined(AUDIT)
static int dbg = 1;
#endif

/* ASSERT() only warns (does not halt), and only when MMU_DEBUG is set. */
#ifndef MMU_DEBUG
#define ASSERT(x) do { } while (0)
#else
#define ASSERT(x)							\
	if (!(x)) {							\
		printk(KERN_WARNING "assertion failed %s:%d: %s\n",	\
		       __FILE__, __LINE__, #x);				\
	}
#endif

#define PT64_PT_BITS 9
#define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS)
#define PT32_PT_BITS 10
#define PT32_ENT_PER_PAGE (1 << PT32_PT_BITS)

#define PT_WRITABLE_SHIFT 1

/* x86 page-table entry bits. */
#define PT_PRESENT_MASK (1ULL << 0)
#define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT)
#define PT_USER_MASK (1ULL << 2)
#define PT_PWT_MASK (1ULL << 3)
#define PT_PCD_MASK (1ULL << 4)
#define PT_ACCESSED_MASK (1ULL << 5)
#define PT_DIRTY_MASK (1ULL << 6)
#define PT_PAGE_SIZE_MASK (1ULL << 7)
#define PT_PAT_MASK (1ULL << 7)
#define PT_GLOBAL_MASK (1ULL << 8)
#define PT64_NX_MASK (1ULL << 63)

#define PT_PAT_SHIFT 7
#define PT_DIR_PAT_SHIFT 12
#define PT_DIR_PAT_MASK (1ULL << PT_DIR_PAT_SHIFT)

#define PT32_DIR_PSE36_SIZE 4
#define PT32_DIR_PSE36_SHIFT 13
#define PT32_DIR_PSE36_MASK (((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT)

/* Software-available PTE bits used by the shadow MMU. */
#define PT_FIRST_AVAIL_BITS_SHIFT 9
#define PT64_SECOND_AVAIL_BITS_SHIFT 52

#define PT_SHADOW_IO_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)

#define VALID_PAGE(x) ((x) != INVALID_PAGE)

#define PT64_LEVEL_BITS 9

#define PT64_LEVEL_SHIFT(level) \
		( PAGE_SHIFT + (level - 1) * PT64_LEVEL_BITS )

#define PT64_LEVEL_MASK(level) \
		(((1ULL << PT64_LEVEL_BITS) - 1) << PT64_LEVEL_SHIFT(level))

#define PT64_INDEX(address, level)\
	(((address) >> PT64_LEVEL_SHIFT(level)) & ((1 << PT64_LEVEL_BITS) - 1))

#define PT32_LEVEL_BITS 10

#define PT32_LEVEL_SHIFT(level) \
		( PAGE_SHIFT + (level - 1) * PT32_LEVEL_BITS )

#define PT32_LEVEL_MASK(level) \
		(((1ULL << PT32_LEVEL_BITS) - 1) << PT32_LEVEL_SHIFT(level))

#define PT32_INDEX(address, level)\
	(((address) >> PT32_LEVEL_SHIFT(level)) & ((1 << PT32_LEVEL_BITS) - 1))

#define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))
#define PT64_DIR_BASE_ADDR_MASK \
	(PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + PT64_LEVEL_BITS)) - 1))

#define PT32_BASE_ADDR_MASK PAGE_MASK
#define PT32_DIR_BASE_ADDR_MASK \
	(PAGE_MASK & ~((1ULL << (PAGE_SHIFT + PT32_LEVEL_BITS)) - 1))

/* Page-fault error-code bits. */
#define PFERR_PRESENT_MASK (1U << 0)
#define PFERR_WRITE_MASK (1U << 1)
#define PFERR_USER_MASK (1U << 2)
#define PFERR_FETCH_MASK (1U << 4)

#define PT64_ROOT_LEVEL 4
#define PT32_ROOT_LEVEL 2
#define PT32E_ROOT_LEVEL 3

#define PT_DIRECTORY_LEVEL 2
#define PT_PAGE_TABLE_LEVEL 1

#define RMAP_EXT 4

/*
 * One node in the reverse-map chain: up to RMAP_EXT shadow pte pointers,
 * plus a link to the next node.
 */
struct kvm_rmap_desc {
	u64 *shadow_ptes[RMAP_EXT];
	struct kvm_rmap_desc *more;
};

static struct kmem_cache *pte_chain_cache;
static struct kmem_cache *rmap_desc_cache;
static struct kmem_cache *mmu_page_header_cache;

static int is_write_protection(struct kvm_vcpu *vcpu)
{
	return vcpu->cr0 & X86_CR0_WP;
}

static int is_cpuid_PSE36(void)
{
	return 1;
}

static int is_nx(struct kvm_vcpu *vcpu)
{
	return vcpu->shadow_efer & EFER_NX;
}

static int is_present_pte(unsigned long pte)
{
	return pte & PT_PRESENT_MASK;
}

static int is_writeble_pte(unsigned long pte)
{
	return pte & PT_WRITABLE_MASK;
}

static int is_io_pte(unsigned long pte)
{
	return pte & PT_SHADOW_IO_MARK;
}

/* An spte participates in the rmap only if it is both present and writable. */
static int is_rmap_pte(u64 pte)
{
	return (pte & (PT_WRITABLE_MASK | PT_PRESENT_MASK))
		== (PT_WRITABLE_MASK | PT_PRESENT_MASK);
}

/* Atomically install a 64-bit shadow pte, on both 32- and 64-bit hosts. */
static void set_shadow_pte(u64 *sptep, u64 spte)
{
#ifdef CONFIG_X86_64
	set_64bit((unsigned long *)sptep, spte);
#else
	set_64bit((unsigned long long *)sptep, spte);
#endif
}

/*
 * Fill @cache with at least @min preallocated objects from @base_cache,
 * so later allocations in atomic context cannot fail.
 */
static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
				  struct kmem_cache *base_cache, int min)
{
	void *obj;

	if (cache->nobjs >= min)
		return 0;
	while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
		obj = kmem_cache_zalloc(base_cache, GFP_KERNEL);
		if (!obj)
			return -ENOMEM;
		cache->objects[cache->nobjs++] = obj;
	}
	return 0;
}

static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
{
	while (mc->nobjs)
		kfree(mc->objects[--mc->nobjs]);
}

/* Like mmu_topup_memory_cache(), but stocks whole zeroed-private pages. */
static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache,
				       int min)
{
	struct page *page;

	if (cache->nobjs >= min)
		return 0;
	while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
		page = alloc_page(GFP_KERNEL);
		if (!page)
			return -ENOMEM;
		set_page_private(page, 0);
		cache->objects[cache->nobjs++] = page_address(page);
	}
	return 0;
}

static void mmu_free_memory_cache_page(struct kvm_mmu_memory_cache *mc)
{
	while (mc->nobjs)
		free_page((unsigned long)mc->objects[--mc->nobjs]);
}

/* Preallocate everything a page fault might need; returns 0 or -ENOMEM. */
static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
{
	int r;

	kvm_mmu_free_some_pages(vcpu);
	r = mmu_topup_memory_cache(&vcpu->mmu_pte_chain_cache,
				   pte_chain_cache, 4);
	if (r)
		goto out;
	r = mmu_topup_memory_cache(&vcpu->mmu_rmap_desc_cache,
				   rmap_desc_cache, 1);
	if (r)
		goto out;
	r = mmu_topup_memory_cache_page(&vcpu->mmu_page_cache, 4);
	if (r)
		goto out;
	r = mmu_topup_memory_cache(&vcpu->mmu_page_header_cache,
				   mmu_page_header_cache, 4);
out:
	return r;
}

static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
{
	mmu_free_memory_cache(&vcpu->mmu_pte_chain_cache);
	mmu_free_memory_cache(&vcpu->mmu_rmap_desc_cache);
	mmu_free_memory_cache_page(&vcpu->mmu_page_cache);
	mmu_free_memory_cache(&vcpu->mmu_page_header_cache);
}

/* Pop a preallocated object; the cache must have been topped up first. */
static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc,
				    size_t size)
{
	void *p;

	BUG_ON(!mc->nobjs);
	p = mc->objects[--mc->nobjs];
	memset(p, 0, size);
	return p;
}

static struct kvm_pte_chain *mmu_alloc_pte_chain(struct kvm_vcpu *vcpu)
{
	return mmu_memory_cache_alloc(&vcpu->mmu_pte_chain_cache,
				      sizeof(struct kvm_pte_chain));
}

static void mmu_free_pte_chain(struct kvm_pte_chain *pc)
{
	kfree(pc);
}

static struct kvm_rmap_desc *mmu_alloc_rmap_desc(struct kvm_vcpu *vcpu)
{
	return mmu_memory_cache_alloc(&vcpu->mmu_rmap_desc_cache,
				      sizeof(struct kvm_rmap_desc));
}

static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd)
{
	kfree(rd);
}

/*
 * Reverse mapping data structures:
 *
 * If page->private bit zero is zero, then page->private points to the
 * shadow page table entry that points to page_address(page).
* * If page->private bit zero is one, (then page->private & ~1) points * to a struct kvm_rmap_desc containing more mappings. */static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte){	struct page *page;	struct kvm_rmap_desc *desc;	int i;	if (!is_rmap_pte(*spte))		return;	page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);	if (!page_private(page)) {		rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte);		set_page_private(page,(unsigned long)spte);	} else if (!(page_private(page) & 1)) {		rmap_printk("rmap_add: %p %llx 1->many\n", spte, *spte);		desc = mmu_alloc_rmap_desc(vcpu);		desc->shadow_ptes[0] = (u64 *)page_private(page);		desc->shadow_ptes[1] = spte;		set_page_private(page,(unsigned long)desc | 1);	} else {		rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte);		desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul);		while (desc->shadow_ptes[RMAP_EXT-1] && desc->more)			desc = desc->more;		if (desc->shadow_ptes[RMAP_EXT-1]) {			desc->more = mmu_alloc_rmap_desc(vcpu);			desc = desc->more;		}		for (i = 0; desc->shadow_ptes[i]; ++i)			;		desc->shadow_ptes[i] = spte;	}}static void rmap_desc_remove_entry(struct page *page,				   struct kvm_rmap_desc *desc,				   int i,				   struct kvm_rmap_desc *prev_desc){	int j;	for (j = RMAP_EXT - 1; !desc->shadow_ptes[j] && j > i; --j)		;	desc->shadow_ptes[i] = desc->shadow_ptes[j];	desc->shadow_ptes[j] = NULL;	if (j != 0)		return;	if (!prev_desc && !desc->more)		set_page_private(page,(unsigned long)desc->shadow_ptes[0]);	else		if (prev_desc)			prev_desc->more = desc->more;		else			set_page_private(page,(unsigned long)desc->more | 1);	mmu_free_rmap_desc(desc);}static void rmap_remove(u64 *spte){	struct page *page;	struct kvm_rmap_desc *desc;	struct kvm_rmap_desc *prev_desc;	int i;	if (!is_rmap_pte(*spte))		return;	page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);	if (!page_private(page)) {		printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);		BUG();	} else if 
(!(page_private(page) & 1)) {		rmap_printk("rmap_remove:  %p %llx 1->0\n", spte, *spte);		if ((u64 *)page_private(page) != spte) {			printk(KERN_ERR "rmap_remove:  %p %llx 1->BUG\n",			       spte, *spte);			BUG();		}		set_page_private(page,0);	} else {		rmap_printk("rmap_remove:  %p %llx many->many\n", spte, *spte);		desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul);		prev_desc = NULL;		while (desc) {			for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i)				if (desc->shadow_ptes[i] == spte) {					rmap_desc_remove_entry(page,							       desc, i,							       prev_desc);					return;				}			prev_desc = desc;			desc = desc->more;		}		BUG();	}}static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn){	struct kvm *kvm = vcpu->kvm;	struct page *page;	struct kvm_rmap_desc *desc;	u64 *spte;	page = gfn_to_page(kvm, gfn);	BUG_ON(!page);	while (page_private(page)) {		if (!(page_private(page) & 1))			spte = (u64 *)page_private(page);		else {			desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul);			spte = desc->shadow_ptes[0];		}		BUG_ON(!spte);		BUG_ON((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT		       != page_to_pfn(page));		BUG_ON(!(*spte & PT_PRESENT_MASK));		BUG_ON(!(*spte & PT_WRITABLE_MASK));		rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);		rmap_remove(spte);		set_shadow_pte(spte, *spte & ~PT_WRITABLE_MASK);		kvm_flush_remote_tlbs(vcpu->kvm);	}}#ifdef MMU_DEBUGstatic int is_empty_shadow_page(u64 *spt){	u64 *pos;	u64 *end;	for (pos = spt, end = pos + PAGE_SIZE / sizeof(u64); pos != end; pos++)		if (*pos != 0) {			printk(KERN_ERR "%s: %p %llx\n", __FUNCTION__,			       pos, *pos);			return 0;		}	return 1;}#endifstatic void kvm_mmu_free_page(struct kvm *kvm,			      struct kvm_mmu_page *page_head){	ASSERT(is_empty_shadow_page(page_head->spt));	list_del(&page_head->link);	__free_page(virt_to_page(page_head->spt));	kfree(page_head);	++kvm->n_free_mmu_pages;}static unsigned kvm_page_table_hashfn(gfn_t gfn){	return gfn;}static struct 
kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,					       u64 *parent_pte){	struct kvm_mmu_page *page;	if (!vcpu->kvm->n_free_mmu_pages)		return NULL;	page = mmu_memory_cache_alloc(&vcpu->mmu_page_header_cache,				      sizeof *page);	page->spt = mmu_memory_cache_alloc(&vcpu->mmu_page_cache, PAGE_SIZE);	set_page_private(virt_to_page(page->spt), (unsigned long)page);	list_add(&page->link, &vcpu->kvm->active_mmu_pages);	ASSERT(is_empty_shadow_page(page->spt));	page->slot_bitmap = 0;	page->multimapped = 0;	page->parent_pte = parent_pte;	--vcpu->kvm->n_free_mmu_pages;	return page;}static void mmu_page_add_parent_pte(struct kvm_vcpu *vcpu,				    struct kvm_mmu_page *page, u64 *parent_pte){

/*
 * (Viewer chrome removed: keyboard-shortcut help panel — copy Ctrl+C,
 * search Ctrl+F, fullscreen F11, font size Ctrl+=/Ctrl+-.)
 * NOTE: this extraction covers only page 1 of 3; the file continues with
 * the body of mmu_page_add_parent_pte() and the remaining ~1,000 lines.
 */