mmu.c

From the Linux kernel source code · C · 1,499 lines in total · part 1 of 3

	return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL);
}

static int paging32_init_context(struct kvm_vcpu *vcpu)
{
	struct kvm_mmu *context = &vcpu->mmu;

	context->new_cr3 = paging_new_cr3;
	context->page_fault = paging32_page_fault;
	context->gva_to_gpa = paging32_gva_to_gpa;
	context->free = paging_free;
	context->root_level = PT32_ROOT_LEVEL;
	context->shadow_root_level = PT32E_ROOT_LEVEL;
	context->root_hpa = INVALID_PAGE;
	return 0;
}

static int paging32E_init_context(struct kvm_vcpu *vcpu)
{
	return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL);
}

static int init_kvm_mmu(struct kvm_vcpu *vcpu)
{
	ASSERT(vcpu);
	ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa));

	if (!is_paging(vcpu))
		return nonpaging_init_context(vcpu);
	else if (is_long_mode(vcpu))
		return paging64_init_context(vcpu);
	else if (is_pae(vcpu))
		return paging32E_init_context(vcpu);
	else
		return paging32_init_context(vcpu);
}

static void destroy_kvm_mmu(struct kvm_vcpu *vcpu)
{
	ASSERT(vcpu);
	if (VALID_PAGE(vcpu->mmu.root_hpa)) {
		vcpu->mmu.free(vcpu);
		vcpu->mmu.root_hpa = INVALID_PAGE;
	}
}

int kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
{
	destroy_kvm_mmu(vcpu);
	return init_kvm_mmu(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_mmu_reset_context);

int kvm_mmu_load(struct kvm_vcpu *vcpu)
{
	int r;

	mutex_lock(&vcpu->kvm->lock);
	r = mmu_topup_memory_caches(vcpu);
	if (r)
		goto out;
	mmu_alloc_roots(vcpu);
	kvm_x86_ops->set_cr3(vcpu, vcpu->mmu.root_hpa);
	kvm_mmu_flush_tlb(vcpu);
out:
	mutex_unlock(&vcpu->kvm->lock);
	return r;
}
EXPORT_SYMBOL_GPL(kvm_mmu_load);

void kvm_mmu_unload(struct kvm_vcpu *vcpu)
{
	mmu_free_roots(vcpu);
}
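
/*
 * Editorial note (comment not in the original source): the next three
 * functions keep shadow page tables coherent when the guest writes to
 * a page that KVM is shadowing as a page table.  kvm_mmu_pte_write()
 * is reached from the emulated-write path; it zaps the shadow entries
 * covered by the write and, for last-level tables, rebuilds them
 * immediately from the newly written guest PTE bytes.
 */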
static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,
				  struct kvm_mmu_page *page,
				  u64 *spte)
{
	u64 pte;
	struct kvm_mmu_page *child;

	pte = *spte;
	if (is_present_pte(pte)) {
		if (page->role.level == PT_PAGE_TABLE_LEVEL)
			rmap_remove(spte);
		else {
			child = page_header(pte & PT64_BASE_ADDR_MASK);
			mmu_page_remove_parent_pte(child, spte);
		}
	}
	set_shadow_pte(spte, 0);
	kvm_flush_remote_tlbs(vcpu->kvm);
}

static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
				  struct kvm_mmu_page *page,
				  u64 *spte,
				  const void *new, int bytes)
{
	if (page->role.level != PT_PAGE_TABLE_LEVEL)
		return;

	if (page->role.glevels == PT32_ROOT_LEVEL)
		paging32_update_pte(vcpu, page, spte, new, bytes);
	else
		paging64_update_pte(vcpu, page, spte, new, bytes);
}

void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
		       const u8 *new, int bytes)
{
	gfn_t gfn = gpa >> PAGE_SHIFT;
	struct kvm_mmu_page *page;
	struct hlist_node *node, *n;
	struct hlist_head *bucket;
	unsigned index;
	u64 *spte;
	unsigned offset = offset_in_page(gpa);
	unsigned pte_size;
	unsigned page_offset;
	unsigned misaligned;
	unsigned quadrant;
	int level;
	int flooded = 0;
	int npte;

	pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes);
	if (gfn == vcpu->last_pt_write_gfn) {
		++vcpu->last_pt_write_count;
		if (vcpu->last_pt_write_count >= 3)
			flooded = 1;
	} else {
		vcpu->last_pt_write_gfn = gfn;
		vcpu->last_pt_write_count = 1;
	}
	index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
	bucket = &vcpu->kvm->mmu_page_hash[index];
	hlist_for_each_entry_safe(page, node, n, bucket, hash_link) {
		if (page->gfn != gfn || page->role.metaphysical)
			continue;
		pte_size = page->role.glevels == PT32_ROOT_LEVEL ? 4 : 8;
		misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
		misaligned |= bytes < 4;
		if (misaligned || flooded) {
			/*
			 * Misaligned accesses are too much trouble to fix
			 * up; also, they usually indicate a page is not used
			 * as a page table.
			 *
			 * If we're seeing too many writes to a page,
			 * it may no longer be a page table, or we may be
			 * forking, in which case it is better to unmap the
			 * page.
			 */
			pgprintk("misaligned: gpa %llx bytes %d role %x\n",
				 gpa, bytes, page->role.word);
			kvm_mmu_zap_page(vcpu->kvm, page);
			continue;
		}
		page_offset = offset;
		level = page->role.level;
		npte = 1;
		if (page->role.glevels == PT32_ROOT_LEVEL) {
			page_offset <<= 1;	/* 32->64 */
			/*
			 * A 32-bit pde maps 4MB while the shadow pdes map
			 * only 2MB.  So we need to double the offset again
			 * and zap two pdes instead of one.
			 */
			if (level == PT32_ROOT_LEVEL) {
				page_offset &= ~7; /* kill rounding error */
				page_offset <<= 1;
				npte = 2;
			}
			quadrant = page_offset >> PAGE_SHIFT;
			page_offset &= ~PAGE_MASK;
			if (quadrant != page->role.quadrant)
				continue;
		}
		spte = &page->spt[page_offset / sizeof(*spte)];
		while (npte--) {
			mmu_pte_write_zap_pte(vcpu, page, spte);
			mmu_pte_write_new_pte(vcpu, page, spte, new, bytes);
			++spte;
		}
	}
}

int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
{
	gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva);

	return kvm_mmu_unprotect_page(vcpu, gpa >> PAGE_SHIFT);
}

void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
{
	while (vcpu->kvm->n_free_mmu_pages < KVM_REFILL_PAGES) {
		struct kvm_mmu_page *page;

		page = container_of(vcpu->kvm->active_mmu_pages.prev,
				    struct kvm_mmu_page, link);
		kvm_mmu_zap_page(vcpu->kvm, page);
	}
}
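
/*
 * Editorial note (comment not in the original source): per-vcpu MMU
 * lifecycle.  alloc_mmu_pages()/free_mmu_pages() manage the page
 * backing the four PAE root entries; kvm_mmu_create() and
 * kvm_mmu_setup() are the two stages of vcpu MMU initialization, and
 * kvm_mmu_destroy() tears everything down again.
 */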
static void free_mmu_pages(struct kvm_vcpu *vcpu)
{
	struct kvm_mmu_page *page;

	while (!list_empty(&vcpu->kvm->active_mmu_pages)) {
		page = container_of(vcpu->kvm->active_mmu_pages.next,
				    struct kvm_mmu_page, link);
		kvm_mmu_zap_page(vcpu->kvm, page);
	}
	free_page((unsigned long)vcpu->mmu.pae_root);
}

static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
{
	struct page *page;
	int i;

	ASSERT(vcpu);

	vcpu->kvm->n_free_mmu_pages = KVM_NUM_MMU_PAGES;

	/*
	 * When emulating 32-bit mode, cr3 is only 32 bits even on x86_64.
	 * Therefore we need to allocate shadow page tables in the first
	 * 4GB of memory, which happens to fit the DMA32 zone.
	 */
	page = alloc_page(GFP_KERNEL | __GFP_DMA32);
	if (!page)
		goto error_1;
	vcpu->mmu.pae_root = page_address(page);
	for (i = 0; i < 4; ++i)
		vcpu->mmu.pae_root[i] = INVALID_PAGE;

	return 0;

error_1:
	free_mmu_pages(vcpu);
	return -ENOMEM;
}

int kvm_mmu_create(struct kvm_vcpu *vcpu)
{
	ASSERT(vcpu);
	ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa));

	return alloc_mmu_pages(vcpu);
}

int kvm_mmu_setup(struct kvm_vcpu *vcpu)
{
	ASSERT(vcpu);
	ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa));

	return init_kvm_mmu(vcpu);
}

void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
{
	ASSERT(vcpu);

	destroy_kvm_mmu(vcpu);
	free_mmu_pages(vcpu);
	mmu_free_memory_caches(vcpu);
}

void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
{
	struct kvm_mmu_page *page;

	list_for_each_entry(page, &kvm->active_mmu_pages, link) {
		int i;
		u64 *pt;

		if (!test_bit(slot, &page->slot_bitmap))
			continue;

		pt = page->spt;
		for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
			/* avoid RMW */
			if (pt[i] & PT_WRITABLE_MASK) {
				rmap_remove(&pt[i]);
				pt[i] &= ~PT_WRITABLE_MASK;
			}
	}
}

void kvm_mmu_zap_all(struct kvm *kvm)
{
	struct kvm_mmu_page *page, *node;

	list_for_each_entry_safe(page, node, &kvm->active_mmu_pages, link)
		kvm_mmu_zap_page(kvm, page);

	kvm_flush_remote_tlbs(kvm);
}

void kvm_mmu_module_exit(void)
{
	if (pte_chain_cache)
		kmem_cache_destroy(pte_chain_cache);
	if (rmap_desc_cache)
		kmem_cache_destroy(rmap_desc_cache);
	if (mmu_page_header_cache)
		kmem_cache_destroy(mmu_page_header_cache);
}

int kvm_mmu_module_init(void)
{
	pte_chain_cache = kmem_cache_create("kvm_pte_chain",
					    sizeof(struct kvm_pte_chain),
					    0, 0, NULL);
	if (!pte_chain_cache)
		goto nomem;
	rmap_desc_cache = kmem_cache_create("kvm_rmap_desc",
					    sizeof(struct kvm_rmap_desc),
					    0, 0, NULL);
	if (!rmap_desc_cache)
		goto nomem;

	mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header",
						  sizeof(struct kvm_mmu_page),
						  0, 0, NULL);
	if (!mmu_page_header_cache)
		goto nomem;

	return 0;

nomem:
	kvm_mmu_module_exit();
	return -ENOMEM;
}
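
/*
 * Editorial note (comment not in the original source): everything
 * below is debug-only consistency checking, compiled only when AUDIT
 * is defined.  audit_mappings() verifies shadow translations against
 * the guest page tables, audit_rmap() compares rmap entries with the
 * writable sptes actually present, and audit_write_protection()
 * checks that shadowed guest page tables are not left writable.
 */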
#ifdef AUDIT

static const char *audit_msg;

static gva_t canonicalize(gva_t gva)
{
#ifdef CONFIG_X86_64
	gva = (long long)(gva << 16) >> 16;
#endif
	return gva;
}

static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
				gva_t va, int level)
{
	u64 *pt = __va(page_pte & PT64_BASE_ADDR_MASK);
	int i;
	gva_t va_delta = 1ul << (PAGE_SHIFT + 9 * (level - 1));

	for (i = 0; i < PT64_ENT_PER_PAGE; ++i, va += va_delta) {
		u64 ent = pt[i];

		if (!(ent & PT_PRESENT_MASK))
			continue;

		va = canonicalize(va);
		if (level > 1)
			audit_mappings_page(vcpu, ent, va, level - 1);
		else {
			gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, va);
			hpa_t hpa = gpa_to_hpa(vcpu, gpa);

			if ((ent & PT_PRESENT_MASK)
			    && (ent & PT64_BASE_ADDR_MASK) != hpa)
				printk(KERN_ERR "audit error: (%s) levels %d"
				       " gva %lx gpa %llx hpa %llx ent %llx\n",
				       audit_msg, vcpu->mmu.root_level,
				       va, gpa, hpa, ent);
		}
	}
}

static void audit_mappings(struct kvm_vcpu *vcpu)
{
	unsigned i;

	if (vcpu->mmu.root_level == 4)
		audit_mappings_page(vcpu, vcpu->mmu.root_hpa, 0, 4);
	else
		for (i = 0; i < 4; ++i)
			if (vcpu->mmu.pae_root[i] & PT_PRESENT_MASK)
				audit_mappings_page(vcpu,
						    vcpu->mmu.pae_root[i],
						    i << 30,
						    2);
}

static int count_rmaps(struct kvm_vcpu *vcpu)
{
	int nmaps = 0;
	int i, j, k;

	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
		struct kvm_memory_slot *m = &vcpu->kvm->memslots[i];
		struct kvm_rmap_desc *d;

		for (j = 0; j < m->npages; ++j) {
			struct page *page = m->phys_mem[j];

			if (!page->private)
				continue;
			if (!(page->private & 1)) {
				++nmaps;
				continue;
			}
			d = (struct kvm_rmap_desc *)(page->private & ~1ul);
			while (d) {
				for (k = 0; k < RMAP_EXT; ++k)
					if (d->shadow_ptes[k])
						++nmaps;
					else
						break;
				d = d->more;
			}
		}
	}
	return nmaps;
}

static int count_writable_mappings(struct kvm_vcpu *vcpu)
{
	int nmaps = 0;
	struct kvm_mmu_page *page;
	int i;

	list_for_each_entry(page, &vcpu->kvm->active_mmu_pages, link) {
		u64 *pt = page->spt;

		if (page->role.level != PT_PAGE_TABLE_LEVEL)
			continue;

		for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
			u64 ent = pt[i];

			if (!(ent & PT_PRESENT_MASK))
				continue;
			if (!(ent & PT_WRITABLE_MASK))
				continue;
			++nmaps;
		}
	}
	return nmaps;
}

static void audit_rmap(struct kvm_vcpu *vcpu)
{
	int n_rmap = count_rmaps(vcpu);
	int n_actual = count_writable_mappings(vcpu);

	if (n_rmap != n_actual)
		printk(KERN_ERR "%s: (%s) rmap %d actual %d\n",
		       __FUNCTION__, audit_msg, n_rmap, n_actual);
}

static void audit_write_protection(struct kvm_vcpu *vcpu)
{
	struct kvm_mmu_page *page;

	list_for_each_entry(page, &vcpu->kvm->active_mmu_pages, link) {
		hfn_t hfn;
		struct page *pg;

		if (page->role.metaphysical)
			continue;

		hfn = gpa_to_hpa(vcpu, (gpa_t)page->gfn << PAGE_SHIFT)
			>> PAGE_SHIFT;
		pg = pfn_to_page(hfn);
		if (pg->private)
			printk(KERN_ERR "%s: (%s) shadow page has writable"
			       " mappings: gfn %lx role %x\n",
			       __FUNCTION__, audit_msg, page->gfn,
			       page->role.word);
	}
}

static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg)
{
	int olddbg = dbg;

	dbg = 0;

	audit_msg = msg;
	audit_rmap(vcpu);
	audit_write_protection(vcpu);
	audit_mappings(vcpu);
	dbg = olddbg;
}

#endif
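
A rough sketch of how the exported entry points above fit together over a vcpu's lifetime. The function below is hypothetical, not part of mmu.c: the real call sites live elsewhere in KVM, and error handling beyond simple returns is elided. It only illustrates the expected ordering.

/* Hypothetical caller, for illustration only. */
static int example_vcpu_mmu_lifetime(struct kvm_vcpu *vcpu)
{
	int r;

	r = kvm_mmu_create(vcpu);	/* reserve the PAE root page */
	if (r)
		return r;
	r = kvm_mmu_setup(vcpu);	/* pick a paging mode, install callbacks */
	if (r)
		return r;

	r = kvm_mmu_load(vcpu);		/* build shadow roots, point cr3 at them */
	if (r)
		return r;

	/*
	 * ... run the guest; on cr0/cr4/efer mode changes, callers use
	 * kvm_mmu_reset_context() to rebuild the paging context ...
	 */

	kvm_mmu_unload(vcpu);		/* drop the shadow roots */
	kvm_mmu_destroy(vcpu);		/* free roots, the pae_root page, caches */
	return 0;
}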
