vhpt.c
/*
 * Initialize VHPT support.
 *
 * Copyright (C) 2004 Hewlett-Packard Co
 *      Dan Magenheimer <dan.magenheimer@hp.com>
 *
 * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
 *                    VA Linux Systems Japan K.K.
 *                    per vcpu vhpt support
 */
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/init.h>

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/pgalloc.h>
#include <asm/page.h>
#include <asm/vhpt.h>
#include <asm/vcpu.h>
#include <asm/vcpumask.h>
#include <asm/vmmu.h>

DEFINE_PER_CPU (unsigned long, vhpt_paddr);
DEFINE_PER_CPU (unsigned long, vhpt_pend);
#ifdef CONFIG_XEN_IA64_TLBFLUSH_CLOCK
DEFINE_PER_CPU(volatile u32, vhpt_tlbflush_timestamp);
#endif

static void
__vhpt_flush(unsigned long vhpt_maddr, unsigned long vhpt_size_log2)
{
        struct vhpt_lf_entry *v = (struct vhpt_lf_entry*)__va(vhpt_maddr);
        unsigned long num_entries = 1 << (vhpt_size_log2 - 5);
        int i;

        for (i = 0; i < num_entries; i++, v++)
                v->ti_tag = INVALID_TI_TAG;
}

void
local_vhpt_flush(void)
{
        /* increment flush clock before flush */
        u32 flush_time = tlbflush_clock_inc_and_return();
        __vhpt_flush(__ia64_per_cpu_var(vhpt_paddr), VHPT_SIZE_LOG2);
        /* this must be after flush */
        tlbflush_update_time(&__get_cpu_var(vhpt_tlbflush_timestamp),
                             flush_time);
        perfc_incr(local_vhpt_flush);
}

void
vcpu_vhpt_flush(struct vcpu* v)
{
        unsigned long vhpt_size_log2 = VHPT_SIZE_LOG2;
#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
        if (HAS_PERVCPU_VHPT(v->domain))
                vhpt_size_log2 = v->arch.pta.size;
#endif
        __vhpt_flush(vcpu_vhpt_maddr(v), vhpt_size_log2);
        perfc_incr(vcpu_vhpt_flush);
}

static void
vhpt_erase(unsigned long vhpt_maddr, unsigned long vhpt_size_log2)
{
        struct vhpt_lf_entry *v = (struct vhpt_lf_entry*)__va(vhpt_maddr);
        unsigned long num_entries = 1 << (vhpt_size_log2 - 5);
        int i;

        for (i = 0; i < num_entries; i++, v++) {
                v->itir = 0;
                v->CChain = 0;
                v->page_flags = 0;
                v->ti_tag = INVALID_TI_TAG;
        }
        // initialize cache too???
}

void vhpt_insert (unsigned long vadr, unsigned long pte, unsigned long itir)
{
        struct vhpt_lf_entry *vlfe = (struct vhpt_lf_entry *)ia64_thash(vadr);
        unsigned long tag = ia64_ttag (vadr);

        /* Even though VHPT is per VCPU, still need to first disable the entry,
         * because the processor may support speculative VHPT walk.  */
        vlfe->ti_tag = INVALID_TI_TAG;
        wmb();
        vlfe->itir = itir;
        vlfe->page_flags = pte | _PAGE_P;
        *(volatile unsigned long*)&vlfe->ti_tag = tag;
}
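/*
 * Note on the slot lookup used by vhpt_insert(): for the long-format VHPT,
 * ia64_thash()/ia64_ttag() wrap the architected thash/ttag instructions,
 * which hash the virtual address (together with the region id and the
 * region's preferred page size) down to a 32-byte slot in the table
 * addressed by the PTA register.  The exact hash and tag layout are
 * implementation defined; conceptually it behaves something like
 *
 *      slot = PTA.base + (hash(rid, vadr) << 5);   // 32-byte entries
 *      tag  = the va/rid bits not consumed by the hash;
 *
 * which is why the result of ia64_thash(vadr) can be cast directly to a
 * struct vhpt_lf_entry pointer.  Storing INVALID_TI_TAG and issuing wmb()
 * before refilling the entry keeps a speculative VHPT walker from ever
 * matching a half-written entry, as the comment above notes.
 */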
void vhpt_multiple_insert(unsigned long vaddr, unsigned long pte,
                          unsigned long itir)
{
        unsigned char ps = current->arch.vhpt_pg_shift;
        ia64_itir_t _itir = {.itir = itir};
        unsigned long mask = (1L << _itir.ps) - 1;
        int i;

        if (_itir.ps - ps > 10 && !running_on_sim) {
                // if this happens, we may want to revisit this algorithm
                panic("vhpt_multiple_insert:logps-PAGE_SHIFT>10,spinning..\n");
        }
        if (_itir.ps - ps > 2) {
                // FIXME: Should add counter here to see how often this
                //  happens (e.g. for 16MB pages!) and determine if it
                //  is a performance problem.  On a quick look, it takes
                //  about 39000 instrs for a 16MB page and it seems to occur
                //  only a few times/second, so OK for now.
                //  An alternate solution would be to just insert the one
                //  16KB in the vhpt (but with the full mapping)?
                //printk("vhpt_multiple_insert: logps-PAGE_SHIFT==%d,"
                //       "va=%p, pa=%p, pa-masked=%p\n",
                //       logps-PAGE_SHIFT, vaddr, pte&_PFN_MASK,
                //       (pte&_PFN_MASK)&~mask);
        }
        vaddr &= ~mask;
        pte = ((pte & _PFN_MASK) & ~mask) | (pte & ~_PFN_MASK);
        for (i = 1L << (_itir.ps - ps); i > 0; i--) {
                vhpt_insert(vaddr, pte, _itir.itir);
                vaddr += (1L << ps);
        }
}

void __init vhpt_init(void)
{
        unsigned long paddr;
        struct page_info *page;
#if !VHPT_ENABLED
        return;
#endif
        /* This allocation only holds true if vhpt table is unique for
         * all domains. Or else later new vhpt table should be allocated
         * from domain heap when each domain is created. Assume xen buddy
         * allocator can provide natural aligned page by order? */
        page = alloc_domheap_pages(NULL, VHPT_SIZE_LOG2 - PAGE_SHIFT, 0);
        if (!page)
                panic("vhpt_init: can't allocate VHPT!\n");
        paddr = page_to_maddr(page);
        if (paddr & ((1 << VHPT_SIZE_LOG2) - 1))
                panic("vhpt_init: bad VHPT alignment!\n");
        __get_cpu_var(vhpt_paddr) = paddr;
        __get_cpu_var(vhpt_pend) = paddr + (1 << VHPT_SIZE_LOG2) - 1;
        printk(XENLOG_DEBUG "vhpt_init: vhpt paddr=0x%lx, end=0x%lx\n",
               paddr, __get_cpu_var(vhpt_pend));
        vhpt_erase(paddr, VHPT_SIZE_LOG2);
        // we don't enable VHPT here.
        // context_switch() or schedule_tail() does it.
}

#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
int
pervcpu_vhpt_alloc(struct vcpu *v)
{
        unsigned long vhpt_size_log2 = VHPT_SIZE_LOG2;

        if (v->domain->arch.vhpt_size_log2 > 0)
                vhpt_size_log2 =
                        canonicalize_vhpt_size(v->domain->arch.vhpt_size_log2);
        printk(XENLOG_DEBUG "%s vhpt_size_log2=%ld\n",
               __func__, vhpt_size_log2);
        v->arch.vhpt_entries =
                (1UL << vhpt_size_log2) / sizeof(struct vhpt_lf_entry);
        v->arch.vhpt_page =
                alloc_domheap_pages(NULL, vhpt_size_log2 - PAGE_SHIFT, 0);
        if (!v->arch.vhpt_page)
                return -ENOMEM;

        v->arch.vhpt_maddr = page_to_maddr(v->arch.vhpt_page);
        if (v->arch.vhpt_maddr & ((1 << VHPT_SIZE_LOG2) - 1))
                panic("pervcpu_vhpt_init: bad VHPT alignment!\n");

        v->arch.pta.val = 0; // to zero reserved bits
        v->arch.pta.ve = 1; // enable vhpt
        v->arch.pta.size = vhpt_size_log2;
        v->arch.pta.vf = 1; // long format
        v->arch.pta.base = __va_ul(v->arch.vhpt_maddr) >> 15;

        vhpt_erase(v->arch.vhpt_maddr, vhpt_size_log2);
        smp_mb(); // per vcpu vhpt may be used by another physical cpu.
        return 0;
}

void
pervcpu_vhpt_free(struct vcpu *v)
{
        if (likely(v->arch.vhpt_page != NULL))
                free_domheap_pages(v->arch.vhpt_page,
                                   v->arch.pta.size - PAGE_SHIFT);
}
#endif
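/*
 * Sizing sketch for pervcpu_vhpt_alloc() above (the concrete numbers are an
 * example, not a statement about any particular build): with a
 * vhpt_size_log2 of 16 the per-vcpu table is 64KB, i.e.
 * 64KB / sizeof(struct vhpt_lf_entry) == 2048 long-format entries of 32
 * bytes each, matching the vhpt_entries computation.  PTA.base is taken
 * from the table's virtual address shifted right by 15 because the base
 * field of the PTA register starts at bit 15; pta.size, pta.ve and pta.vf
 * carry the table size, the walker enable bit and the long-format select.
 */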
void
domain_purge_swtc_entries(struct domain *d)
{
        struct vcpu* v;

        for_each_vcpu(d, v) {
                if (!v->is_initialised)
                        continue;

                /* Purge TC entries.
                   FIXME: clear only if match.  */
                vcpu_purge_tr_entry(&PSCBX(v,dtlb));
                vcpu_purge_tr_entry(&PSCBX(v,itlb));
        }
}

void
domain_purge_swtc_entries_vcpu_dirty_mask(struct domain* d,
                                          vcpumask_t vcpu_dirty_mask)
{
        int vcpu;

        for_each_vcpu_mask(vcpu, vcpu_dirty_mask) {
                struct vcpu* v = d->vcpu[vcpu];
                if (!v->is_initialised)
                        continue;

                /* Purge TC entries.
                   FIXME: clear only if match.  */
                vcpu_purge_tr_entry(&PSCBX(v, dtlb));
                vcpu_purge_tr_entry(&PSCBX(v, itlb));
        }
}

// SMP: we can't assume v == current, vcpu might move to another physical cpu.
// So memory barrier is necessary.
// If we can guarantee that the vcpu can run only on this physical cpu
// (e.g. vcpu == current), smp_mb() is unnecessary.
void vcpu_flush_vtlb_all(struct vcpu *v)
{
        if (VMX_DOMAIN(v)) {
                /* This code may be called for remapping the shared_info
                   and grant_table shared pages from
                   guest_physmap_remove_page() in arch_memory_op()
                   XENMEM_add_to_physmap to realize the PV-on-HVM feature. */
                /* FIXME: This is not SMP-safe yet about p2m table */
                /* Purge vTLB for VT-i domain */
                thash_purge_all(v);
        } else {
                /* First VCPU tlb.  */
                vcpu_purge_tr_entry(&PSCBX(v,dtlb));
                vcpu_purge_tr_entry(&PSCBX(v,itlb));
                smp_mb();

                /* Then VHPT.  */
                if (HAS_PERVCPU_VHPT(v->domain))
                        vcpu_vhpt_flush(v);
                else
                        local_vhpt_flush();
                smp_mb();

                /* Then mTLB.  */
                local_flush_tlb_all();
        }

        /* We could clear the bit in d->domain_dirty_cpumask only if domain d
           is not running on this processor.  There is currently no easy way
           to check this.  */

        perfc_incr(vcpu_flush_vtlb_all);
}

static void __vcpu_flush_vtlb_all(void *vcpu)
{
        vcpu_flush_vtlb_all((struct vcpu*)vcpu);
}

// caller must have incremented the reference count of d somehow.
void domain_flush_vtlb_all(struct domain* d)
{
        int cpu = smp_processor_id ();
        struct vcpu *v;

        for_each_vcpu(d, v) {
                if (!v->is_initialised)
                        continue;

                if (v->processor == cpu)
                        vcpu_flush_vtlb_all(v);
                else
                        // SMP: it is racy to reference v->processor.
                        // The vcpu scheduler may move this vcpu to another
                        // physical processor, and change the value
                        // using a plain store.
                        // We may be seeing the old value of it.
                        // In such a case, flush_vtlb_for_context_switch()
                        // takes care of the mTLB flush.
                        smp_call_function_single(v->processor,
                                                 __vcpu_flush_vtlb_all,
                                                 v, 1, 1);
        }
        perfc_incr(domain_flush_vtlb_all);
}

// Callers may need to call smp_mb() before/after calling this.
// Be careful.
static void
__flush_vhpt_range(unsigned long vhpt_maddr, u64 vadr, u64 addr_range)
{
        void *vhpt_base = __va(vhpt_maddr);
        u64 pgsz = 1L << current->arch.vhpt_pg_shift;
        u64 purge_addr = vadr & PAGE_MASK;

        addr_range += vadr - purge_addr;
        addr_range = PAGE_ALIGN(addr_range);
        while ((long)addr_range > 0) {
                /* Get the VHPT entry.  */
                unsigned int off = ia64_thash(purge_addr) -
                        __va_ul(vcpu_vhpt_maddr(current));
                struct vhpt_lf_entry *v = vhpt_base + off;
                v->ti_tag = INVALID_TI_TAG;
                addr_range -= pgsz;
                purge_addr += pgsz;
        }
}

static void
cpu_flush_vhpt_range(int cpu, u64 vadr, u64 addr_range)
{
        __flush_vhpt_range(per_cpu(vhpt_paddr, cpu), vadr, addr_range);
}

static void
vcpu_flush_vhpt_range(struct vcpu* v, u64 vadr, u64 addr_range)
{
        __flush_vhpt_range(vcpu_vhpt_maddr(v), vadr, addr_range);
}

void vcpu_flush_tlb_vhpt_range (u64 vadr, u64 log_range)
{
        if (HAS_PERVCPU_VHPT(current->domain))
                vcpu_flush_vhpt_range(current, vadr, 1UL << log_range);
        else
                cpu_flush_vhpt_range(current->processor,
                                     vadr, 1UL << log_range);
        ia64_ptcl(vadr, log_range << 2);
        ia64_srlz_i();
        perfc_incr(vcpu_flush_tlb_vhpt_range);
}
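/*
 * Two details worth spelling out about the range flushes above (descriptive
 * notes only, no additional behaviour):
 *  - ia64_ptcl(vadr, log_range << 2) places the log2 purge size in bits 7:2
 *    of the second operand, which is where ptc.l expects it, so e.g. a
 *    log_range of 14 purges one 16KB page.
 *  - __flush_vhpt_range() computes the slot offset with ia64_thash() (i.e.
 *    against the current PTA) and then applies it to vhpt_base, which may
 *    belong to another cpu or vcpu; that only lines up while all tables use
 *    the same size and therefore the same hash.  Entries that hashed to
 *    other slots (or were inserted under a different rid) are presumably
 *    left to the full INVALID_TI_TAG sweeps in local_vhpt_flush() and
 *    vcpu_vhpt_flush().
 */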
void domain_flush_vtlb_range (struct domain *d, u64 vadr, u64 addr_range)
{
        struct vcpu *v;

#if 0
        // this only seems to occur at shutdown, but it does occur
        if ((!addr_range) || addr_range & (addr_range - 1)) {
                printk("vhpt_flush_address: weird range, spinning...\n");
                while(1);
        }
#endif

        domain_purge_swtc_entries(d);
        smp_mb();

        for_each_vcpu (d, v) {
                if (!v->is_initialised)
                        continue;

                if (HAS_PERVCPU_VHPT(d)) {
                        vcpu_flush_vhpt_range(v, vadr, addr_range);
                } else {
                        // SMP: it is racy to reference v->processor.
                        // The vcpu scheduler may move this vcpu to another
                        // physical processor, and change the value
                        // using a plain store.
                        // We may be seeing the old value of it.
                        // In such a case, flush_vtlb_for_context_switch()
                        // takes care of the mTLB flush.

                        /* Invalidate VHPT entries.  */
                        cpu_flush_vhpt_range(v->processor, vadr, addr_range);
                }
        }
        // ptc.ga has release semantics.

        /* ptc.ga  */
        platform_global_tlb_purge(vadr, vadr + addr_range,
                                  current->arch.vhpt_pg_shift);
        perfc_incr(domain_flush_vtlb_range);
}

#ifdef CONFIG_XEN_IA64_TLB_TRACK
#include <asm/tlb_track.h>
#include <asm/vmx_vcpu.h>

void
__domain_flush_vtlb_track_entry(struct domain* d,
                                const struct tlb_track_entry* entry)
{
        unsigned long rr7_rid;
        int swap_rr0 = 0;
        unsigned long old_rid;
        unsigned long vaddr = entry->vaddr;
        struct vcpu* v;
        int cpu;
        int vcpu;
        int local_purge = 1;

        /* tlb insert tracking is done in PAGE_SIZE units.  */
        unsigned char ps = max_t(unsigned char,
                                 current->arch.vhpt_pg_shift, PAGE_SHIFT);
        /* This case isn't supported (yet).  */
        BUG_ON(current->arch.vhpt_pg_shift > PAGE_SHIFT);

        BUG_ON((vaddr >> VRN_SHIFT) != VRN7);
        /*
         * heuristic:
         * dom0linux accesses grant mapped pages via the kernel
         * straight mapped area and it doesn't change rr7 rid.
         * So it is likely that rr7 == entry->rid, so that
         * we can avoid the rid change.
         * When blktap is supported, this heuristic should be revised.
         */
        vcpu_get_rr(current, VRN7 << VRN_SHIFT, &rr7_rid);
        if (likely(rr7_rid == entry->rid)) {
                perfc_incr(tlb_track_use_rr7);
        } else {
                swap_rr0 = 1;
                vaddr = (vaddr << 3) >> 3;// force vrn0
                perfc_incr(tlb_track_swap_rr0);
        }

        // tlb_track_entry_printf(entry);
        if (swap_rr0) {
                vcpu_get_rr(current, 0, &old_rid);
                vcpu_set_rr(current, 0, entry->rid);
        }

        if (HAS_PERVCPU_VHPT(d)) {
                for_each_vcpu_mask(vcpu, entry->vcpu_dirty_mask) {
                        v = d->vcpu[vcpu];
                        if (!v->is_initialised)
                                continue;

                        /* Invalidate VHPT entries.  */
                        vcpu_flush_vhpt_range(v, vaddr, 1L << ps);

                        /*
                         * current->processor == v->processor
                         * is racy. we may see old v->processor and
                         * a new physical processor of v might see old
                         * vhpt entry and insert tlb.
                         */
                        if (v != current)
                                local_purge = 0;
                }
        } else {
                for_each_cpu_mask(cpu, entry->pcpu_dirty_mask) {
                        /* Invalidate VHPT entries.  */
                        cpu_flush_vhpt_range(cpu, vaddr, 1L << ps);

                        if (d->vcpu[cpu] != current)
                                local_purge = 0;
                }
        }

        /* ptc.ga  */
        if (local_purge) {
                ia64_ptcl(vaddr, ps << 2);
                perfc_incr(domain_flush_vtlb_local);
        } else {
                /* ptc.ga has release semantics.  */
                platform_global_tlb_purge(vaddr, vaddr + (1L << ps), ps);
                perfc_incr(domain_flush_vtlb_global);
        }

        if (swap_rr0) {
                vcpu_set_rr(current, 0, old_rid);
        }
        perfc_incr(domain_flush_vtlb_track_entry);
}

void
domain_flush_vtlb_track_entry(struct domain* d,
                              const struct tlb_track_entry* entry)
{
        domain_purge_swtc_entries_vcpu_dirty_mask(d, entry->vcpu_dirty_mask);
        smp_mb();

        __domain_flush_vtlb_track_entry(d, entry);
}

#endif
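/*
 * On the region trick in __domain_flush_vtlb_track_entry(): bits 63:61 of
 * an IA-64 virtual address select the region register (the VRN), so
 * "(vaddr << 3) >> 3" just clears those three bits and re-homes the tracked
 * address into region 0.  rr0 can then temporarily carry entry->rid for the
 * purge, while the common case (rr7 already holding the rid, as the
 * heuristic comment explains) avoids the swap altogether.
 */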
static void flush_tlb_vhpt_all (struct domain *d)
{
        /* First VHPT.  */
        local_vhpt_flush ();

        /* Then mTLB.  */
        local_flush_tlb_all ();
}

void domain_flush_tlb_vhpt(struct domain *d)
{
        /* Very heavy...  */
        if (HAS_PERVCPU_VHPT(d) || d->arch.is_vti)
                on_each_cpu((void (*)(void *))local_flush_tlb_all,
                            NULL, 1, 1);
        else
                on_each_cpu((void (*)(void *))flush_tlb_vhpt_all, d, 1, 1);
        cpus_clear (d->domain_dirty_cpumask);
}

void flush_tlb_mask(cpumask_t mask)
{
        int cpu;

        cpu = smp_processor_id();
        if (cpu_isset (cpu, mask)) {
                cpu_clear(cpu, mask);
                flush_tlb_vhpt_all (NULL);
        }

        if (cpus_empty(mask))
                return;

        for_each_cpu_mask (cpu, mask)
                smp_call_function_single
                        (cpu, (void (*)(void *))flush_tlb_vhpt_all,
                         NULL, 1, 1);
}

#ifdef PERF_COUNTERS
void gather_vhpt_stats(void)
{
        int i, cpu;

        perfc_set(vhpt_nbr_entries, VHPT_NUM_ENTRIES);

        for_each_present_cpu (cpu) {
                struct vhpt_lf_entry *v = __va(per_cpu(vhpt_paddr, cpu));
                unsigned long vhpt_valid = 0;

                for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++)
                        if (!(v->ti_tag & INVALID_TI_TAG))
                                vhpt_valid++;
                per_cpu(perfcounters, cpu)[PERFC_vhpt_valid_entries] =
                        vhpt_valid;
        }
}
#endif
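/*
 * Usage sketch for the helpers above (illustrative only; the call shown is
 * an assumed caller, not one defined in this file): a path that has just
 * dirtied mappings on several physical cpus would typically do
 *
 *      flush_tlb_mask(d->domain_dirty_cpumask);
 *
 * which flushes the local VHPT and mTLB inline when the current cpu is in
 * the mask, and sends flush_tlb_vhpt_all() to the remaining cpus via
 * smp_call_function_single().
 */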