smp.c

来自「底层驱动开发」· C语言 代码 · 共 1,262 行 · 第 1/3 页

C
1,262
字号
}}

/* Deliver the three-word cross call data0/data1/data2 to every cpu in
 * mask, one target at a time.  The current %pstate is read once and
 * handed to spitfire_xcall_helper() for each target cpu (presumably so
 * the helper can toggle the interrupt-enable bit around the dispatch --
 * helper body is not visible in this file view, TODO confirm).
 */
static __inline__ void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
{
	u64 pstate;
	int i;

	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
	for_each_cpu_mask(i, mask)
		spitfire_xcall_helper(data0, data1, data2, pstate, i);
}

/* Cheetah now allows to send the whole 64-bytes of data in the interrupt
 * packet, but we have no use for that.  However we do take advantage of
 * the new pipelining feature (ie. dispatch to multiple cpus simultaneously).
 *
 * Protocol, as implemented below: interrupts are disabled, the mondo
 * payload is stored into the dispatch data registers, and then one
 * dispatch is issued per target cpu.  The dispatch status register is
 * then polled: the even bits (0x5555...UL) are per-slot "busy" bits and
 * the odd bits are per-slot "NACK" bits.  NACKed targets are retried
 * (after a short delay), targets that accepted are dropped from mask.
 */
static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
{
	u64 pstate, ver;
	int nack_busy_id, is_jalapeno;

	if (cpus_empty(mask))
		return;

	/* Unfortunately, someone at Sun had the brilliant idea to make the
	 * busy/nack fields hard-coded by ITID number for this Ultra-III
	 * derivative processor.
	 */
	__asm__ ("rdpr %%ver, %0" : "=r" (ver));
	is_jalapeno = ((ver >> 32) == 0x003e0016);

	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));

retry:
	/* Disable interrupts (clear PSTATE_IE) for the duration of the
	 * dispatch + poll sequence.
	 */
	__asm__ __volatile__("wrpr %0, %1, %%pstate\n\t"
			     : : "r" (pstate), "i" (PSTATE_IE));

	/* Setup the dispatch data registers. */
	__asm__ __volatile__("stxa	%0, [%3] %6\n\t"
			     "stxa	%1, [%4] %6\n\t"
			     "stxa	%2, [%5] %6\n\t"
			     "membar	#Sync\n\t"
			     : /* no outputs */
			     : "r" (data0), "r" (data1), "r" (data2),
			       "r" (0x40), "r" (0x50), "r" (0x60),
			       "i" (ASI_INTR_W));

	/* Fire off one dispatch per target cpu.  On non-Jalapeno parts the
	 * busy/nack slot index is encoded into the dispatch address; on
	 * Jalapeno the slot is fixed by the target's ITID (cpu number).
	 */
	nack_busy_id = 0;
	{
		int i;

		for_each_cpu_mask(i, mask) {
			u64 target = (i << 14) | 0x70;

			if (!is_jalapeno)
				target |= (nack_busy_id << 24);
			__asm__ __volatile__(
				"stxa	%%g0, [%0] %1\n\t"
				"membar	#Sync\n\t"
				: /* no outputs */
				: "r" (target), "i" (ASI_INTR_W));
			nack_busy_id++;
		}
	}

	/* Now, poll for completion. */
	{
		u64 dispatch_stat;
		long stuck;

		/* Bounded spin, scaled by the number of dispatches issued. */
		stuck = 100000 * nack_busy_id;
		do {
			__asm__ __volatile__("ldxa	[%%g0] %1, %0"
					     : "=r" (dispatch_stat)
					     : "i" (ASI_INTR_DISPATCH_STAT));
			if (dispatch_stat == 0UL) {
				/* All targets accepted; restore %pstate
				 * (re-enabling interrupts) and we are done.
				 */
				__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
						     : : "r" (pstate));
				return;
			}
			if (!--stuck)
				break;
		} while (dispatch_stat & 0x5555555555555555UL);

		/* Restore %pstate before handling the stuck/NACK cases. */
		__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
				     : : "r" (pstate));

		if ((dispatch_stat & ~(0x5555555555555555UL)) == 0) {
			/* Busy bits will not clear, continue instead
			 * of freezing up on this cpu.
			 */
			printk("CPU[%d]: mondo stuckage result[%016lx]\n",
			       smp_processor_id(), dispatch_stat);
		} else {
			int i, this_busy_nack = 0;

			/* Delay some random time with interrupts enabled
			 * to prevent deadlock.
			 */
			udelay(2 * nack_busy_id);

			/* Clear out the mask bits for cpus which did not
			 * NACK us.
			 */
			for_each_cpu_mask(i, mask) {
				u64 check_mask;

				if (is_jalapeno)
					check_mask = (0x2UL << (2*i));
				else
					check_mask = (0x2UL << this_busy_nack);
				if ((dispatch_stat & check_mask) == 0)
					cpu_clear(i, mask);
				this_busy_nack += 2;
			}

			/* Re-dispatch to the remaining (NACKing) cpus. */
			goto retry;
		}
	}
}

/* Send cross call to all processors mentioned in MASK
 * except self.
 *
 * data0 encodes the call: context number in the upper 32 bits, low 32
 * bits of the handler address (func) in the lower half.  Offline cpus
 * and the calling cpu are filtered out of mask before delivery.
 */
static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 data2, cpumask_t mask)
{
	u64 data0 = (((u64)ctx)<<32 | (((u64)func) & 0xffffffff));
	int this_cpu = get_cpu();

	cpus_and(mask, mask, cpu_online_map);
	cpu_clear(this_cpu, mask);

	if (tlb_type == spitfire)
		spitfire_xcall_deliver(data0, data1, data2, mask);
	else
		cheetah_xcall_deliver(data0, data1, data2, mask);

	/* NOTE: Caller runs local copy on master. */

	put_cpu();
}

extern unsigned long xcall_sync_tick;

/* Ask a single cpu to run the xcall_sync_tick handler. */
static void smp_start_sync_tick_client(int cpu)
{
	cpumask_t mask = cpumask_of_cpu(cpu);

	smp_cross_call_masked(&xcall_sync_tick,
			      0, 0, 0, mask);
}

/* Send cross call to all processors except self.
 */
#define smp_cross_call(func, ctx, data1, data2) \
	smp_cross_call_masked(func, ctx, data1, data2, cpu_online_map)

/* Shared descriptor for one smp_call_function() invocation. */
struct call_data_struct {
	void (*func) (void *info);	/* function the remote cpus run */
	void *info;			/* argument passed to func */
	atomic_t finished;		/* number of cpus done (or started, if !wait) */
	int wait;			/* run func before acking the initiator? */
};

static DEFINE_SPINLOCK(call_lock);	/* serializes users of call_data */
static struct call_data_struct *call_data;

extern unsigned long xcall_call_function;

/*
 * Run func(info) on every online cpu except the calling one.
 *
 * You must not call this function with disabled interrupts or from a
 * hardware interrupt handler or from a bottom half handler.
 *
 * Always returns 0 -- even on timeout (the failure is only printk'd).
 * NOTE(review): after a timeout, a late remote cpu may still reference
 * the on-stack call data; left as-is here.
 */
int smp_call_function(void (*func)(void *info), void *info,
		      int nonatomic, int wait)
{
	struct call_data_struct data;
	int cpus = num_online_cpus() - 1;
	long timeout;

	if (!cpus)
		return 0;

	/* Can deadlock when called with interrupts disabled */
	WARN_ON(irqs_disabled());

	data.func = func;
	data.info = info;
	atomic_set(&data.finished, 0);
	data.wait = wait;

	/* call_lock held for the whole rendezvous, so only one
	 * smp_call_function() is in flight at a time.
	 */
	spin_lock(&call_lock);

	call_data = &data;

	smp_cross_call(&xcall_call_function, 0, 0, 0);

	/*
	 * Wait for other cpus to complete function or at
	 * least snap the call data.
	 */
	timeout = 1000000;
	while (atomic_read(&data.finished) != cpus) {
		if (--timeout <= 0)
			goto out_timeout;
		barrier();
		udelay(1);
	}

	spin_unlock(&call_lock);

	return 0;

out_timeout:
	spin_unlock(&call_lock);
	printk("XCALL: Remote cpus not responding, ncpus=%ld finished=%ld\n",
	       (long) num_online_cpus() - 1L,
	       (long) atomic_read(&data.finished));
	return 0;
}

/* Remote side of smp_call_function(): runs in the cross-call softint.
 * If the initiator asked to wait, run func before bumping 'finished'
 * (ack means "completed"); otherwise bump first so the initiator can
 * proceed as soon as the call data has been snapped.
 */
void smp_call_function_client(int irq, struct pt_regs *regs)
{
	void (*func) (void *info) = call_data->func;
	void *info = call_data->info;

	clear_softint(1 << irq);
	if (call_data->wait) {
		/* let initiator proceed only after completion */
		func(info);
		atomic_inc(&call_data->finished);
	} else {
		/* let initiator proceed after getting data */
		atomic_inc(&call_data->finished);
		func(info);
	}
}

/* Assembler cross-call handler entry points (defined elsewhere). */
extern unsigned long xcall_flush_tlb_mm;
extern unsigned long xcall_flush_tlb_pending;
extern unsigned long xcall_flush_tlb_kernel_range;
extern unsigned long xcall_flush_tlb_all_spitfire;
extern unsigned long xcall_flush_tlb_all_cheetah;
extern
unsigned long xcall_report_regs;
extern unsigned long xcall_receive_signal;

#ifdef DCACHE_ALIASING_POSSIBLE
extern unsigned long xcall_flush_dcache_page_cheetah;
#endif
extern unsigned long xcall_flush_dcache_page_spitfire;

#ifdef CONFIG_DEBUG_DCFLUSH
extern atomic_t dcpage_flushes;
extern atomic_t dcpage_flushes_xcall;
#endif

/* Flush page's D-cache lines on this cpu.  On Spitfire, a page with a
 * mapping additionally gets its I-cache flushed (I-cache flush is folded
 * into __flush_dcache_page()'s second argument in the aliasing config).
 */
static __inline__ void __local_flush_dcache_page(struct page *page)
{
#ifdef DCACHE_ALIASING_POSSIBLE
	__flush_dcache_page(page_address(page),
			    ((tlb_type == spitfire) &&
			     page_mapping(page) != NULL));
#else
	if (page_mapping(page) != NULL &&
	    tlb_type == spitfire)
		__flush_icache_page(__pa(page_address(page)));
#endif
}

/* Flush page's D-cache on the given cpu: directly when cpu is the
 * calling cpu, via a single-target cross call when it is another online
 * cpu.  For Spitfire targets, bit 32 of data0 tells the handler whether
 * the page has a mapping (and thus needs the I-cache flushed too).
 */
void smp_flush_dcache_page_impl(struct page *page, int cpu)
{
	cpumask_t mask = cpumask_of_cpu(cpu);
	int this_cpu = get_cpu();

#ifdef CONFIG_DEBUG_DCFLUSH
	atomic_inc(&dcpage_flushes);
#endif
	if (cpu == this_cpu) {
		__local_flush_dcache_page(page);
	} else if (cpu_online(cpu)) {
		void *pg_addr = page_address(page);
		u64 data0;

		if (tlb_type == spitfire) {
			data0 =
				((u64)&xcall_flush_dcache_page_spitfire);
			if (page_mapping(page) != NULL)
				data0 |= ((u64)1 << 32);
			spitfire_xcall_deliver(data0,
					       __pa(pg_addr),
					       (u64) pg_addr,
					       mask);
		} else {
#ifdef DCACHE_ALIASING_POSSIBLE
			/* Cheetah needs a flush only when D-cache aliasing
			 * is possible; otherwise nothing is sent.
			 */
			data0 =
				((u64)&xcall_flush_dcache_page_cheetah);
			cheetah_xcall_deliver(data0,
					      __pa(pg_addr),
					      0, mask);
#endif
		}
#ifdef CONFIG_DEBUG_DCFLUSH
		atomic_inc(&dcpage_flushes_xcall);
#endif
	}

	put_cpu();
}

/* Flush page's D-cache on every online cpu: cross call to all others
 * (when any exist), then flush locally.  'mm' is accepted but unused
 * here.
 */
void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
{
	void *pg_addr = page_address(page);
	cpumask_t mask = cpu_online_map;
	u64 data0;
	int this_cpu = get_cpu();

	cpu_clear(this_cpu, mask);

#ifdef CONFIG_DEBUG_DCFLUSH
	atomic_inc(&dcpage_flushes);
#endif
	if (cpus_empty(mask))
		goto flush_self;
	if (tlb_type == spitfire) {
		data0 = ((u64)&xcall_flush_dcache_page_spitfire);
		if (page_mapping(page) != NULL)
			data0 |= ((u64)1 << 32);
		spitfire_xcall_deliver(data0,
				       __pa(pg_addr),
				       (u64) pg_addr,
				       mask);
	} else {
#ifdef DCACHE_ALIASING_POSSIBLE
		data0 = ((u64)&xcall_flush_dcache_page_cheetah);
		cheetah_xcall_deliver(data0,
				      __pa(pg_addr),
				      0, mask);
#endif
	}
#ifdef CONFIG_DEBUG_DCFLUSH
	atomic_inc(&dcpage_flushes_xcall);
#endif
 flush_self:
	__local_flush_dcache_page(page);

	put_cpu();
}

/* Poke a single online cpu with the xcall_receive_signal handler.
 * No context number is encoded (ctx 0 -- only the low 32 bits of the
 * handler address go into data0, matching smp_cross_call_masked()'s
 * encoding).
 */
void smp_receive_signal(int cpu)
{
	cpumask_t mask = cpumask_of_cpu(cpu);

	if (cpu_online(cpu)) {
		u64 data0 = (((u64)&xcall_receive_signal) & 0xffffffff);

		if (tlb_type == spitfire)
			spitfire_xcall_deliver(data0, 0, 0, mask);
		else
			cheetah_xcall_deliver(data0, 0, 0, mask);
	}
}

/* Remote side of smp_receive_signal(). */
void smp_receive_signal_client(int irq, struct pt_regs *regs)
{
	/* Just return, rtrap takes care of the rest. */
	clear_softint(1 << irq);
}

/* Have every other online cpu run the register-dump handler. */
void smp_report_regs(void)
{
	smp_cross_call(&xcall_report_regs, 0, 0, 0);
}

/* Flush the entire TLB on every online cpu (remote cpus first via
 * cross call, then locally).
 */
void smp_flush_tlb_all(void)
{
	if (tlb_type == spitfire)
		smp_cross_call(&xcall_flush_tlb_all_spitfire, 0, 0, 0);
	else
		smp_cross_call(&xcall_flush_tlb_all_cheetah, 0, 0, 0);
	__flush_tlb_all();
}

/* We know that the window frames of the user have been flushed
 * to the stack before we get here because all callers of us
 * are flush_tlb_*() routines, and these run after flush_cache_*()
 * which performs the flushw.
 *
 * The SMP TLB coherency scheme we use works as follows:
 *
 * 1) mm->cpu_vm_mask is a bit mask of which cpus an address
 *    space has (potentially) executed on, this is the heuristic
 *    we use to avoid doing cross calls.
 *
 *    Also, for flushing from kswapd and also for clones, we
 *    use cpu_vm_mask as the list of cpus to make run the TLB.
 *
 * 2) TLB context numbers are shared globally across all processors
 *    in the system, this allows us to play several games to avoid
 *    cross calls.
 *
 *    One invariant is that when a cpu switches to a process, and
 *    that processes tsk->active_mm->cpu_vm_mask does not have the
 *    current cpu's bit set, that tlb context is flushed locally.
 *
 *    If the address space is non-shared (ie.
mm->count == 1) we avoid *    cross calls when we want to flush the currently running process's *    tlb state.  This is done by clearing all cpu bits except the current *    processor's in current->active_mm->cpu_vm_mask and performing the *    flush locally only.  This will force any subsequent cpus which run *    this task to flush the context from the local tlb if the process *    migrates to another cpu (again). * * 3) For shared address spaces (threads) and swapping we bite the *    bullet for most cases and perform the cross call (but only to *    the cpus listed in cpu_vm_mask). * *    The performance gain from "optimizing" away the cross call for threads is *    questionable (in theory the big win for threads is the massive sharing of *    address space state across processors). */void smp_flush_tlb_mm(struct mm_struct *mm)

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?