📄 smp.c

📁 linux-2.6.15.6
💻 C
📖 第 1 页 / 共 2 页
字号:
12 下一页
/* smp.c: Sparc64 SMP support. * * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) */#include <linux/module.h>#include <linux/kernel.h>#include <linux/sched.h>#include <linux/mm.h>#include <linux/pagemap.h>#include <linux/threads.h>#include <linux/smp.h>#include <linux/smp_lock.h>#include <linux/interrupt.h>#include <linux/kernel_stat.h>#include <linux/delay.h>#include <linux/init.h>#include <linux/spinlock.h>#include <linux/fs.h>#include <linux/seq_file.h>#include <linux/cache.h>#include <linux/jiffies.h>#include <linux/profile.h>#include <linux/bootmem.h>#include <asm/head.h>#include <asm/ptrace.h>#include <asm/atomic.h>#include <asm/tlbflush.h>#include <asm/mmu_context.h>#include <asm/cpudata.h>#include <asm/irq.h>#include <asm/page.h>#include <asm/pgtable.h>#include <asm/oplib.h>#include <asm/uaccess.h>#include <asm/timer.h>#include <asm/starfire.h>#include <asm/tlb.h>extern void calibrate_delay(void);/* Please don't make this stuff initdata!!!  --DaveM */static unsigned char boot_cpu_id;cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE;cpumask_t phys_cpu_present_map __read_mostly = CPU_MASK_NONE;static cpumask_t smp_commenced_mask;static cpumask_t cpu_callout_map;void smp_info(struct seq_file *m){	int i;		seq_printf(m, "State:\n");	for (i = 0; i < NR_CPUS; i++) {		if (cpu_online(i))			seq_printf(m,				   "CPU%d:\t\tonline\n", i);	}}void smp_bogo(struct seq_file *m){	int i;		for (i = 0; i < NR_CPUS; i++)		if (cpu_online(i))			seq_printf(m,				   "Cpu%dBogo\t: %lu.%02lu\n"				   "Cpu%dClkTck\t: %016lx\n",				   i, cpu_data(i).udelay_val / (500000/HZ),				   (cpu_data(i).udelay_val / (5000/HZ)) % 100,				   i, cpu_data(i).clock_tick);}void __init smp_store_cpu_info(int id){	int cpu_node;	/* multiplier and counter set by	   smp_setup_percpu_timer()  */	cpu_data(id).udelay_val			= loops_per_jiffy;	cpu_find_by_mid(id, &cpu_node);	cpu_data(id).clock_tick = prom_getintdefault(cpu_node,						     "clock-frequency", 0);	cpu_data(id).pgcache_size		= 0;	cpu_data(id).pte_cache[0]		= NULL;	cpu_data(id).pte_cache[1]		= NULL;	cpu_data(id).pgd_cache			= NULL;	cpu_data(id).idle_volume		= 1;	cpu_data(id).dcache_size = prom_getintdefault(cpu_node, "dcache-size",						      16 * 1024);	cpu_data(id).dcache_line_size =		prom_getintdefault(cpu_node, "dcache-line-size", 32);	cpu_data(id).icache_size = prom_getintdefault(cpu_node, "icache-size",						      16 * 1024);	cpu_data(id).icache_line_size =		prom_getintdefault(cpu_node, "icache-line-size", 32);	cpu_data(id).ecache_size = prom_getintdefault(cpu_node, "ecache-size",						      4 * 1024 * 1024);	cpu_data(id).ecache_line_size =		prom_getintdefault(cpu_node, "ecache-line-size", 64);	printk("CPU[%d]: Caches "	       "D[sz(%d):line_sz(%d)] "	       "I[sz(%d):line_sz(%d)] "	       "E[sz(%d):line_sz(%d)]\n",	       id,	       cpu_data(id).dcache_size, cpu_data(id).dcache_line_size,	       cpu_data(id).icache_size, cpu_data(id).icache_line_size,	       cpu_data(id).ecache_size, cpu_data(id).ecache_line_size);}static void smp_setup_percpu_timer(void);static volatile unsigned long callin_flag = 0;extern void inherit_locked_prom_mappings(int save_p);static inline void cpu_setup_percpu_base(unsigned long cpu_id){	__asm__ __volatile__("mov	%0, %%g5\n\t"			     "stxa	%0, [%1] %2\n\t"			     "membar	#Sync"			     : /* no outputs */			     : "r" (__per_cpu_offset(cpu_id)),			       "r" (TSB_REG), "i" (ASI_IMMU));}void __init smp_callin(void){	int cpuid = hard_smp_processor_id();	inherit_locked_prom_mappings(0);	__flush_tlb_all();	cpu_setup_percpu_base(cpuid);	smp_setup_percpu_timer();	if (cheetah_pcache_forced_on)		cheetah_enable_pcache();	local_irq_enable();	calibrate_delay();	smp_store_cpu_info(cpuid);	callin_flag = 1;	__asm__ __volatile__("membar #Sync\n\t"			     "flush  %%g6" : : : "memory");	/* Clear this or we will die instantly when we	 * schedule back to this idler...	 */	current_thread_info()->new_child = 0;	/* Attach to the address space of init_task. */	atomic_inc(&init_mm.mm_count);	current->active_mm = &init_mm;	while (!cpu_isset(cpuid, smp_commenced_mask))		rmb();	cpu_set(cpuid, cpu_online_map);	/* idle thread is expected to have preempt disabled */	preempt_disable();}void cpu_panic(void){	printk("CPU[%d]: Returns from cpu_idle!\n", smp_processor_id());	panic("SMP bolixed\n");}static unsigned long current_tick_offset __read_mostly;/* This tick register synchronization scheme is taken entirely from * the ia64 port, see arch/ia64/kernel/smpboot.c for details and credit. * * The only change I've made is to rework it so that the master * initiates the synchonization instead of the slave. -DaveM */#define MASTER	0#define SLAVE	(SMP_CACHE_BYTES/sizeof(unsigned long))#define NUM_ROUNDS	64	/* magic value */#define NUM_ITERS	5	/* likewise */static DEFINE_SPINLOCK(itc_sync_lock);static unsigned long go[SLAVE + 1];#define DEBUG_TICK_SYNC	0static inline long get_delta (long *rt, long *master){	unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;	unsigned long tcenter, t0, t1, tm;	unsigned long i;	for (i = 0; i < NUM_ITERS; i++) {		t0 = tick_ops->get_tick();		go[MASTER] = 1;		membar_storeload();		while (!(tm = go[SLAVE]))			rmb();		go[SLAVE] = 0;		wmb();		t1 = tick_ops->get_tick();		if (t1 - t0 < best_t1 - best_t0)			best_t0 = t0, best_t1 = t1, best_tm = tm;	}	*rt = best_t1 - best_t0;	*master = best_tm - best_t0;	/* average best_t0 and best_t1 without overflow: */	tcenter = (best_t0/2 + best_t1/2);	if (best_t0 % 2 + best_t1 % 2 == 2)		tcenter++;	return tcenter - best_tm;}void smp_synchronize_tick_client(void){	long i, delta, adj, adjust_latency = 0, done = 0;	unsigned long flags, rt, master_time_stamp, bound;#if DEBUG_TICK_SYNC	struct {		long rt;	/* roundtrip time */		long master;	/* master's timestamp */		long diff;	/* difference between midpoint and master's timestamp */		long lat;	/* estimate of itc adjustment latency */	} t[NUM_ROUNDS];#endif	go[MASTER] = 1;	while (go[MASTER])		rmb();	local_irq_save(flags);	{		for (i = 0; i < NUM_ROUNDS; i++) {			delta = get_delta(&rt, &master_time_stamp);			if (delta == 0) {				done = 1;	/* let's lock on to this... */				bound = rt;			}			if (!done) {				if (i > 0) {					adjust_latency += -delta;					adj = -delta + adjust_latency/4;				} else					adj = -delta;				tick_ops->add_tick(adj, current_tick_offset);			}#if DEBUG_TICK_SYNC			t[i].rt = rt;			t[i].master = master_time_stamp;			t[i].diff = delta;			t[i].lat = adjust_latency/4;#endif		}	}	local_irq_restore(flags);#if DEBUG_TICK_SYNC	for (i = 0; i < NUM_ROUNDS; i++)		printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",		       t[i].rt, t[i].master, t[i].diff, t[i].lat);#endif	printk(KERN_INFO "CPU %d: synchronized TICK with master CPU (last diff %ld cycles,"	       "maxerr %lu cycles)\n", smp_processor_id(), delta, rt);}static void smp_start_sync_tick_client(int cpu);static void smp_synchronize_one_tick(int cpu){	unsigned long flags, i;	go[MASTER] = 0;	smp_start_sync_tick_client(cpu);	/* wait for client to be ready */	while (!go[MASTER])		rmb();	/* now let the client proceed into his loop */	go[MASTER] = 0;	membar_storeload();	spin_lock_irqsave(&itc_sync_lock, flags);	{		for (i = 0; i < NUM_ROUNDS*NUM_ITERS; i++) {			while (!go[MASTER])				rmb();			go[MASTER] = 0;			wmb();			go[SLAVE] = tick_ops->get_tick();			membar_storeload();		}	}	spin_unlock_irqrestore(&itc_sync_lock, flags);}extern unsigned long sparc64_cpu_startup;/* The OBP cpu startup callback truncates the 3rd arg cookie to * 32-bits (I think) so to be safe we have it read the pointer * contained here so we work on >4GB machines. -DaveM */static struct thread_info *cpu_new_thread = NULL;static int __devinit smp_boot_one_cpu(unsigned int cpu){	unsigned long entry =		(unsigned long)(&sparc64_cpu_startup);	unsigned long cookie =		(unsigned long)(&cpu_new_thread);	struct task_struct *p;	int timeout, ret, cpu_node;	p = fork_idle(cpu);	callin_flag = 0;	cpu_new_thread = p->thread_info;	cpu_set(cpu, cpu_callout_map);	cpu_find_by_mid(cpu, &cpu_node);	prom_startcpu(cpu_node, entry, cookie);	for (timeout = 0; timeout < 5000000; timeout++) {		if (callin_flag)			break;		udelay(100);	}	if (callin_flag) {		ret = 0;	} else {		printk("Processor %d is stuck.\n", cpu);		cpu_clear(cpu, cpu_callout_map);		ret = -ENODEV;	}	cpu_new_thread = NULL;	return ret;}static void spitfire_xcall_helper(u64 data0, u64 data1, u64 data2, u64 pstate, unsigned long cpu){	u64 result, target;	int stuck, tmp;	if (this_is_starfire) {		/* map to real upaid */		cpu = (((cpu & 0x3c) << 1) |			((cpu & 0x40) >> 4) |			(cpu & 0x3));	}	target = (cpu << 14) | 0x70;again:	/* Ok, this is the real Spitfire Errata #54.	 * One must read back from a UDB internal register	 * after writes to the UDB interrupt dispatch, but	 * before the membar Sync for that write.	 * So we use the high UDB control register (ASI 0x7f,	 * ADDR 0x20) for the dummy read. -DaveM	 */	tmp = 0x40;	__asm__ __volatile__(	"wrpr	%1, %2, %%pstate\n\t"	"stxa	%4, [%0] %3\n\t"	"stxa	%5, [%0+%8] %3\n\t"	"add	%0, %8, %0\n\t"	"stxa	%6, [%0+%8] %3\n\t"	"membar	#Sync\n\t"	"stxa	%%g0, [%7] %3\n\t"	"membar	#Sync\n\t"	"mov	0x20, %%g1\n\t"	"ldxa	[%%g1] 0x7f, %%g0\n\t"	"membar	#Sync"	: "=r" (tmp)	: "r" (pstate), "i" (PSTATE_IE), "i" (ASI_INTR_W),	  "r" (data0), "r" (data1), "r" (data2), "r" (target),	  "r" (0x10), "0" (tmp)        : "g1");	/* NOTE: PSTATE_IE is still clear. */	stuck = 100000;	do {		__asm__ __volatile__("ldxa [%%g0] %1, %0"			: "=r" (result)			: "i" (ASI_INTR_DISPATCH_STAT));		if (result == 0) {			__asm__ __volatile__("wrpr %0, 0x0, %%pstate"					     : : "r" (pstate));			return;		}		stuck -= 1;		if (stuck == 0)			break;	} while (result & 0x1);	__asm__ __volatile__("wrpr %0, 0x0, %%pstate"			     : : "r" (pstate));	if (stuck == 0) {		printk("CPU[%d]: mondo stuckage result[%016lx]\n",		       smp_processor_id(), result);	} else {		udelay(2);		goto again;	}}static __inline__ void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask){	u64 pstate;	int i;	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));	for_each_cpu_mask(i, mask)		spitfire_xcall_helper(data0, data1, data2, pstate, i);}/* Cheetah now allows to send the whole 64-bytes of data in the interrupt * packet, but we have no use for that.  However we do take advantage of * the new pipelining feature (ie. dispatch to multiple cpus simultaneously). */static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask){	u64 pstate, ver;	int nack_busy_id, is_jalapeno;	if (cpus_empty(mask))		return;	/* Unfortunately, someone at Sun had the brilliant idea to make the	 * busy/nack fields hard-coded by ITID number for this Ultra-III	 * derivative processor.	 */	__asm__ ("rdpr %%ver, %0" : "=r" (ver));	is_jalapeno = ((ver >> 32) == 0x003e0016);	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));retry:	__asm__ __volatile__("wrpr %0, %1, %%pstate\n\t"			     : : "r" (pstate), "i" (PSTATE_IE));	/* Setup the dispatch data registers. */	__asm__ __volatile__("stxa	%0, [%3] %6\n\t"			     "stxa	%1, [%4] %6\n\t"			     "stxa	%2, [%5] %6\n\t"			     "membar	#Sync\n\t"			     : /* no outputs */			     : "r" (data0), "r" (data1), "r" (data2),			       "r" (0x40), "r" (0x50), "r" (0x60),			       "i" (ASI_INTR_W));	nack_busy_id = 0;	{		int i;		for_each_cpu_mask(i, mask) {			u64 target = (i << 14) | 0x70;			if (!is_jalapeno)				target |= (nack_busy_id << 24);			__asm__ __volatile__(				"stxa	%%g0, [%0] %1\n\t"				"membar	#Sync\n\t"				: /* no outputs */				: "r" (target), "i" (ASI_INTR_W));			nack_busy_id++;		}	}	/* Now, poll for completion. */	{		u64 dispatch_stat;		long stuck;		stuck = 100000 * nack_busy_id;		do {			__asm__ __volatile__("ldxa	[%%g0] %1, %0"					     : "=r" (dispatch_stat)					     : "i" (ASI_INTR_DISPATCH_STAT));			if (dispatch_stat == 0UL) {				__asm__ __volatile__("wrpr %0, 0x0, %%pstate"						     : : "r" (pstate));				return;			}			if (!--stuck)				break;		} while (dispatch_stat & 0x5555555555555555UL);		__asm__ __volatile__("wrpr %0, 0x0, %%pstate"				     : : "r" (pstate));		if ((dispatch_stat & ~(0x5555555555555555UL)) == 0) {			/* Busy bits will not clear, continue instead			 * of freezing up on this cpu.			 */			printk("CPU[%d]: mondo stuckage result[%016lx]\n",			       smp_processor_id(), dispatch_stat);		} else {			int i, this_busy_nack = 0;			/* Delay some random time with interrupts enabled			 * to prevent deadlock.			 */			udelay(2 * nack_busy_id);			/* Clear out the mask bits for cpus which did not			 * NACK us.			 */			for_each_cpu_mask(i, mask) {				u64 check_mask;				if (is_jalapeno)					check_mask = (0x2UL << (2*i));				else					check_mask = (0x2UL <<						      this_busy_nack);				if ((dispatch_stat & check_mask) == 0)					cpu_clear(i, mask);				this_busy_nack += 2;			}			goto retry;		}	}}/* Send cross call to all processors mentioned in MASK * except self. */static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 data2, cpumask_t mask){	u64 data0 = (((u64)ctx)<<32 | (((u64)func) & 0xffffffff));	int this_cpu = get_cpu();	cpus_and(mask, mask, cpu_online_map);	cpu_clear(this_cpu, mask);	if (tlb_type == spitfire)		spitfire_xcall_deliver(data0, data1, data2, mask);	else		cheetah_xcall_deliver(data0, data1, data2, mask);	/* NOTE: Caller runs local copy on master. */	put_cpu();}extern unsigned long xcall_sync_tick;static void smp_start_sync_tick_client(int cpu){	cpumask_t mask = cpumask_of_cpu(cpu);	smp_cross_call_masked(&xcall_sync_tick,			      0, 0, 0, mask);}/* Send cross call to all processors except self. */#define smp_cross_call(func, ctx, data1, data2) \	smp_cross_call_masked(func, ctx, data1, data2, cpu_online_map)struct call_data_struct {	void (*func) (void *info);	void *info;	atomic_t finished;	int wait;};static DEFINE_SPINLOCK(call_lock);static struct call_data_struct *call_data;extern unsigned long xcall_call_function;/* * You must not call this function with disabled interrupts or from a * hardware interrupt handler or from a bottom half handler. */int smp_call_function(void (*func)(void *info), void *info,		      int nonatomic, int wait){	struct call_data_struct data;	int cpus = num_online_cpus() - 1;	long timeout;	if (!cpus)		return 0;	/* Can deadlock when called with interrupts disabled */	WARN_ON(irqs_disabled());
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -