smp.c

来自「Linux Kernel 2.6.9 for OMAP1710」· C语言 代码 · 共 1,276 行 · 第 1/3 页

C
1,276
字号
/* smp.c: Sparc64 SMP support. * * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) */#include <linux/kernel.h>#include <linux/sched.h>#include <linux/mm.h>#include <linux/pagemap.h>#include <linux/threads.h>#include <linux/smp.h>#include <linux/smp_lock.h>#include <linux/interrupt.h>#include <linux/kernel_stat.h>#include <linux/delay.h>#include <linux/init.h>#include <linux/spinlock.h>#include <linux/fs.h>#include <linux/seq_file.h>#include <linux/cache.h>#include <linux/jiffies.h>#include <linux/profile.h>#include <asm/head.h>#include <asm/ptrace.h>#include <asm/atomic.h>#include <asm/tlbflush.h>#include <asm/mmu_context.h>#include <asm/cpudata.h>#include <asm/irq.h>#include <asm/page.h>#include <asm/pgtable.h>#include <asm/oplib.h>#include <asm/uaccess.h>#include <asm/timer.h>#include <asm/starfire.h>#include <asm/tlb.h>extern int linux_num_cpus;extern void calibrate_delay(void);/* Please don't make this stuff initdata!!!  --DaveM */static unsigned char boot_cpu_id;cpumask_t cpu_online_map = CPU_MASK_NONE;cpumask_t phys_cpu_present_map = CPU_MASK_NONE;static cpumask_t smp_commenced_mask;static cpumask_t cpu_callout_map;void smp_info(struct seq_file *m){	int i;		seq_printf(m, "State:\n");	for (i = 0; i < NR_CPUS; i++) {		if (cpu_online(i))			seq_printf(m,				   "CPU%d:\t\tonline\n", i);	}}void smp_bogo(struct seq_file *m){	int i;		for (i = 0; i < NR_CPUS; i++)		if (cpu_online(i))			seq_printf(m,				   "Cpu%dBogo\t: %lu.%02lu\n"				   "Cpu%dClkTck\t: %016lx\n",				   i, cpu_data(i).udelay_val / (500000/HZ),				   (cpu_data(i).udelay_val / (5000/HZ)) % 100,				   i, cpu_data(i).clock_tick);}void __init smp_store_cpu_info(int id){	int cpu_node;	/* multiplier and counter set by	   smp_setup_percpu_timer()  */	cpu_data(id).udelay_val			= loops_per_jiffy;	cpu_find_by_mid(id, &cpu_node);	cpu_data(id).clock_tick = prom_getintdefault(cpu_node,						     "clock-frequency", 0);	cpu_data(id).pgcache_size		= 0;	cpu_data(id).pte_cache[0]		= NULL;	cpu_data(id).pte_cache[1]		= NULL;	cpu_data(id).pgdcache_size		= 0;	cpu_data(id).pgd_cache			= NULL;	cpu_data(id).idle_volume		= 1;}static void smp_setup_percpu_timer(void);static volatile unsigned long callin_flag = 0;extern void inherit_locked_prom_mappings(int save_p);void __init smp_callin(void){	int cpuid = hard_smp_processor_id();	extern int bigkernel;	extern unsigned long kern_locked_tte_data;	if (bigkernel) {		prom_dtlb_load(sparc64_highest_locked_tlbent()-1, 			kern_locked_tte_data + 0x400000, KERNBASE + 0x400000);		prom_itlb_load(sparc64_highest_locked_tlbent()-1, 			kern_locked_tte_data + 0x400000, KERNBASE + 0x400000);	}	inherit_locked_prom_mappings(0);	__flush_tlb_all();	smp_setup_percpu_timer();	local_irq_enable();	calibrate_delay();	smp_store_cpu_info(cpuid);	callin_flag = 1;	__asm__ __volatile__("membar #Sync\n\t"			     "flush  %%g6" : : : "memory");	/* Clear this or we will die instantly when we	 * schedule back to this idler...	 */	clear_thread_flag(TIF_NEWCHILD);	/* Attach to the address space of init_task. */	atomic_inc(&init_mm.mm_count);	current->active_mm = &init_mm;	while (!cpu_isset(cpuid, smp_commenced_mask))		membar("#LoadLoad");	cpu_set(cpuid, cpu_online_map);}void cpu_panic(void){	printk("CPU[%d]: Returns from cpu_idle!\n", smp_processor_id());	panic("SMP bolixed\n");}static unsigned long current_tick_offset;/* This tick register synchronization scheme is taken entirely from * the ia64 port, see arch/ia64/kernel/smpboot.c for details and credit. * * The only change I've made is to rework it so that the master * initiates the synchonization instead of the slave. -DaveM */#define MASTER	0#define SLAVE	(SMP_CACHE_BYTES/sizeof(unsigned long))#define NUM_ROUNDS	64	/* magic value */#define NUM_ITERS	5	/* likewise */static spinlock_t itc_sync_lock = SPIN_LOCK_UNLOCKED;static unsigned long go[SLAVE + 1];#define DEBUG_TICK_SYNC	0static inline long get_delta (long *rt, long *master){	unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;	unsigned long tcenter, t0, t1, tm;	unsigned long i;	for (i = 0; i < NUM_ITERS; i++) {		t0 = tick_ops->get_tick();		go[MASTER] = 1;		membar("#StoreLoad");		while (!(tm = go[SLAVE]))			membar("#LoadLoad");		go[SLAVE] = 0;		membar("#StoreStore");		t1 = tick_ops->get_tick();		if (t1 - t0 < best_t1 - best_t0)			best_t0 = t0, best_t1 = t1, best_tm = tm;	}	*rt = best_t1 - best_t0;	*master = best_tm - best_t0;	/* average best_t0 and best_t1 without overflow: */	tcenter = (best_t0/2 + best_t1/2);	if (best_t0 % 2 + best_t1 % 2 == 2)		tcenter++;	return tcenter - best_tm;}void smp_synchronize_tick_client(void){	long i, delta, adj, adjust_latency = 0, done = 0;	unsigned long flags, rt, master_time_stamp, bound;#if DEBUG_TICK_SYNC	struct {		long rt;	/* roundtrip time */		long master;	/* master's timestamp */		long diff;	/* difference between midpoint and master's timestamp */		long lat;	/* estimate of itc adjustment latency */	} t[NUM_ROUNDS];#endif	go[MASTER] = 1;	while (go[MASTER])		membar("#LoadLoad");	local_irq_save(flags);	{		for (i = 0; i < NUM_ROUNDS; i++) {			delta = get_delta(&rt, &master_time_stamp);			if (delta == 0) {				done = 1;	/* let's lock on to this... */				bound = rt;			}			if (!done) {				if (i > 0) {					adjust_latency += -delta;					adj = -delta + adjust_latency/4;				} else					adj = -delta;				tick_ops->add_tick(adj, current_tick_offset);			}#if DEBUG_TICK_SYNC			t[i].rt = rt;			t[i].master = master_time_stamp;			t[i].diff = delta;			t[i].lat = adjust_latency/4;#endif		}	}	local_irq_restore(flags);#if DEBUG_TICK_SYNC	for (i = 0; i < NUM_ROUNDS; i++)		printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",		       t[i].rt, t[i].master, t[i].diff, t[i].lat);#endif	printk(KERN_INFO "CPU %d: synchronized TICK with master CPU (last diff %ld cycles,"	       "maxerr %lu cycles)\n", smp_processor_id(), delta, rt);}static void smp_start_sync_tick_client(int cpu);static void smp_synchronize_one_tick(int cpu){	unsigned long flags, i;	go[MASTER] = 0;	smp_start_sync_tick_client(cpu);	/* wait for client to be ready */	while (!go[MASTER])		membar("#LoadLoad");	/* now let the client proceed into his loop */	go[MASTER] = 0;	membar("#StoreLoad");	spin_lock_irqsave(&itc_sync_lock, flags);	{		for (i = 0; i < NUM_ROUNDS*NUM_ITERS; i++) {			while (!go[MASTER])				membar("#LoadLoad");			go[MASTER] = 0;			membar("#StoreStore");			go[SLAVE] = tick_ops->get_tick();			membar("#StoreLoad");		}	}	spin_unlock_irqrestore(&itc_sync_lock, flags);}extern unsigned long sparc64_cpu_startup;/* The OBP cpu startup callback truncates the 3rd arg cookie to * 32-bits (I think) so to be safe we have it read the pointer * contained here so we work on >4GB machines. -DaveM */static struct thread_info *cpu_new_thread = NULL;static int __devinit smp_boot_one_cpu(unsigned int cpu){	unsigned long entry =		(unsigned long)(&sparc64_cpu_startup);	unsigned long cookie =		(unsigned long)(&cpu_new_thread);	struct task_struct *p;	int timeout, ret, cpu_node;	p = fork_idle(cpu);	callin_flag = 0;	cpu_new_thread = p->thread_info;	cpu_set(cpu, cpu_callout_map);	cpu_find_by_mid(cpu, &cpu_node);	prom_startcpu(cpu_node, entry, cookie);	for (timeout = 0; timeout < 5000000; timeout++) {		if (callin_flag)			break;		udelay(100);	}	if (callin_flag) {		ret = 0;	} else {		printk("Processor %d is stuck.\n", cpu);		cpu_clear(cpu, cpu_callout_map);		ret = -ENODEV;	}	cpu_new_thread = NULL;	return ret;}static void spitfire_xcall_helper(u64 data0, u64 data1, u64 data2, u64 pstate, unsigned long cpu){	u64 result, target;	int stuck, tmp;	if (this_is_starfire) {		/* map to real upaid */		cpu = (((cpu & 0x3c) << 1) |			((cpu & 0x40) >> 4) |			(cpu & 0x3));	}	target = (cpu << 14) | 0x70;again:	/* Ok, this is the real Spitfire Errata #54.	 * One must read back from a UDB internal register	 * after writes to the UDB interrupt dispatch, but	 * before the membar Sync for that write.	 * So we use the high UDB control register (ASI 0x7f,	 * ADDR 0x20) for the dummy read. -DaveM	 */	tmp = 0x40;	__asm__ __volatile__(	"wrpr	%1, %2, %%pstate\n\t"	"stxa	%4, [%0] %3\n\t"	"stxa	%5, [%0+%8] %3\n\t"	"add	%0, %8, %0\n\t"	"stxa	%6, [%0+%8] %3\n\t"	"membar	#Sync\n\t"	"stxa	%%g0, [%7] %3\n\t"	"membar	#Sync\n\t"	"mov	0x20, %%g1\n\t"	"ldxa	[%%g1] 0x7f, %%g0\n\t"	"membar	#Sync"	: "=r" (tmp)	: "r" (pstate), "i" (PSTATE_IE), "i" (ASI_INTR_W),	  "r" (data0), "r" (data1), "r" (data2), "r" (target),	  "r" (0x10), "0" (tmp)        : "g1");	/* NOTE: PSTATE_IE is still clear. */	stuck = 100000;	do {		__asm__ __volatile__("ldxa [%%g0] %1, %0"			: "=r" (result)			: "i" (ASI_INTR_DISPATCH_STAT));		if (result == 0) {			__asm__ __volatile__("wrpr %0, 0x0, %%pstate"					     : : "r" (pstate));			return;		}		stuck -= 1;		if (stuck == 0)			break;	} while (result & 0x1);	__asm__ __volatile__("wrpr %0, 0x0, %%pstate"			     : : "r" (pstate));	if (stuck == 0) {		printk("CPU[%d]: mondo stuckage result[%016lx]\n",		       smp_processor_id(), result);	} else {		udelay(2);		goto again;	}}static __inline__ void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask){	u64 pstate;	int i;	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));	for_each_cpu_mask(i, mask)		spitfire_xcall_helper(data0, data1, data2, pstate, i);}/* Cheetah now allows to send the whole 64-bytes of data in the interrupt * packet, but we have no use for that.  However we do take advantage of * the new pipelining feature (ie. dispatch to multiple cpus simultaneously). */static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask){	u64 pstate, ver;	int nack_busy_id, is_jalapeno;	if (cpus_empty(mask))		return;	/* Unfortunately, someone at Sun had the brilliant idea to make the	 * busy/nack fields hard-coded by ITID number for this Ultra-III	 * derivative processor.	 */	__asm__ ("rdpr %%ver, %0" : "=r" (ver));	is_jalapeno = ((ver >> 32) == 0x003e0016);	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?