/* smp.c: Sparc64 SMP support.
 *
 * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
 */

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/threads.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/fs.h>
#include <linux/seq_file.h>
#include <linux/cache.h>

#include <asm/head.h>
#include <asm/ptrace.h>
#include <asm/atomic.h>

#include <asm/irq.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/oplib.h>
#include <asm/hardirq.h>
#include <asm/softirq.h>
#include <asm/uaccess.h>
#include <asm/timer.h>
#include <asm/starfire.h>

#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>

extern int linux_num_cpus;
extern void calibrate_delay(void);
extern unsigned prom_cpu_nodes[];

cpuinfo_sparc cpu_data[NR_CPUS];
volatile int __cpu_number_map[NR_CPUS]  __attribute__ ((aligned (SMP_CACHE_BYTES)));
volatile int __cpu_logical_map[NR_CPUS] __attribute__ ((aligned (SMP_CACHE_BYTES)));

/* Please don't make this stuff initdata!!!  --DaveM */
static unsigned char boot_cpu_id = 0;
static int smp_activated = 0;

/* Kernel spinlock */
spinlock_t kernel_flag __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;

volatile int smp_processors_ready = 0;
unsigned long cpu_present_map = 0;
int smp_num_cpus = 1;
int smp_threads_ready = 0;

void __init smp_setup(char *str, int *ints)
{
	/* XXX implement me XXX */
}

static int max_cpus = NR_CPUS;
static int __init maxcpus(char *str)
{
	get_option(&str, &max_cpus);
	return 1;
}

__setup("maxcpus=", maxcpus);

void smp_info(struct seq_file *m)
{
	int i;

	seq_printf(m, "State:\n");
	for (i = 0; i < NR_CPUS; i++) {
		if (cpu_present_map & (1UL << i))
			seq_printf(m, "CPU%d:\t\tonline\n", i);
	}
}

void smp_bogo(struct seq_file *m)
{
	int i;

	for (i = 0; i < NR_CPUS; i++)
		if (cpu_present_map & (1UL << i))
			seq_printf(m,
				   "Cpu%dBogo\t: %lu.%02lu\n"
				   "Cpu%dClkTck\t: %016lx\n",
				   i, cpu_data[i].udelay_val / (500000/HZ),
				   (cpu_data[i].udelay_val / (5000/HZ)) % 100,
				   i, cpu_data[i].clock_tick);
}
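/* Illustrative arithmetic for the BogoMIPS formatting above, assuming a
 * purely hypothetical udelay_val (i.e. loops_per_jiffy) of 498073 and
 * HZ == 100:
 *
 *	whole part   :  498073 / (500000/HZ)      = 498073 / 5000 = 99
 *	two decimals : (498073 / (5000/HZ)) % 100 = 9961 % 100    = 61
 *
 * so the corresponding /proc/cpuinfo line would read "Cpu0Bogo : 99.61".
 */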
void __init smp_store_cpu_info(int id)
{
	int i, no;

	/* multiplier and counter set by
	   smp_setup_percpu_timer()  */
	cpu_data[id].udelay_val = loops_per_jiffy;

	for (no = 0; no < linux_num_cpus; no++)
		if (linux_cpus[no].mid == id)
			break;

	cpu_data[id].clock_tick = prom_getintdefault(linux_cpus[no].prom_node,
						     "clock-frequency", 0);
	cpu_data[id].pgcache_size	= 0;
	cpu_data[id].pte_cache[0]	= NULL;
	cpu_data[id].pte_cache[1]	= NULL;
	cpu_data[id].pgdcache_size	= 0;
	cpu_data[id].pgd_cache		= NULL;
	cpu_data[id].idle_volume	= 1;

	for (i = 0; i < 16; i++)
		cpu_data[id].irq_worklists[i] = 0;
}

void __init smp_commence(void)
{
}

static void smp_setup_percpu_timer(void);

static volatile unsigned long callin_flag = 0;

extern void inherit_locked_prom_mappings(int save_p);
extern void cpu_probe(void);

void __init smp_callin(void)
{
	int cpuid = hard_smp_processor_id();
	unsigned long pstate;
	extern int bigkernel;
	extern unsigned long kern_locked_tte_data;

	if (bigkernel) {
		prom_dtlb_load(sparc64_highest_locked_tlbent()-1,
			kern_locked_tte_data + 0x400000,
			KERNBASE + 0x400000);
		prom_itlb_load(sparc64_highest_locked_tlbent()-1,
			kern_locked_tte_data + 0x400000,
			KERNBASE + 0x400000);
	}

	inherit_locked_prom_mappings(0);

	__flush_cache_all();
	__flush_tlb_all();

	cpu_probe();

	/* Guarantee that the following sequences execute
	 * uninterrupted.
	 */
	__asm__ __volatile__("rdpr %%pstate, %0\n\t"
			     "wrpr %0, %1, %%pstate"
			     : "=r" (pstate)
			     : "i" (PSTATE_IE));

	/* Set things up so user can access tick register for profiling
	 * purposes.  Also workaround BB_ERRATA_1 by doing a dummy
	 * read back of %tick after writing it.
	 */
	__asm__ __volatile__("
	sethi	%%hi(0x80000000), %%g1
	ba,pt	%%xcc, 1f
	 sllx	%%g1, 32, %%g1
	.align	64
1:	rd	%%tick, %%g2
	add	%%g2, 6, %%g2
	andn	%%g2, %%g1, %%g2
	wrpr	%%g2, 0, %%tick
	rdpr	%%tick, %%g0"
	: /* no outputs */
	: /* no inputs */
	: "g1", "g2");

	if (SPARC64_USE_STICK) {
		/* Let the user get at STICK too. */
		__asm__ __volatile__("
		sethi	%%hi(0x80000000), %%g1
		sllx	%%g1, 32, %%g1
		rd	%%asr24, %%g2
		andn	%%g2, %%g1, %%g2
		wr	%%g2, 0, %%asr24"
		: /* no outputs */
		: /* no inputs */
		: "g1", "g2");
	}

	/* Restore PSTATE_IE. */
	__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
			     : /* no outputs */
			     : "r" (pstate));

	smp_setup_percpu_timer();

	__sti();

	calibrate_delay();
	smp_store_cpu_info(cpuid);
	callin_flag = 1;
	__asm__ __volatile__("membar #Sync\n\t"
			     "flush  %%g6" : : : "memory");

	/* Clear this or we will die instantly when we
	 * schedule back to this idler...
	 */
	current->thread.flags &= ~(SPARC_FLAG_NEWCHILD);

	/* Attach to the address space of init_task. */
	atomic_inc(&init_mm.mm_count);
	current->active_mm = &init_mm;

	while (!smp_threads_ready)
		membar("#LoadLoad");
}

extern int cpu_idle(void);
extern void init_IRQ(void);

void initialize_secondary(void)
{
}

int start_secondary(void *unused)
{
	trap_init();
	init_IRQ();
	return cpu_idle();
}

void cpu_panic(void)
{
	printk("CPU[%d]: Returns from cpu_idle!\n", smp_processor_id());
	panic("SMP bolixed\n");
}

extern struct prom_cpuinfo linux_cpus[64];

extern unsigned long sparc64_cpu_startup;

/* The OBP cpu startup callback truncates the 3rd arg cookie to
 * 32-bits (I think) so to be safe we have it read the pointer
 * contained here so we work on >4GB machines. -DaveM
 */
static struct task_struct *cpu_new_task = NULL;

void __init smp_boot_cpus(void)
{
	int cpucount = 0, i;

	printk("Entering UltraSMPenguin Mode...\n");
	__sti();
	smp_store_cpu_info(boot_cpu_id);
	init_idle();

	if (linux_num_cpus == 1)
		return;

	for (i = 0; i < NR_CPUS; i++) {
		if (i == boot_cpu_id)
			continue;

		if ((cpucount + 1) == max_cpus)
			goto ignorecpu;
		if (cpu_present_map & (1UL << i)) {
			unsigned long entry = (unsigned long)(&sparc64_cpu_startup);
			unsigned long cookie = (unsigned long)(&cpu_new_task);
			struct task_struct *p;
			int timeout;
			int no;

			prom_printf("Starting CPU %d... ", i);
			kernel_thread(start_secondary, NULL, CLONE_PID);
			cpucount++;

			p = init_task.prev_task;
			init_tasks[cpucount] = p;

			p->processor = i;
			p->cpus_runnable = 1UL << i; /* we schedule the first task manually */

			del_from_runqueue(p);
			unhash_process(p);

			callin_flag = 0;
			for (no = 0; no < linux_num_cpus; no++)
				if (linux_cpus[no].mid == i)
					break;
			cpu_new_task = p;
			prom_startcpu(linux_cpus[no].prom_node,
				      entry, cookie);
			for (timeout = 0; timeout < 5000000; timeout++) {
				if (callin_flag)
					break;
				udelay(100);
			}
			if (callin_flag) {
				__cpu_number_map[i] = cpucount;
				__cpu_logical_map[cpucount] = i;
				prom_cpu_nodes[i] = linux_cpus[no].prom_node;
				prom_printf("OK\n");
			} else {
				cpucount--;
				printk("Processor %d is stuck.\n", i);
				prom_printf("FAILED\n");
			}
		}
		if (!callin_flag) {
ignorecpu:
			cpu_present_map &= ~(1UL << i);
			__cpu_number_map[i] = -1;
		}
	}
	cpu_new_task = NULL;
	if (cpucount == 0) {
		if (max_cpus != 1)
			printk("Error: only one processor found.\n");
		cpu_present_map = (1UL << smp_processor_id());
	} else {
		unsigned long bogosum = 0;

		for (i = 0; i < NR_CPUS; i++) {
			if (cpu_present_map & (1UL << i))
				bogosum += cpu_data[i].udelay_val;
		}
		printk("Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
		       cpucount + 1,
		       bogosum/(500000/HZ),
		       (bogosum/(5000/HZ))%100);
		smp_activated = 1;
		smp_num_cpus = cpucount + 1;
	}
	smp_processors_ready = 1;
	membar("#StoreStore | #StoreLoad");
}

static void spitfire_xcall_helper(u64 data0, u64 data1, u64 data2, u64 pstate, unsigned long cpu)
{
	u64 result, target;
	int stuck, tmp;

	if (this_is_starfire) {
		/* map to real upaid */
		cpu = (((cpu & 0x3c) << 1) |
			((cpu & 0x40) >> 4) |
			(cpu & 0x3));
	}

	target = (cpu << 14) | 0x70;
again:
	/* Ok, this is the real Spitfire Errata #54.
	 * One must read back from a UDB internal register
	 * after writes to the UDB interrupt dispatch, but
	 * before the membar Sync for that write.
	 * So we use the high UDB control register (ASI 0x7f,
	 * ADDR 0x20) for the dummy read. -DaveM
	 */
	tmp = 0x40;
	__asm__ __volatile__("
	wrpr	%1, %2, %%pstate
	stxa	%4, [%0] %3
	stxa	%5, [%0+%8] %3
	add	%0, %8, %0
	stxa	%6, [%0+%8] %3
	membar	#Sync
	stxa	%%g0, [%7] %3
	membar	#Sync
	mov	0x20, %%g1
	ldxa	[%%g1] 0x7f, %%g0
	membar	#Sync"
	: "=r" (tmp)
	: "r" (pstate), "i" (PSTATE_IE), "i" (ASI_INTR_W),
	  "r" (data0), "r" (data1), "r" (data2), "r" (target), "r" (0x10), "0" (tmp)
	: "g1");

	/* NOTE: PSTATE_IE is still clear. */
	stuck = 100000;
	do {
		__asm__ __volatile__("ldxa [%%g0] %1, %0"
			: "=r" (result)
			: "i" (ASI_INTR_DISPATCH_STAT));
		if (result == 0) {
			__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
					     : : "r" (pstate));
			return;
		}
		stuck -= 1;
		if (stuck == 0)
			break;
	} while (result & 0x1);
	__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
			     : : "r" (pstate));
	if (stuck == 0) {
		printk("CPU[%d]: mondo stuckage result[%016lx]\n",
		       smp_processor_id(), result);
	} else {
		udelay(2);
		goto again;
	}
}

static __inline__ void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, unsigned long mask)
{
	int ncpus = smp_num_cpus - 1;
	int i;
	u64 pstate;

	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
	for (i = 0; (i < NR_CPUS) && ncpus; i++) {
		if (mask & (1UL << i)) {
			spitfire_xcall_helper(data0, data1, data2, pstate, i);
			ncpus--;
		}
	}
}
/* Cheetah now allows to send the whole 64-bytes of data in the interrupt
 * packet, but we have no use for that.  However we do take advantage of
 * the new pipelining feature (ie. dispatch to multiple cpus simultaneously).
 */
#if NR_CPUS > 32
#error Fixup cheetah_xcall_deliver Dave...
#endif
static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, unsigned long mask)
{
	u64 pstate;
	int nack_busy_id;

	if (!mask)
		return;

	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
retry:
	__asm__ __volatile__("wrpr %0, %1, %%pstate\n\t"
			     : : "r" (pstate), "i" (PSTATE_IE));

	/* Setup the dispatch data registers. */
	__asm__ __volatile__("stxa	%0, [%3] %6\n\t"
			     "stxa	%1, [%4] %6\n\t"
			     "stxa	%2, [%5] %6\n\t"
			     "membar	#Sync\n\t"
			     : /* no outputs */
			     : "r" (data0), "r" (data1), "r" (data2),
			       "r" (0x40), "r" (0x50), "r" (0x60),
			       "i" (ASI_INTR_W));

	nack_busy_id = 0;
	{
		int i, ncpus = smp_num_cpus - 1;

		for (i = 0; (i < NR_CPUS) && ncpus; i++) {
			if (mask & (1UL << i)) {
				u64 target = (i << 14) | 0x70;

				target |= (nack_busy_id++ << 24);
				__asm__ __volatile__("stxa	%%g0, [%0] %1\n\t"
						     "membar	#Sync\n\t"
						     : /* no outputs */
						     : "r" (target), "i" (ASI_INTR_W));
				ncpus--;
			}
		}
	}

	/* Now, poll for completion. */
	{
		u64 dispatch_stat;
		long stuck;

		stuck = 100000 * nack_busy_id;
		do {
			__asm__ __volatile__("ldxa	[%%g0] %1, %0"
					     : "=r" (dispatch_stat)
					     : "i" (ASI_INTR_DISPATCH_STAT));
			if (dispatch_stat == 0UL) {
				__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
						     : : "r" (pstate));
				return;
			}
			if (!--stuck)
				break;
		} while (dispatch_stat & 0x5555555555555555UL);

		__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
				     : : "r" (pstate));

		if ((dispatch_stat & ~(0x5555555555555555UL)) == 0) {
			/* Busy bits will not clear, continue instead
			 * of freezing up on this cpu.
			 */
			printk("CPU[%d]: mondo stuckage result[%016lx]\n",
			       smp_processor_id(), dispatch_stat);
		} else {
			int i, this_busy_nack = 0;

			/* Delay some random time with interrupts enabled
			 * to prevent deadlock.
			 */
			udelay(2 * nack_busy_id);

			/* Clear out the mask bits for cpus which did not
			 * NACK us.
			 */
			for (i = 0; i < NR_CPUS; i++) {
				if (mask & (1UL << i)) {
					if ((dispatch_stat & (0x2 << this_busy_nack)) == 0)
						mask &= ~(1UL << i);
					this_busy_nack += 2;
				}
			}

			goto retry;
		}
	}
}

/* Send cross call to all processors mentioned in MASK
 * except self.
 */
static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 data2, unsigned long mask)
{
	if (smp_processors_ready) {
		u64 data0 = (((u64)ctx)<<32 | (((u64)func) & 0xffffffff));

		mask &= ~(1UL<<smp_processor_id());

		if (tlb_type == spitfire)
			spitfire_xcall_deliver(data0, data1, data2, mask);
		else
			cheetah_xcall_deliver(data0, data1, data2, mask);

		/* NOTE: Caller runs local copy on master. */
	}
}

/* Send cross call to all processors except self. */
#define smp_cross_call(func, ctx, data1, data2) \
	smp_cross_call_masked(func, ctx, data1, data2, cpu_present_map)

struct call_data_struct {
	void (*func) (void *info);
	void *info;
	atomic_t finished;
	int wait;
};

static spinlock_t call_lock = SPIN_LOCK_UNLOCKED;
static struct call_data_struct *call_data;

extern unsigned long xcall_call_function;

int smp_call_function(void (*func)(void *info), void *info,
		      int nonatomic, int wait)
{
	struct call_data_struct data;
	int cpus = smp_num_cpus - 1;
	long timeout;

	if (!cpus)
		return 0;

	data.func = func;
	data.info = info;
	atomic_set(&data.finished, 0);
	data.wait = wait;

	spin_lock_bh(&call_lock);

	call_data = &data;

	smp_cross_call(&xcall_call_function, 0, 0, 0);

	/*
	 * Wait for other cpus to complete function or at
	 * least snap the call data.
	 */
	timeout = 1000000;
	while (atomic_read(&data.finished) != cpus) {
		if (--timeout <= 0)
			goto out_timeout;
		barrier();
		udelay(1);
	}

	spin_unlock_bh(&call_lock);