📄 smp.c
字号:
/* smp.c: Sparc64 SMP support. * * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) */#include <linux/module.h>#include <linux/kernel.h>#include <linux/sched.h>#include <linux/mm.h>#include <linux/pagemap.h>#include <linux/threads.h>#include <linux/smp.h>#include <linux/smp_lock.h>#include <linux/interrupt.h>#include <linux/kernel_stat.h>#include <linux/delay.h>#include <linux/init.h>#include <linux/spinlock.h>#include <linux/fs.h>#include <linux/seq_file.h>#include <linux/cache.h>#include <linux/jiffies.h>#include <linux/profile.h>#include <linux/bootmem.h>#include <asm/head.h>#include <asm/ptrace.h>#include <asm/atomic.h>#include <asm/tlbflush.h>#include <asm/mmu_context.h>#include <asm/cpudata.h>#include <asm/irq.h>#include <asm/page.h>#include <asm/pgtable.h>#include <asm/oplib.h>#include <asm/uaccess.h>#include <asm/timer.h>#include <asm/starfire.h>#include <asm/tlb.h>extern void calibrate_delay(void);/* Please don't make this stuff initdata!!! --DaveM */static unsigned char boot_cpu_id;cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE;cpumask_t phys_cpu_present_map __read_mostly = CPU_MASK_NONE;static cpumask_t smp_commenced_mask;static cpumask_t cpu_callout_map;void smp_info(struct seq_file *m){ int i; seq_printf(m, "State:\n"); for (i = 0; i < NR_CPUS; i++) { if (cpu_online(i)) seq_printf(m, "CPU%d:\t\tonline\n", i); }}void smp_bogo(struct seq_file *m){ int i; for (i = 0; i < NR_CPUS; i++) if (cpu_online(i)) seq_printf(m, "Cpu%dBogo\t: %lu.%02lu\n" "Cpu%dClkTck\t: %016lx\n", i, cpu_data(i).udelay_val / (500000/HZ), (cpu_data(i).udelay_val / (5000/HZ)) % 100, i, cpu_data(i).clock_tick);}void __init smp_store_cpu_info(int id){ int cpu_node; /* multiplier and counter set by smp_setup_percpu_timer() */ cpu_data(id).udelay_val = loops_per_jiffy; cpu_find_by_mid(id, &cpu_node); cpu_data(id).clock_tick = prom_getintdefault(cpu_node, "clock-frequency", 0); cpu_data(id).pgcache_size = 0; cpu_data(id).pte_cache[0] = NULL; cpu_data(id).pte_cache[1] = NULL; cpu_data(id).pgd_cache = NULL; cpu_data(id).idle_volume = 1; cpu_data(id).dcache_size = prom_getintdefault(cpu_node, "dcache-size", 16 * 1024); cpu_data(id).dcache_line_size = prom_getintdefault(cpu_node, "dcache-line-size", 32); cpu_data(id).icache_size = prom_getintdefault(cpu_node, "icache-size", 16 * 1024); cpu_data(id).icache_line_size = prom_getintdefault(cpu_node, "icache-line-size", 32); cpu_data(id).ecache_size = prom_getintdefault(cpu_node, "ecache-size", 4 * 1024 * 1024); cpu_data(id).ecache_line_size = prom_getintdefault(cpu_node, "ecache-line-size", 64); printk("CPU[%d]: Caches " "D[sz(%d):line_sz(%d)] " "I[sz(%d):line_sz(%d)] " "E[sz(%d):line_sz(%d)]\n", id, cpu_data(id).dcache_size, cpu_data(id).dcache_line_size, cpu_data(id).icache_size, cpu_data(id).icache_line_size, cpu_data(id).ecache_size, cpu_data(id).ecache_line_size);}static void smp_setup_percpu_timer(void);static volatile unsigned long callin_flag = 0;extern void inherit_locked_prom_mappings(int save_p);static inline void cpu_setup_percpu_base(unsigned long cpu_id){ __asm__ __volatile__("mov %0, %%g5\n\t" "stxa %0, [%1] %2\n\t" "membar #Sync" : /* no outputs */ : "r" (__per_cpu_offset(cpu_id)), "r" (TSB_REG), "i" (ASI_IMMU));}void __init smp_callin(void){ int cpuid = hard_smp_processor_id(); inherit_locked_prom_mappings(0); __flush_tlb_all(); cpu_setup_percpu_base(cpuid); smp_setup_percpu_timer(); if (cheetah_pcache_forced_on) cheetah_enable_pcache(); local_irq_enable(); calibrate_delay(); smp_store_cpu_info(cpuid); callin_flag = 1; __asm__ __volatile__("membar #Sync\n\t" "flush %%g6" : : : "memory"); /* Clear this or we will die instantly when we * schedule back to this idler... */ current_thread_info()->new_child = 0; /* Attach to the address space of init_task. */ atomic_inc(&init_mm.mm_count); current->active_mm = &init_mm; while (!cpu_isset(cpuid, smp_commenced_mask)) rmb(); cpu_set(cpuid, cpu_online_map); /* idle thread is expected to have preempt disabled */ preempt_disable();}void cpu_panic(void){ printk("CPU[%d]: Returns from cpu_idle!\n", smp_processor_id()); panic("SMP bolixed\n");}static unsigned long current_tick_offset __read_mostly;/* This tick register synchronization scheme is taken entirely from * the ia64 port, see arch/ia64/kernel/smpboot.c for details and credit. * * The only change I've made is to rework it so that the master * initiates the synchonization instead of the slave. -DaveM */#define MASTER 0#define SLAVE (SMP_CACHE_BYTES/sizeof(unsigned long))#define NUM_ROUNDS 64 /* magic value */#define NUM_ITERS 5 /* likewise */static DEFINE_SPINLOCK(itc_sync_lock);static unsigned long go[SLAVE + 1];#define DEBUG_TICK_SYNC 0static inline long get_delta (long *rt, long *master){ unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0; unsigned long tcenter, t0, t1, tm; unsigned long i; for (i = 0; i < NUM_ITERS; i++) { t0 = tick_ops->get_tick(); go[MASTER] = 1; membar_storeload(); while (!(tm = go[SLAVE])) rmb(); go[SLAVE] = 0; wmb(); t1 = tick_ops->get_tick(); if (t1 - t0 < best_t1 - best_t0) best_t0 = t0, best_t1 = t1, best_tm = tm; } *rt = best_t1 - best_t0; *master = best_tm - best_t0; /* average best_t0 and best_t1 without overflow: */ tcenter = (best_t0/2 + best_t1/2); if (best_t0 % 2 + best_t1 % 2 == 2) tcenter++; return tcenter - best_tm;}void smp_synchronize_tick_client(void){ long i, delta, adj, adjust_latency = 0, done = 0; unsigned long flags, rt, master_time_stamp, bound;#if DEBUG_TICK_SYNC struct { long rt; /* roundtrip time */ long master; /* master's timestamp */ long diff; /* difference between midpoint and master's timestamp */ long lat; /* estimate of itc adjustment latency */ } t[NUM_ROUNDS];#endif go[MASTER] = 1; while (go[MASTER]) rmb(); local_irq_save(flags); { for (i = 0; i < NUM_ROUNDS; i++) { delta = get_delta(&rt, &master_time_stamp); if (delta == 0) { done = 1; /* let's lock on to this... */ bound = rt; } if (!done) { if (i > 0) { adjust_latency += -delta; adj = -delta + adjust_latency/4; } else adj = -delta; tick_ops->add_tick(adj, current_tick_offset); }#if DEBUG_TICK_SYNC t[i].rt = rt; t[i].master = master_time_stamp; t[i].diff = delta; t[i].lat = adjust_latency/4;#endif } } local_irq_restore(flags);#if DEBUG_TICK_SYNC for (i = 0; i < NUM_ROUNDS; i++) printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n", t[i].rt, t[i].master, t[i].diff, t[i].lat);#endif printk(KERN_INFO "CPU %d: synchronized TICK with master CPU (last diff %ld cycles," "maxerr %lu cycles)\n", smp_processor_id(), delta, rt);}static void smp_start_sync_tick_client(int cpu);static void smp_synchronize_one_tick(int cpu){ unsigned long flags, i; go[MASTER] = 0; smp_start_sync_tick_client(cpu); /* wait for client to be ready */ while (!go[MASTER]) rmb(); /* now let the client proceed into his loop */ go[MASTER] = 0; membar_storeload(); spin_lock_irqsave(&itc_sync_lock, flags); { for (i = 0; i < NUM_ROUNDS*NUM_ITERS; i++) { while (!go[MASTER]) rmb(); go[MASTER] = 0; wmb(); go[SLAVE] = tick_ops->get_tick(); membar_storeload(); } } spin_unlock_irqrestore(&itc_sync_lock, flags);}extern unsigned long sparc64_cpu_startup;/* The OBP cpu startup callback truncates the 3rd arg cookie to * 32-bits (I think) so to be safe we have it read the pointer * contained here so we work on >4GB machines. -DaveM */static struct thread_info *cpu_new_thread = NULL;static int __devinit smp_boot_one_cpu(unsigned int cpu){ unsigned long entry = (unsigned long)(&sparc64_cpu_startup); unsigned long cookie = (unsigned long)(&cpu_new_thread); struct task_struct *p; int timeout, ret, cpu_node; p = fork_idle(cpu); callin_flag = 0; cpu_new_thread = p->thread_info; cpu_set(cpu, cpu_callout_map); cpu_find_by_mid(cpu, &cpu_node); prom_startcpu(cpu_node, entry, cookie); for (timeout = 0; timeout < 5000000; timeout++) { if (callin_flag) break; udelay(100); } if (callin_flag) { ret = 0; } else { printk("Processor %d is stuck.\n", cpu); cpu_clear(cpu, cpu_callout_map); ret = -ENODEV; } cpu_new_thread = NULL; return ret;}static void spitfire_xcall_helper(u64 data0, u64 data1, u64 data2, u64 pstate, unsigned long cpu){ u64 result, target; int stuck, tmp; if (this_is_starfire) { /* map to real upaid */ cpu = (((cpu & 0x3c) << 1) | ((cpu & 0x40) >> 4) | (cpu & 0x3)); } target = (cpu << 14) | 0x70;again: /* Ok, this is the real Spitfire Errata #54. * One must read back from a UDB internal register * after writes to the UDB interrupt dispatch, but * before the membar Sync for that write. * So we use the high UDB control register (ASI 0x7f, * ADDR 0x20) for the dummy read. -DaveM */ tmp = 0x40; __asm__ __volatile__( "wrpr %1, %2, %%pstate\n\t" "stxa %4, [%0] %3\n\t" "stxa %5, [%0+%8] %3\n\t" "add %0, %8, %0\n\t" "stxa %6, [%0+%8] %3\n\t" "membar #Sync\n\t" "stxa %%g0, [%7] %3\n\t" "membar #Sync\n\t" "mov 0x20, %%g1\n\t" "ldxa [%%g1] 0x7f, %%g0\n\t" "membar #Sync" : "=r" (tmp) : "r" (pstate), "i" (PSTATE_IE), "i" (ASI_INTR_W), "r" (data0), "r" (data1), "r" (data2), "r" (target), "r" (0x10), "0" (tmp) : "g1"); /* NOTE: PSTATE_IE is still clear. */ stuck = 100000; do { __asm__ __volatile__("ldxa [%%g0] %1, %0" : "=r" (result) : "i" (ASI_INTR_DISPATCH_STAT)); if (result == 0) { __asm__ __volatile__("wrpr %0, 0x0, %%pstate" : : "r" (pstate)); return; } stuck -= 1; if (stuck == 0) break; } while (result & 0x1); __asm__ __volatile__("wrpr %0, 0x0, %%pstate" : : "r" (pstate)); if (stuck == 0) { printk("CPU[%d]: mondo stuckage result[%016lx]\n", smp_processor_id(), result); } else { udelay(2); goto again; }}static __inline__ void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask){ u64 pstate; int i; __asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate)); for_each_cpu_mask(i, mask) spitfire_xcall_helper(data0, data1, data2, pstate, i);}/* Cheetah now allows to send the whole 64-bytes of data in the interrupt * packet, but we have no use for that. However we do take advantage of * the new pipelining feature (ie. dispatch to multiple cpus simultaneously). */static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask){ u64 pstate, ver; int nack_busy_id, is_jalapeno; if (cpus_empty(mask)) return; /* Unfortunately, someone at Sun had the brilliant idea to make the * busy/nack fields hard-coded by ITID number for this Ultra-III * derivative processor. */ __asm__ ("rdpr %%ver, %0" : "=r" (ver)); is_jalapeno = ((ver >> 32) == 0x003e0016); __asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));retry: __asm__ __volatile__("wrpr %0, %1, %%pstate\n\t" : : "r" (pstate), "i" (PSTATE_IE)); /* Setup the dispatch data registers. */ __asm__ __volatile__("stxa %0, [%3] %6\n\t" "stxa %1, [%4] %6\n\t" "stxa %2, [%5] %6\n\t" "membar #Sync\n\t" : /* no outputs */ : "r" (data0), "r" (data1), "r" (data2), "r" (0x40), "r" (0x50), "r" (0x60), "i" (ASI_INTR_W)); nack_busy_id = 0; { int i; for_each_cpu_mask(i, mask) { u64 target = (i << 14) | 0x70; if (!is_jalapeno) target |= (nack_busy_id << 24); __asm__ __volatile__( "stxa %%g0, [%0] %1\n\t" "membar #Sync\n\t" : /* no outputs */ : "r" (target), "i" (ASI_INTR_W)); nack_busy_id++; } } /* Now, poll for completion. */ { u64 dispatch_stat; long stuck; stuck = 100000 * nack_busy_id; do { __asm__ __volatile__("ldxa [%%g0] %1, %0" : "=r" (dispatch_stat) : "i" (ASI_INTR_DISPATCH_STAT)); if (dispatch_stat == 0UL) { __asm__ __volatile__("wrpr %0, 0x0, %%pstate" : : "r" (pstate)); return; } if (!--stuck) break; } while (dispatch_stat & 0x5555555555555555UL); __asm__ __volatile__("wrpr %0, 0x0, %%pstate" : : "r" (pstate)); if ((dispatch_stat & ~(0x5555555555555555UL)) == 0) { /* Busy bits will not clear, continue instead * of freezing up on this cpu. */ printk("CPU[%d]: mondo stuckage result[%016lx]\n", smp_processor_id(), dispatch_stat); } else { int i, this_busy_nack = 0; /* Delay some random time with interrupts enabled * to prevent deadlock. */ udelay(2 * nack_busy_id); /* Clear out the mask bits for cpus which did not * NACK us. */ for_each_cpu_mask(i, mask) { u64 check_mask; if (is_jalapeno) check_mask = (0x2UL << (2*i)); else check_mask = (0x2UL << this_busy_nack); if ((dispatch_stat & check_mask) == 0) cpu_clear(i, mask); this_busy_nack += 2; } goto retry; } }}/* Send cross call to all processors mentioned in MASK * except self. */static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 data2, cpumask_t mask){ u64 data0 = (((u64)ctx)<<32 | (((u64)func) & 0xffffffff)); int this_cpu = get_cpu(); cpus_and(mask, mask, cpu_online_map); cpu_clear(this_cpu, mask); if (tlb_type == spitfire) spitfire_xcall_deliver(data0, data1, data2, mask); else cheetah_xcall_deliver(data0, data1, data2, mask); /* NOTE: Caller runs local copy on master. */ put_cpu();}extern unsigned long xcall_sync_tick;static void smp_start_sync_tick_client(int cpu){ cpumask_t mask = cpumask_of_cpu(cpu); smp_cross_call_masked(&xcall_sync_tick, 0, 0, 0, mask);}/* Send cross call to all processors except self. */#define smp_cross_call(func, ctx, data1, data2) \ smp_cross_call_masked(func, ctx, data1, data2, cpu_online_map)struct call_data_struct { void (*func) (void *info); void *info; atomic_t finished; int wait;};static DEFINE_SPINLOCK(call_lock);static struct call_data_struct *call_data;extern unsigned long xcall_call_function;/* * You must not call this function with disabled interrupts or from a * hardware interrupt handler or from a bottom half handler. */int smp_call_function(void (*func)(void *info), void *info, int nonatomic, int wait){ struct call_data_struct data; int cpus = num_online_cpus() - 1; long timeout; if (!cpus) return 0; /* Can deadlock when called with interrupts disabled */ WARN_ON(irqs_disabled());
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -