/*
 * NOTE(extraction): web code-viewer chrome removed here. Original page header
 * read: "smp.c — from the 'h kernel' source browser, C code, 1,266 lines
 * total, page 1 of 3". This file is therefore an incomplete excerpt.
 */
/* smp.c: Sparc64 SMP support.
 *
 * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
 */

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/threads.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/fs.h>
#include <linux/seq_file.h>
#include <linux/cache.h>
#include <linux/jiffies.h>
#include <linux/profile.h>

#include <asm/head.h>
#include <asm/ptrace.h>
#include <asm/atomic.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/cpudata.h>
#include <asm/irq.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/oplib.h>
#include <asm/uaccess.h>
#include <asm/timer.h>
#include <asm/starfire.h>
#include <asm/tlb.h>

extern int linux_num_cpus;
extern void calibrate_delay(void);

/* Please don't make this stuff initdata!!!  --DaveM */
static unsigned char boot_cpu_id;

/* Bitmasks tracking CPU bring-up state:
 *   cpu_online_map        - CPUs fully online (set at the end of smp_callin)
 *   phys_cpu_present_map  - CPUs physically present
 *   smp_commenced_mask    - boot CPU's "go" signal each secondary spins on
 *   cpu_callout_map       - CPUs we have asked the PROM to start
 */
cpumask_t cpu_online_map = CPU_MASK_NONE;
cpumask_t phys_cpu_present_map = CPU_MASK_NONE;
static cpumask_t smp_commenced_mask;
static cpumask_t cpu_callout_map;

/* seq_file reporter: print one "CPUn: online" line per online CPU. */
void smp_info(struct seq_file *m)
{
	int i;

	seq_printf(m, "State:\n");
	for (i = 0; i < NR_CPUS; i++) {
		if (cpu_online(i))
			seq_printf(m, "CPU%d:\t\tonline\n", i);
	}
}

/* seq_file reporter: per-CPU BogoMIPS (derived from udelay_val) and the
 * clock-tick frequency recorded by smp_store_cpu_info().
 */
void smp_bogo(struct seq_file *m)
{
	int i;

	for (i = 0; i < NR_CPUS; i++)
		if (cpu_online(i))
			seq_printf(m,
				   "Cpu%dBogo\t: %lu.%02lu\n"
				   "Cpu%dClkTck\t: %016lx\n",
				   i, cpu_data(i).udelay_val / (500000/HZ),
				   (cpu_data(i).udelay_val / (5000/HZ)) % 100,
				   i, cpu_data(i).clock_tick);
}

/* Record per-CPU calibration data for CPU @id: the delay-loop value, the
 * "clock-frequency" property read from this CPU's PROM node, and empty
 * per-CPU page-table cache state.
 */
void __init smp_store_cpu_info(int id)
{
	int cpu_node;

	/* multiplier and counter set by smp_setup_percpu_timer() */
	cpu_data(id).udelay_val = loops_per_jiffy;

	cpu_find_by_mid(id, &cpu_node);
	cpu_data(id).clock_tick = prom_getintdefault(cpu_node,
						     "clock-frequency", 0);

	/* Start with empty pte/pgd quicklists. */
	cpu_data(id).pgcache_size = 0;
	cpu_data(id).pte_cache[0] = NULL;
	cpu_data(id).pte_cache[1] = NULL;
	cpu_data(id).pgdcache_size = 0;
	cpu_data(id).pgd_cache = NULL;
	cpu_data(id).idle_volume = 1;
}

static void smp_setup_percpu_timer(void);

/* Set non-zero by a secondary CPU in smp_callin() to tell the boot CPU
 * (polling in smp_boot_one_cpu()) that it came up.
 */
static volatile unsigned long callin_flag = 0;

extern void inherit_locked_prom_mappings(int save_p);

/* C entry point for a freshly started secondary CPU: take over the PROM's
 * locked TLB mappings, flush the TLB, start the local tick timer, calibrate
 * the delay loop, publish cpu_data, signal the boot CPU via callin_flag,
 * then spin until the boot CPU sets us in smp_commenced_mask before marking
 * ourselves online.
 */
void __init smp_callin(void)
{
	int cpuid = hard_smp_processor_id();

	inherit_locked_prom_mappings(0);

	__flush_tlb_all();

	smp_setup_percpu_timer();

	local_irq_enable();

	calibrate_delay();
	smp_store_cpu_info(cpuid);
	callin_flag = 1;
	/* Make the store to callin_flag globally visible and flush the
	 * instruction path before proceeding.
	 */
	__asm__ __volatile__("membar #Sync\n\t"
			     "flush %%g6" : : : "memory");

	/* Clear this or we will die instantly when we
	 * schedule back to this idler...
	 */
	clear_thread_flag(TIF_NEWCHILD);

	/* Attach to the address space of init_task. */
	atomic_inc(&init_mm.mm_count);
	current->active_mm = &init_mm;

	/* Wait for the boot CPU's "commence" signal. */
	while (!cpu_isset(cpuid, smp_commenced_mask))
		membar("#LoadLoad");

	cpu_set(cpuid, cpu_online_map);
}

/* Called if a CPU's idle loop ever returns — that should never happen. */
void cpu_panic(void)
{
	printk("CPU[%d]: Returns from cpu_idle!\n", smp_processor_id());
	panic("SMP bolixed\n");
}

/* Tick offset applied during tick synchronization (see add_tick below). */
static unsigned long current_tick_offset;

/* This tick register synchronization scheme is taken entirely from
 * the ia64 port, see arch/ia64/kernel/smpboot.c for details and credit.
 *
 * The only change I've made is to rework it so that the master
 * initiates the synchonization instead of the slave.
-DaveM */#define MASTER 0#define SLAVE (SMP_CACHE_BYTES/sizeof(unsigned long))#define NUM_ROUNDS 64 /* magic value */#define NUM_ITERS 5 /* likewise */static DEFINE_SPINLOCK(itc_sync_lock);static unsigned long go[SLAVE + 1];#define DEBUG_TICK_SYNC 0static inline long get_delta (long *rt, long *master){ unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0; unsigned long tcenter, t0, t1, tm; unsigned long i; for (i = 0; i < NUM_ITERS; i++) { t0 = tick_ops->get_tick(); go[MASTER] = 1; membar("#StoreLoad"); while (!(tm = go[SLAVE])) membar("#LoadLoad"); go[SLAVE] = 0; membar("#StoreStore"); t1 = tick_ops->get_tick(); if (t1 - t0 < best_t1 - best_t0) best_t0 = t0, best_t1 = t1, best_tm = tm; } *rt = best_t1 - best_t0; *master = best_tm - best_t0; /* average best_t0 and best_t1 without overflow: */ tcenter = (best_t0/2 + best_t1/2); if (best_t0 % 2 + best_t1 % 2 == 2) tcenter++; return tcenter - best_tm;}void smp_synchronize_tick_client(void){ long i, delta, adj, adjust_latency = 0, done = 0; unsigned long flags, rt, master_time_stamp, bound;#if DEBUG_TICK_SYNC struct { long rt; /* roundtrip time */ long master; /* master's timestamp */ long diff; /* difference between midpoint and master's timestamp */ long lat; /* estimate of itc adjustment latency */ } t[NUM_ROUNDS];#endif go[MASTER] = 1; while (go[MASTER]) membar("#LoadLoad"); local_irq_save(flags); { for (i = 0; i < NUM_ROUNDS; i++) { delta = get_delta(&rt, &master_time_stamp); if (delta == 0) { done = 1; /* let's lock on to this... 
*/ bound = rt; } if (!done) { if (i > 0) { adjust_latency += -delta; adj = -delta + adjust_latency/4; } else adj = -delta; tick_ops->add_tick(adj, current_tick_offset); }#if DEBUG_TICK_SYNC t[i].rt = rt; t[i].master = master_time_stamp; t[i].diff = delta; t[i].lat = adjust_latency/4;#endif } } local_irq_restore(flags);#if DEBUG_TICK_SYNC for (i = 0; i < NUM_ROUNDS; i++) printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n", t[i].rt, t[i].master, t[i].diff, t[i].lat);#endif printk(KERN_INFO "CPU %d: synchronized TICK with master CPU (last diff %ld cycles," "maxerr %lu cycles)\n", smp_processor_id(), delta, rt);}static void smp_start_sync_tick_client(int cpu);static void smp_synchronize_one_tick(int cpu){ unsigned long flags, i; go[MASTER] = 0; smp_start_sync_tick_client(cpu); /* wait for client to be ready */ while (!go[MASTER]) membar("#LoadLoad"); /* now let the client proceed into his loop */ go[MASTER] = 0; membar("#StoreLoad"); spin_lock_irqsave(&itc_sync_lock, flags); { for (i = 0; i < NUM_ROUNDS*NUM_ITERS; i++) { while (!go[MASTER]) membar("#LoadLoad"); go[MASTER] = 0; membar("#StoreStore"); go[SLAVE] = tick_ops->get_tick(); membar("#StoreLoad"); } } spin_unlock_irqrestore(&itc_sync_lock, flags);}extern unsigned long sparc64_cpu_startup;/* The OBP cpu startup callback truncates the 3rd arg cookie to * 32-bits (I think) so to be safe we have it read the pointer * contained here so we work on >4GB machines. 
 *   -DaveM
 */
static struct thread_info *cpu_new_thread = NULL;

/* Boot one secondary CPU: fork its idle task, publish the new thread_info
 * through cpu_new_thread (read indirectly by the startup trampoline, see the
 * cookie comment above), then ask the PROM to start the CPU at
 * sparc64_cpu_startup.  Polls callin_flag (set by smp_callin() on the new
 * CPU) up to 5,000,000 times with udelay(100) between checks.  Returns 0 on
 * success, -ENODEV if the CPU never calls in.
 */
static int __devinit smp_boot_one_cpu(unsigned int cpu)
{
	unsigned long entry = (unsigned long)(&sparc64_cpu_startup);
	unsigned long cookie = (unsigned long)(&cpu_new_thread);
	struct task_struct *p;
	int timeout, ret, cpu_node;

	p = fork_idle(cpu);
	callin_flag = 0;
	cpu_new_thread = p->thread_info;
	cpu_set(cpu, cpu_callout_map);

	cpu_find_by_mid(cpu, &cpu_node);
	prom_startcpu(cpu_node, entry, cookie);

	for (timeout = 0; timeout < 5000000; timeout++) {
		if (callin_flag)
			break;
		udelay(100);
	}
	if (callin_flag) {
		ret = 0;
	} else {
		printk("Processor %d is stuck.\n", cpu);
		cpu_clear(cpu, cpu_callout_map);
		ret = -ENODEV;
	}
	cpu_new_thread = NULL;

	return ret;
}

/* Deliver one cross-call mondo (data0/1/2) to a single @cpu on Spitfire:
 * write the three data words plus the dispatch target via ASI_INTR_W with
 * interrupts disabled, then poll the dispatch status register; on a still-
 * pending result we retry the whole dispatch (after udelay(2)) and only
 * give up with a "mondo stuckage" message after 100000 polls.
 */
static void spitfire_xcall_helper(u64 data0, u64 data1, u64 data2, u64 pstate, unsigned long cpu)
{
	u64 result, target;
	int stuck, tmp;

	if (this_is_starfire) {
		/* map to real upaid */
		cpu = (((cpu & 0x3c) << 1) |
		       ((cpu & 0x40) >> 4) |
		       (cpu & 0x3));
	}

	target = (cpu << 14) | 0x70;
again:
	/* Ok, this is the real Spitfire Errata #54.
	 * One must read back from a UDB internal register
	 * after writes to the UDB interrupt dispatch, but
	 * before the membar Sync for that write.
	 * So we use the high UDB control register (ASI 0x7f,
	 * ADDR 0x20) for the dummy read. -DaveM
	 */
	tmp = 0x40;
	__asm__ __volatile__(
	"wrpr %1, %2, %%pstate\n\t"
	"stxa %4, [%0] %3\n\t"
	"stxa %5, [%0+%8] %3\n\t"
	"add %0, %8, %0\n\t"
	"stxa %6, [%0+%8] %3\n\t"
	"membar #Sync\n\t"
	"stxa %%g0, [%7] %3\n\t"
	"membar #Sync\n\t"
	"mov 0x20, %%g1\n\t"
	"ldxa [%%g1] 0x7f, %%g0\n\t"
	"membar #Sync"
	: "=r" (tmp)
	: "r" (pstate), "i" (PSTATE_IE), "i" (ASI_INTR_W),
	  "r" (data0), "r" (data1), "r" (data2), "r" (target),
	  "r" (0x10), "0" (tmp)
	: "g1");

	/* NOTE: PSTATE_IE is still clear. */
	stuck = 100000;
	do {
		__asm__ __volatile__("ldxa [%%g0] %1, %0"
			: "=r" (result)
			: "i" (ASI_INTR_DISPATCH_STAT));
		if (result == 0) {
			/* Dispatch completed; restore interrupt state. */
			__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
					     : : "r" (pstate));
			return;
		}
		stuck -= 1;
		if (stuck == 0)
			break;
		/* NOTE(review): bit 0 of the dispatch status appears to be
		 * the "busy" bit — spin while it is set; verify against the
		 * Spitfire manual.
		 */
	} while (result & 0x1);
	__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
			     : : "r" (pstate));
	if (stuck == 0) {
		printk("CPU[%d]: mondo stuckage result[%016lx]\n",
		       smp_processor_id(), result);
	} else {
		/* Non-zero, non-busy status: presumably a NACK — retry. */
		udelay(2);
		goto again;
	}
}

/* Spitfire can only dispatch to one CPU at a time, so deliver the mondo to
 * each CPU in @mask serially.
 */
static __inline__ void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
{
	u64 pstate;
	int i;

	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
	for_each_cpu_mask(i, mask)
		spitfire_xcall_helper(data0, data1, data2, pstate, i);
}

/* Cheetah now allows to send the whole 64-bytes of data in the interrupt
 * packet, but we have no use for that. However we do take advantage of
 * the new pipelining feature (ie. dispatch to multiple cpus simultaneously).
 */
static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
{
	u64 pstate, ver;
	int nack_busy_id, is_jalapeno;

	if (cpus_empty(mask))
		return;

	/* Unfortunately, someone at Sun had the brilliant idea to make the
	 * busy/nack fields hard-coded by ITID number for this Ultra-III
	 * derivative processor.
	 */
	__asm__ ("rdpr %%ver, %0" : "=r" (ver));
	is_jalapeno = ((ver >> 32) == 0x003e0016);

	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));

retry:
	__asm__ __volatile__("wrpr %0, %1, %%pstate\n\t"
			     : : "r" (pstate), "i" (PSTATE_IE));
	/* NOTE(extraction): source truncated here — this was page 1 of 3 in the
	 * code viewer; the remainder of cheetah_xcall_deliver() and the rest of
	 * the file continue on the following pages.  Viewer keyboard-shortcut
	 * help text removed.
	 */