📄 smp.c
	} else {
		unsigned long bogosum = 0;

		for (i = 0; i < NR_CPUS; i++) {
			if (cpu_present_map & (1UL << i))
				bogosum += cpu_data[i].udelay_val;
		}
		printk("Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
		       cpucount + 1,
		       bogosum/(500000/HZ),
		       (bogosum/(5000/HZ))%100);
		smp_activated = 1;
		smp_num_cpus = cpucount + 1;
	}
	smp_processors_ready = 1;
	membar("#StoreStore | #StoreLoad");

	smp_synchronize_tick();
}

static void spitfire_xcall_helper(u64 data0, u64 data1, u64 data2, u64 pstate, unsigned long cpu)
{
	u64 result, target;
	int stuck, tmp;

	if (this_is_starfire) {
		/* map to real upaid */
		cpu = (((cpu & 0x3c) << 1) |
		       ((cpu & 0x40) >> 4) |
		       (cpu & 0x3));
	}

	target = (cpu << 14) | 0x70;
again:
	/* Ok, this is the real Spitfire Errata #54.
	 * One must read back from a UDB internal register
	 * after writes to the UDB interrupt dispatch, but
	 * before the membar Sync for that write.
	 * So we use the high UDB control register (ASI 0x7f,
	 * ADDR 0x20) for the dummy read. -DaveM
	 */
	tmp = 0x40;
	__asm__ __volatile__(
	"wrpr %1, %2, %%pstate\n\t"
	"stxa %4, [%0] %3\n\t"
	"stxa %5, [%0+%8] %3\n\t"
	"add %0, %8, %0\n\t"
	"stxa %6, [%0+%8] %3\n\t"
	"membar #Sync\n\t"
	"stxa %%g0, [%7] %3\n\t"
	"membar #Sync\n\t"
	"mov 0x20, %%g1\n\t"
	"ldxa [%%g1] 0x7f, %%g0\n\t"
	"membar #Sync"
	: "=r" (tmp)
	: "r" (pstate), "i" (PSTATE_IE), "i" (ASI_INTR_W),
	  "r" (data0), "r" (data1), "r" (data2), "r" (target),
	  "r" (0x10), "0" (tmp)
	: "g1");

	/* NOTE: PSTATE_IE is still clear. */
	stuck = 100000;
	do {
		__asm__ __volatile__("ldxa [%%g0] %1, %0"
			: "=r" (result)
			: "i" (ASI_INTR_DISPATCH_STAT));
		if (result == 0) {
			__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
					     : : "r" (pstate));
			return;
		}
		stuck -= 1;
		if (stuck == 0)
			break;
	} while (result & 0x1);
	__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
			     : : "r" (pstate));
	if (stuck == 0) {
		printk("CPU[%d]: mondo stuckage result[%016lx]\n",
		       smp_processor_id(), result);
	} else {
		udelay(2);
		goto again;
	}
}

static __inline__ void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, unsigned long mask)
{
	int ncpus = smp_num_cpus - 1;
	int i;
	u64 pstate;

	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
	for (i = 0; (i < NR_CPUS) && ncpus; i++) {
		if (mask & (1UL << i)) {
			spitfire_xcall_helper(data0, data1, data2, pstate, i);
			ncpus--;
		}
	}
}

/* Cheetah now allows to send the whole 64-bytes of data in the interrupt
 * packet, but we have no use for that.  However we do take advantage of
 * the new pipelining feature (ie. dispatch to multiple cpus simultaneously).
 */
#if NR_CPUS > 32
#error Fixup cheetah_xcall_deliver Dave...
#endif
static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, unsigned long mask)
{
	u64 pstate, ver;
	int nack_busy_id, is_jalapeno;

	if (!mask)
		return;

	/* Unfortunately, someone at Sun had the brilliant idea to make the
	 * busy/nack fields hard-coded by ITID number for this Ultra-III
	 * derivative processor.
	 */
	__asm__ ("rdpr %%ver, %0" : "=r" (ver));
	is_jalapeno = ((ver >> 32) == 0x003e0016);

	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));

retry:
	__asm__ __volatile__("wrpr %0, %1, %%pstate\n\t"
			     : : "r" (pstate), "i" (PSTATE_IE));

	/* Setup the dispatch data registers. */
	__asm__ __volatile__("stxa %0, [%3] %6\n\t"
			     "stxa %1, [%4] %6\n\t"
			     "stxa %2, [%5] %6\n\t"
			     "membar #Sync\n\t"
			     : /* no outputs */
			     : "r" (data0), "r" (data1), "r" (data2),
			       "r" (0x40), "r" (0x50), "r" (0x60),
			       "i" (ASI_INTR_W));

	nack_busy_id = 0;
	{
		int i, ncpus = smp_num_cpus - 1;

		for (i = 0; (i < NR_CPUS) && ncpus; i++) {
			if (mask & (1UL << i)) {
				u64 target = (i << 14) | 0x70;

				if (!is_jalapeno)
					target |= (nack_busy_id << 24);
				__asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
						     "membar #Sync\n\t"
						     : /* no outputs */
						     : "r" (target), "i" (ASI_INTR_W));
				nack_busy_id++;
				ncpus--;
			}
		}
	}

	/* Now, poll for completion. */
	{
		u64 dispatch_stat;
		long stuck;

		stuck = 100000 * nack_busy_id;
		do {
			__asm__ __volatile__("ldxa [%%g0] %1, %0"
					     : "=r" (dispatch_stat)
					     : "i" (ASI_INTR_DISPATCH_STAT));
			if (dispatch_stat == 0UL) {
				__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
						     : : "r" (pstate));
				return;
			}
			if (!--stuck)
				break;
		} while (dispatch_stat & 0x5555555555555555UL);

		__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
				     : : "r" (pstate));

		if ((dispatch_stat & ~(0x5555555555555555UL)) == 0) {
			/* Busy bits will not clear, continue instead
			 * of freezing up on this cpu.
			 */
			printk("CPU[%d]: mondo stuckage result[%016lx]\n",
			       smp_processor_id(), dispatch_stat);
		} else {
			int i, this_busy_nack = 0;

			/* Delay some random time with interrupts enabled
			 * to prevent deadlock.
			 */
			udelay(2 * nack_busy_id);

			/* Clear out the mask bits for cpus which did not
			 * NACK us.
			 */
			for (i = 0; i < NR_CPUS; i++) {
				if (mask & (1UL << i)) {
					u64 check_mask;

					if (is_jalapeno)
						check_mask = (0x2UL << (2*i));
					else
						check_mask = (0x2UL << this_busy_nack);
					if ((dispatch_stat & check_mask) == 0)
						mask &= ~(1UL << i);
					this_busy_nack += 2;
				}
			}

			goto retry;
		}
	}
}

/* Send cross call to all processors mentioned in MASK
 * except self.
 */
static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 data2, unsigned long mask)
{
	if (smp_processors_ready) {
		u64 data0 = (((u64)ctx)<<32 | (((u64)func) & 0xffffffff));

		mask &= ~(1UL<<smp_processor_id());

		if (tlb_type == spitfire)
			spitfire_xcall_deliver(data0, data1, data2, mask);
		else
			cheetah_xcall_deliver(data0, data1, data2, mask);

		/* NOTE: Caller runs local copy on master. */
	}
}

extern unsigned long xcall_sync_tick;

static void smp_start_sync_tick_client(int cpu)
{
	smp_cross_call_masked(&xcall_sync_tick,
			      0, 0, 0, (1UL << cpu));
}

/* Send cross call to all processors except self. */
#define smp_cross_call(func, ctx, data1, data2) \
	smp_cross_call_masked(func, ctx, data1, data2, cpu_present_map)

struct call_data_struct {
	void (*func) (void *info);
	void *info;
	atomic_t finished;
	int wait;
};

static spinlock_t call_lock = SPIN_LOCK_UNLOCKED;
static struct call_data_struct *call_data;

extern unsigned long xcall_call_function;

int smp_call_function(void (*func)(void *info), void *info,
		      int nonatomic, int wait)
{
	struct call_data_struct data;
	int cpus = smp_num_cpus - 1;
	long timeout;

	if (!cpus)
		return 0;

	data.func = func;
	data.info = info;
	atomic_set(&data.finished, 0);
	data.wait = wait;

	spin_lock_bh(&call_lock);

	call_data = &data;

	smp_cross_call(&xcall_call_function, 0, 0, 0);

	/*
	 * Wait for other cpus to complete function or at
	 * least snap the call data.
	 */
	timeout = 1000000;
	while (atomic_read(&data.finished) != cpus) {
		if (--timeout <= 0)
			goto out_timeout;
		barrier();
		udelay(1);
	}

	spin_unlock_bh(&call_lock);

	return 0;

out_timeout:
	spin_unlock_bh(&call_lock);
	printk("XCALL: Remote cpus not responding, ncpus=%d finished=%d\n",
	       smp_num_cpus - 1, atomic_read(&data.finished));
	return 0;
}

void smp_call_function_client(int irq, struct pt_regs *regs)
{
	void (*func) (void *info) = call_data->func;
	void *info = call_data->info;

	clear_softint(1 << irq);
	if (call_data->wait) {
		/* let initiator proceed only after completion */
		func(info);
		atomic_inc(&call_data->finished);
	} else {
		/* let initiator proceed after getting data */
		atomic_inc(&call_data->finished);
		func(info);
	}
}

extern unsigned long xcall_flush_tlb_page;
extern unsigned long xcall_flush_tlb_mm;
extern unsigned long xcall_flush_tlb_range;
extern unsigned long xcall_flush_tlb_all_spitfire;
extern unsigned long xcall_flush_tlb_all_cheetah;
extern unsigned long xcall_flush_cache_all_spitfire;
extern unsigned long xcall_report_regs;
extern unsigned long xcall_receive_signal;
extern unsigned long xcall_flush_dcache_page_cheetah;
extern unsigned long xcall_flush_dcache_page_spitfire;

#ifdef CONFIG_DEBUG_DCFLUSH
extern atomic_t dcpage_flushes;
extern atomic_t dcpage_flushes_xcall;
#endif

static __inline__ void __local_flush_dcache_page(struct page *page)
{
#if (L1DCACHE_SIZE > PAGE_SIZE)
	__flush_dcache_page(page->virtual,
			    ((tlb_type == spitfire) &&
			     page->mapping != NULL));
#else
	if (page->mapping != NULL &&
	    tlb_type == spitfire)
		__flush_icache_page(__pa(page->virtual));
#endif
}

void smp_flush_dcache_page_impl(struct page *page, int cpu)
{
	if (smp_processors_ready) {
		unsigned long mask = 1UL << cpu;

#ifdef CONFIG_DEBUG_DCFLUSH
		atomic_inc(&dcpage_flushes);
#endif
		if (cpu == smp_processor_id()) {
			__local_flush_dcache_page(page);
		} else if ((cpu_present_map & mask) != 0) {
			u64 data0;

			if (tlb_type == spitfire) {
				data0 = ((u64)&xcall_flush_dcache_page_spitfire);
				if (page->mapping != NULL)
					data0 |= ((u64)1 << 32);
				spitfire_xcall_deliver(data0,
						       __pa(page->virtual),
						       (u64) page->virtual,
						       mask);
			} else {
				data0 = ((u64)&xcall_flush_dcache_page_cheetah);
				cheetah_xcall_deliver(data0,
						      __pa(page->virtual),
						      0, mask);
			}
#ifdef CONFIG_DEBUG_DCFLUSH
			atomic_inc(&dcpage_flushes_xcall);
#endif
		}
	}
}

void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
{
	if (smp_processors_ready) {
		unsigned long mask = cpu_present_map & ~(1UL << smp_processor_id());
		u64 data0;

#ifdef CONFIG_DEBUG_DCFLUSH
		atomic_inc(&dcpage_flushes);
#endif
		if (mask == 0UL)
			goto flush_self;
		if (tlb_type == spitfire) {
			data0 = ((u64)&xcall_flush_dcache_page_spitfire);
			if (page->mapping != NULL)
				data0 |= ((u64)1 << 32);
			spitfire_xcall_deliver(data0,
					       __pa(page->virtual),
					       (u64) page->virtual,
					       mask);
		} else {
			data0 = ((u64)&xcall_flush_dcache_page_cheetah);
			cheetah_xcall_deliver(data0,
					      __pa(page->virtual),
					      0, mask);
		}
#ifdef CONFIG_DEBUG_DCFLUSH
		atomic_inc(&dcpage_flushes_xcall);
#endif
	flush_self:
		__local_flush_dcache_page(page);
	}
}
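
/*
 * Illustrative sketch only -- not part of smp.c.  It shows how a caller
 * elsewhere in the kernel might use the smp_call_function() defined above.
 * The names do_remote_work() and run_on_all_cpus() are hypothetical; the
 * four-argument signature is the one declared in this file.
 */
static void do_remote_work(void *info)
{
	/* per-cpu work would go here */
}

static void run_on_all_cpus(void)
{
	/* Cross-call every other cpu; wait=1 makes each remote cpu run
	 * do_remote_work() before it bumps the finished counter, so the
	 * initiator only returns once the remote work is done.
	 */
	smp_call_function(do_remote_work, NULL, 0, 1);

	/* smp_call_function() never runs func on the calling cpu
	 * (see the "Caller runs local copy on master" note above),
	 * so run the local copy explicitly.
	 */
	do_remote_work(NULL);
}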