📄 sn2_smp.c
字号:
* -- unlock the global lock * -- send IPI to notify other cpus * -- process the data ourselves */ ptcParamsEmpty = next; spin_unlock(&ptcParamsLock); smp_send_flush_tlb(); local_irq_save(irqflags); sn1_received_flush_tlb(); local_irq_restore(irqflags); /* * Since IPIs are polled event (for now), we need to wait til the * TLB flush has started. * wait for the flush to complete */ while (atomic_read(¶ms->unfinished_count) > 0) barrier();}#endif /* PTCG_WAR *//** * sn2_global_tlb_purge - globally purge translation cache of virtual address range * @start: start of virtual address range * @end: end of virtual address range * @nbits: specifies number of bytes to purge per instruction (num = 1<<(nbits & 0xfc)) * * Purges the translation caches of all processors of the given virtual address * range. */voidsn2_global_tlb_purge (unsigned long start, unsigned long end, unsigned long nbits){ int cnode, mycnode, nasid; volatile unsigned long *ptc0, *ptc1; unsigned long flags=0, data0, data1; long zzbase, zztime, zztime2; /* * Special case 1 cpu & 1 node. Use local purges. */#ifdef PTCG_WAR sn1_global_tlb_purge(start, end, nbits); return;#endif /* PTCG_WAR */ data0 = (1UL<<SH_PTC_0_A_SHFT) | (nbits<<SH_PTC_0_PS_SHFT) | ((ia64_get_rr(start)>>8)<<SH_PTC_0_RID_SHFT) | (1UL<<SH_PTC_0_START_SHFT); ptc0 = (long*)GLOBAL_MMR_PHYS_ADDR(0, SH_PTC_0); ptc1 = (long*)GLOBAL_MMR_PHYS_ADDR(0, SH_PTC_1); mycnode = local_cnodeid(); /* * For now, we dont want to spin uninterruptibly waiting * for the lock. Makes hangs hard to debug. */ zzbase = smp_processor_id()*16; zz[zzbase+0] += 0x0000ffff00000001UL; local_irq_save(flags); zztime = get_itc(); spin_lock(&sn2_global_ptc_lock); zz[zzbase+0] += 0xffff000000000000UL; zztime2 = get_itc(); do { data1 = start | (1UL<<SH_PTC_1_START_SHFT); for (cnode = 0; cnode < numnodes; cnode++) { if (is_headless_node(cnode)) continue; if (cnode == mycnode) { asm volatile ("ptc.ga %0,%1;;srlz.i;;" :: "r"(start), "r"(nbits<<2) : "memory"); } else { nasid = cnodeid_to_nasid(cnode); ptc0 = CHANGE_NASID(nasid, ptc0); ptc1 = CHANGE_NASID(nasid, ptc1); pio_atomic_phys_write_mmrs(ptc0, data0, ptc1, data1); } } if (wait_piowc() & SH_PIO_WRITE_STATUS_0_WRITE_DEADLOCK_MASK) { zz[zzbase+1]++; sn2_ptc_deadlock_recovery(data0, data1); } start += (1UL << nbits); } while (start < end); zz[zzbase+0] -= 0xffffffff00000000UL; zztime2 = get_itc() - zztime2; spin_unlock_irqrestore(&sn2_global_ptc_lock, flags); zztime = get_itc() - zztime; zz[zzbase+6] += zztime; if (zztime > zz[zzbase+7]) zz[zzbase+7] = zztime; zz[zzbase+8] += zztime2; if (zztime2 > zz[zzbase+9]) zz[zzbase+9] = zztime2; }/* * sn2_ptc_deadlock_recovery * * Recover from PTC deadlocks conditions. Recovery requires stepping thru each * TLB flush transaction. The recovery sequence is somewhat tricky & is * coded in assembly language. */voidsn2_ptc_deadlock_recovery(unsigned long data0, unsigned long data1){ extern void sn2_ptc_deadlock_recovery_core(long*, long, long*, long, long*); int cnode, mycnode, nasid; long *ptc0, *ptc1, *piows; sn2_ptc_deadlock_count++; ptc0 = (long*)GLOBAL_MMR_PHYS_ADDR(0, SH_PTC_0); ptc1 = (long*)GLOBAL_MMR_PHYS_ADDR(0, SH_PTC_1); piows = (long*)pda.pio_write_status_addr; mycnode = local_cnodeid(); for (cnode = 0; cnode < numnodes; cnode++) { if (is_headless_node(cnode) || cnode == mycnode) continue; nasid = cnodeid_to_nasid(cnode); ptc0 = CHANGE_NASID(nasid, ptc0); ptc1 = CHANGE_NASID(nasid, ptc1); sn2_ptc_deadlock_recovery_core(ptc0, data0, ptc1, data1, piows); }}/** * sn_send_IPI_phys - send an IPI to a Nasid and slice * @physid: physical cpuid to receive the interrupt. * @vector: command to send * @delivery_mode: delivery mechanism * * Sends an IPI (interprocessor interrupt) to the processor specified by * @physid * * @delivery_mode can be one of the following * * %IA64_IPI_DM_INT - pend an interrupt * %IA64_IPI_DM_PMI - pend a PMI * %IA64_IPI_DM_NMI - pend an NMI * %IA64_IPI_DM_INIT - pend an INIT interrupt */voidsn_send_IPI_phys(long physid, int vector, int delivery_mode){ long nasid, slice; long val; volatile long *p;#if defined(BUS_INT_WAR) && defined(CONFIG_SHUB_1_0_SPECIFIC) if (vector != ap_wakeup_vector && delivery_mode == IA64_IPI_DM_INT) { return; }#endif nasid = cpu_physical_id_to_nasid(physid); slice = cpu_physical_id_to_slice(physid); p = (long*)GLOBAL_MMR_PHYS_ADDR(nasid, SH_IPI_INT); val = (1UL<<SH_IPI_INT_SEND_SHFT) | (physid<<SH_IPI_INT_PID_SHFT) | ((long)delivery_mode<<SH_IPI_INT_TYPE_SHFT) | ((long)vector<<SH_IPI_INT_IDX_SHFT) | (0x000feeUL<<SH_IPI_INT_BASE_SHFT); mb(); pio_phys_write_mmr(p, val);#ifndef CONFIG_SHUB_1_0_SPECIFIC /* doesnt work on shub 1.0 */ wait_piowc();#endif}/** * sn2_send_IPI - send an IPI to a processor * @cpuid: target of the IPI * @vector: command to send * @delivery_mode: delivery mechanism * @redirect: redirect the IPI? * * Sends an IPI (InterProcessor Interrupt) to the processor specified by * @cpuid. @vector specifies the command to send, while @delivery_mode can * be one of the following * * %IA64_IPI_DM_INT - pend an interrupt * %IA64_IPI_DM_PMI - pend a PMI * %IA64_IPI_DM_NMI - pend an NMI * %IA64_IPI_DM_INIT - pend an INIT interrupt */voidsn2_send_IPI(int cpuid, int vector, int delivery_mode, int redirect){ long physid; physid = cpu_physical_id(cpuid); sn_send_IPI_phys(physid, vector, delivery_mode);}/* ----------------------------------------------------------------------------------------------- */#include <linux/proc_fs.h>static struct proc_dir_entry *ptcg_stats;static intptcg_read_proc(char *buffer, char **start, off_t off, int count, int *eof, void *data){ long cpu, *p, cnt; int len = 0; p = (long*)LOCAL_MMR_ADDR(SH_DIAG_MSG_DATA7L); len += sprintf(buffer + len, "PTCG Stats (option 0x%lx)\n", use_tlb_hack); len += sprintf(buffer + len, "%-14s", "CPU"); for (cpu=0; cpu<smp_num_cpus; cpu++) len += sprintf(buffer + len, "%10ld", cpu); len += sprintf(buffer + len, "\n"); len += sprintf(buffer + len, "%-14s", "flushes"); for (cpu=0; cpu<smp_num_cpus; cpu++) len += sprintf(buffer + len, "%10ld", zz[cpu*16+0]&0xffffffff); len += sprintf(buffer + len, "\n"); len += sprintf(buffer + len, "%-14s", "deadlocks"); for (cpu=0; cpu<smp_num_cpus; cpu++) len += sprintf(buffer + len, "%10ld", zz[cpu*16+1]); len += sprintf(buffer + len, "\n"); len += sprintf(buffer + len, "%-14s", "aver-us"); for (cpu=0; cpu<smp_num_cpus; cpu++) { cnt = zz[cpu*16+0]&0xffffffff; len += sprintf(buffer + len, "%10ld", cnt ? zz[cpu*16+6]/(cnt)/cpu_data(cpu)->cyc_per_usec : 0); } len += sprintf(buffer + len, "\n"); len += sprintf(buffer + len, "%-14s", "max-us"); for (cpu=0; cpu<smp_num_cpus; cpu++) len += sprintf(buffer + len, "%10ld", zz[cpu*16+7]/cpu_data(cpu)->cyc_per_usec); len += sprintf(buffer + len, "\n"); len += sprintf(buffer + len, "%-14s", "iaver-us"); for (cpu=0; cpu<smp_num_cpus; cpu++) { cnt = zz[cpu*16+0]&0xffffffff; len += sprintf(buffer + len, "%10ld", cnt ? zz[cpu*16+8]/(cnt)/cpu_data(cpu)->cyc_per_usec : 0); } len += sprintf(buffer + len, "\n"); len += sprintf(buffer + len, "%-14s", "imax-us"); for (cpu=0; cpu<smp_num_cpus; cpu++) len += sprintf(buffer + len, "%10ld", zz[cpu*16+9]/cpu_data(cpu)->cyc_per_usec); len += sprintf(buffer + len, "\n"); len += sprintf(buffer + len, "%-14s", "long-piowc"); for (cpu=0; cpu<smp_num_cpus; cpu++) len += sprintf(buffer + len, "%10ld", zz[cpu*16+9]); len += sprintf(buffer + len, "\n"); len += sprintf(buffer + len, "%-14s", "ipi-rp-error"); for (cpu=0; cpu<smp_num_cpus; cpu++) { p = (long*)GLOBAL_MMR_ADDR(cpuid_to_nasid(cpu), SH_XN_IILB_LB_CMP_ENABLE0); len += sprintf(buffer + len, "%10ld", *p); } len += sprintf(buffer + len, "\n"); len += sprintf(buffer + len, "%-14s", "lb-cb-error"); for (cpu=0; cpu<smp_num_cpus; cpu++) { p = (long*)GLOBAL_MMR_ADDR(cpuid_to_nasid(cpu), SH_XN_IILB_LB_CMP_ENABLE1); len += sprintf(buffer + len, "%10ld", *p); } len += sprintf(buffer + len, "\n"); if (len <= off+count) *eof = 1; *start = buffer + off; len -= off; if (len>count) len = count; if (len<0) len = 0; return len;}static intptcg_write_proc (struct file *file, const char *userbuf, unsigned long count, void *data){ extern long atoi(char *); char buf[80]; long val; long cpu; long *p; if (copy_from_user(buf, userbuf, count < sizeof(buf) ? count : sizeof(buf))) return -EFAULT; val = atoi(buf); if (val&4) val |= 1; use_tlb_hack = val; memset((void*)zz, 0, sizeof(zz)); for (cpu=0; cpu<smp_num_cpus; cpu++) { p = (long*)GLOBAL_MMR_ADDR(cpuid_to_nasid(cpu), SH_XN_IILB_LB_CMP_ENABLE0); *p = 0; p = (long*)GLOBAL_MMR_ADDR(cpuid_to_nasid(cpu), SH_XN_IILB_LB_CMP_ENABLE1); *p = 0; } return count;} static intsn2_ptcg_stats_init(void){ if ((ptcg_stats = create_proc_entry("ptcg_stats", 0644, NULL)) == NULL) { printk("unable to create proc entry for ptcg_stats"); return -1; } ptcg_stats->read_proc = ptcg_read_proc; ptcg_stats->write_proc = ptcg_write_proc; return 0;}#include <linux/module.h>module_init(sn2_ptcg_stats_init);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -