sn2_smp.c
/*
 * SN2 Platform specific SMP Support
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (C) 2000-2006 Silicon Graphics, Inc. All rights reserved.
 */

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/spinlock.h>
#include <linux/threads.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/mmzone.h>
#include <linux/module.h>
#include <linux/bitops.h>
#include <linux/nodemask.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <asm/processor.h>
#include <asm/irq.h>
#include <asm/sal.h>
#include <asm/system.h>
#include <asm/delay.h>
#include <asm/io.h>
#include <asm/smp.h>
#include <asm/tlb.h>
#include <asm/numa.h>
#include <asm/hw_irq.h>
#include <asm/current.h>
#ifdef XEN
#include <asm/sn/arch.h>
#endif
#include <asm/sn/sn_cpuid.h>
#include <asm/sn/sn_sal.h>
#include <asm/sn/addrs.h>
#include <asm/sn/shub_mmr.h>
#include <asm/sn/nodepda.h>
#include <asm/sn/rw_mmr.h>

DEFINE_PER_CPU(struct ptc_stats, ptcstats);
DECLARE_PER_CPU(struct ptc_stats, ptcstats);

static __cacheline_aligned DEFINE_SPINLOCK(sn2_global_ptc_lock);

extern unsigned long
sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long,
                               volatile unsigned long *, unsigned long,
                               volatile unsigned long *, unsigned long);
void
sn2_ptc_deadlock_recovery(short *, short, short, int,
                          volatile unsigned long *, unsigned long,
                          volatile unsigned long *, unsigned long);

/*
 * Note: some of the following is captured here to make debugging easier
 * (the macros make more sense if you see the debug patch - not posted)
 */
#define sn2_ptctest                     0
#define local_node_uses_ptc_ga(sh1)     ((sh1) ? 1 : 0)
#define max_active_pio(sh1)             ((sh1) ? 32 : 7)
#define reset_max_active_on_deadlock()  1
#ifndef XEN
#define PTC_LOCK(sh1)   ((sh1) ? &sn2_global_ptc_lock : &sn_nodepda->ptc_lock)
#else
#define PTC_LOCK(sh1)   &sn2_global_ptc_lock
#endif

struct ptc_stats {
        unsigned long ptc_l;
        unsigned long change_rid;
        unsigned long shub_ptc_flushes;
        unsigned long nodes_flushed;
        unsigned long deadlocks;
        unsigned long deadlocks2;
        unsigned long lock_itc_clocks;
        unsigned long shub_itc_clocks;
        unsigned long shub_itc_clocks_max;
        unsigned long shub_ptc_flushes_not_my_mm;
};

#define sn2_ptctest 0

/*
 * Spin until the local Shub's pending PIO write count drains back to its
 * idle value; returns nonzero if the Shub flagged a PIO write deadlock.
 */
static inline unsigned long wait_piowc(void)
{
        volatile unsigned long *piows;
        unsigned long zeroval, ws;

        piows = pda->pio_write_status_addr;
        zeroval = pda->pio_write_status_val;
        do {
                cpu_relax();
        } while (((ws = *piows) & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK) != zeroval);
        return (ws & SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK) != 0;
}

#ifndef XEN /* No idea if Xen will ever support this */
/**
 * sn_migrate - SN-specific task migration actions
 * @task: Task being migrated to new CPU
 *
 * SN2 PIO writes from separate CPUs are not guaranteed to arrive in order.
 * Context switching user threads which have memory-mapped MMIO may cause
 * PIOs to issue from separate CPUs, thus the PIO writes must be drained
 * from the previous CPU's Shub before execution resumes on the new CPU.
 */
void sn_migrate(struct task_struct *task)
{
        pda_t *last_pda = pdacpu(task_thread_info(task)->last_cpu);
        volatile unsigned long *adr = last_pda->pio_write_status_addr;
        unsigned long val = last_pda->pio_write_status_val;

        /* Drain PIO writes from old CPU's Shub */
        while (unlikely((*adr & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK)
                        != val))
                cpu_relax();
}

void sn_tlb_migrate_finish(struct mm_struct *mm)
{
        /* flush_tlb_mm is inefficient if there is more than one user of the mm */
#ifndef XEN
        if (mm == current->mm && mm && atomic_read(&mm->mm_users) == 1)
#else
        if (mm == &current->arch.mm && mm && atomic_read(&mm->mm_users) == 1)
#endif
                flush_tlb_mm(mm);
}
#endif

/**
 * sn2_global_tlb_purge - globally purge translation cache of virtual address range
 * @mm: mm_struct containing virtual address range
 * @start: start of virtual address range
 * @end: end of virtual address range
 * @nbits: specifies number of bytes to purge per instruction (num = 1<<(nbits & 0xfc))
 *
 * Purges the translation caches of all processors of the given virtual address
 * range.
 *
 * Note:
 *      - cpu_vm_mask is a bit mask that indicates which cpus have loaded the context.
 *      - cpu_vm_mask is converted into a nodemask of the nodes containing the
 *        cpus in cpu_vm_mask.
 *      - if only one bit is set in cpu_vm_mask & it is the current cpu & the
 *        process is purging its own virtual address range, then only the
 *        local TLB needs to be flushed. This flushing can be done using
 *        ptc.l. This is the common case & avoids the global spinlock.
 *      - if multiple cpus have loaded the context, then flushing has to be
 *        done with ptc.g/MMRs under protection of the global ptc_lock.
 */

#ifdef XEN /* Xen is soooooooo stupid! */
// static cpumask_t mask_all = CPU_MASK_ALL;
#endif

#ifdef XEN
static DEFINE_SPINLOCK(sn2_ptcg_lock);

struct sn_flush_struct {
        unsigned long start;
        unsigned long end;
        unsigned long nbits;
};

/* Purge a range with ptc.ga; run on one CPU per remote node. */
static void sn_flush_ptcga_cpu(void *ptr)
{
        struct sn_flush_struct *sn_flush = ptr;
        unsigned long start, end, nbits;

        start = sn_flush->start;
        end = sn_flush->end;
        nbits = sn_flush->nbits;

        /*
         * Contention me harder!!!
         */
        /* HW requires global serialization of ptc.ga. */
        spin_lock(&sn2_ptcg_lock);
        {
                do {
                        /*
                         * Flush ALAT entries also.
                         */
                        ia64_ptcga(start, (nbits << 2));
                        ia64_srlz_i();
                        start += (1UL << nbits);
                } while (start < end);
        }
        spin_unlock(&sn2_ptcg_lock);
}

/*
 * Xen variant: purge the range locally with ptc.ga, then ask one CPU on each
 * other node to purge the same range.
 */
void
sn2_global_tlb_purge(unsigned long start, unsigned long end, unsigned long nbits)
{
        nodemask_t nodes_flushed;
        cpumask_t selected_cpus;
        struct sn_flush_struct flush_data;
        int cpu, cnode, i;
        static DEFINE_SPINLOCK(sn2_ptcg_lock2);

        nodes_clear(nodes_flushed);
        cpus_clear(selected_cpus);

        spin_lock(&sn2_ptcg_lock2);
        node_set(cpu_to_node(smp_processor_id()), nodes_flushed);
        i = 0;
        for_each_cpu(cpu) {
                cnode = cpu_to_node(cpu);
                if (!node_isset(cnode, nodes_flushed)) {
                        cpu_set(cpu, selected_cpus);
                        i++;
                }
                node_set(cnode, nodes_flushed);
        }

        /* Record the range for remote CPUs before the local loop advances 'start'. */
        flush_data.start = start;
        flush_data.end = end;
        flush_data.nbits = nbits;

        /* HW requires global serialization of ptc.ga. */
        spin_lock(&sn2_ptcg_lock);
        {
                do {
                        /*
                         * Flush ALAT entries also.
                         */
                        ia64_ptcga(start, (nbits << 2));
                        ia64_srlz_i();
                        start += (1UL << nbits);
                } while (start < end);
        }
        spin_unlock(&sn2_ptcg_lock);

        if (i)
                on_selected_cpus(selected_cpus, sn_flush_ptcga_cpu,
                                 &flush_data, 1, 1);
        spin_unlock(&sn2_ptcg_lock2);
}
#else
void
sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start,
                     unsigned long end, unsigned long nbits)
{
        int i, ibegin, shub1, cnode, mynasid, cpu, lcpu = 0, nasid;
        int mymm = (mm == current->active_mm && mm == current->mm);
        int use_cpu_ptcga;
        volatile unsigned long *ptc0, *ptc1;
        unsigned long itc, itc2, flags, data0 = 0, data1 = 0, rr_value, old_rr = 0;
        short nasids[MAX_NUMNODES], nix;
        nodemask_t nodes_flushed;
        int active, max_active, deadlock;

        nodes_clear(nodes_flushed);
        i = 0;

#ifndef XEN /* One day Xen will grow up! */
        for_each_cpu_mask(cpu, mm->cpu_vm_mask) {
                cnode = cpu_to_node(cpu);
                node_set(cnode, nodes_flushed);
                lcpu = cpu;
                i++;
        }
#else
        for_each_cpu(cpu) {
                cnode = cpu_to_node(cpu);
                node_set(cnode, nodes_flushed);
                lcpu = cpu;
                i++;
        }
#endif

        if (i == 0)
                return;

        preempt_disable();

#ifndef XEN
        if (likely(i == 1 && lcpu == smp_processor_id() && mymm)) {
                do {
                        ia64_ptcl(start, nbits << 2);
                        start += (1UL << nbits);
                } while (start < end);
                ia64_srlz_i();
                __get_cpu_var(ptcstats).ptc_l++;
                preempt_enable();
                return;
        }

        if (atomic_read(&mm->mm_users) == 1 && mymm) {
                flush_tlb_mm(mm);
                __get_cpu_var(ptcstats).change_rid++;
                preempt_enable();
                return;
        }
#endif

        itc = ia64_get_itc();
        nix = 0;
        for_each_node_mask(cnode, nodes_flushed)
                nasids[nix++] = cnodeid_to_nasid(cnode);

#ifndef XEN
        rr_value = (mm->context << 3) | REGION_NUMBER(start);
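For context, the non-Xen variant of sn2_global_tlb_purge is not called directly by generic kernel code; it is reached through the ia64 machine-vector indirection. The sketch below shows how that wiring typically looks in the Linux tree. The typedef and macro names are recalled from the machvec convention (include/asm-ia64/machvec_sn2.h) and the caller purge_user_range is purely illustrative; none of this is part of this file.

/* Sketch only: machine-vector wiring assumed from the ia64 machvec convention. */
extern ia64_mv_global_tlb_purge_t sn2_global_tlb_purge;
#define platform_global_tlb_purge       sn2_global_tlb_purge

/* Hypothetical caller: generic ia64 TLB code purges a user range roughly like this. */
static void purge_user_range(struct mm_struct *mm, unsigned long start,
                             unsigned long end, unsigned long nbits)
{
        /* On SN2 platforms this resolves to sn2_global_tlb_purge() above. */
        platform_global_tlb_purge(mm, start, end, nbits);
}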