📄 perfctr-watchdog.c (excerpt)
		| P6_EVNTSEL_USR
		| P6_NMI_EVENT;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	nmi_hz = adjust_for_32bit_ctr(nmi_hz);
	write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0", nmi_hz);
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= P6_EVNTSEL0_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;	/* unused */
	return 1;
}

static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
{
	/*
	 * P6 based Pentium M needs to re-unmask
	 * the apic vector, but it doesn't hurt
	 * other P6 variants.
	 * ArchPerfmon/Core Duo also needs this.
	 */
	apic_write(APIC_LVTPC, APIC_DM_NMI);

	/* P6/ARCH_PERFMON has 32 bit counter write */
	write_watchdog_counter32(wd->perfctr_msr, NULL, nmi_hz);
}

static const struct wd_ops p6_wd_ops = {
	.reserve = single_msr_reserve,
	.unreserve = single_msr_unreserve,
	.setup = setup_p6_watchdog,
	.rearm = p6_rearm,
	.stop = single_msr_stop_watchdog,
	.perfctr = MSR_P6_PERFCTR0,
	.evntsel = MSR_P6_EVNTSEL0,
	.checkbit = 1ULL << 39,
};

/*
 * Intel P4 performance counters.
 * By far the most complicated of all.
 */
#define MSR_P4_MISC_ENABLE_PERF_AVAIL	(1 << 7)
#define P4_ESCR_EVENT_SELECT(N)	((N) << 25)
#define P4_ESCR_OS		(1 << 3)
#define P4_ESCR_USR		(1 << 2)
#define P4_CCCR_OVF_PMI0	(1 << 26)
#define P4_CCCR_OVF_PMI1	(1 << 27)
#define P4_CCCR_THRESHOLD(N)	((N) << 20)
#define P4_CCCR_COMPLEMENT	(1 << 19)
#define P4_CCCR_COMPARE		(1 << 18)
#define P4_CCCR_REQUIRED	(3 << 16)
#define P4_CCCR_ESCR_SELECT(N)	((N) << 13)
#define P4_CCCR_ENABLE		(1 << 12)
#define P4_CCCR_OVF		(1 << 31)

/*
 * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
 * CRU_ESCR0 (with any non-null event selector) through a complemented
 * max threshold. [IA32-Vol3, Section 14.9.9]
 */
static int setup_p4_watchdog(unsigned nmi_hz)
{
	unsigned int perfctr_msr, evntsel_msr, cccr_msr;
	unsigned int evntsel, cccr_val;
	unsigned int misc_enable, dummy;
	unsigned int ht_num;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
	if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
		return 0;

#ifdef CONFIG_SMP
	/* detect which hyperthread we are on */
	if (smp_num_siblings == 2) {
		unsigned int ebx, apicid;

		ebx = cpuid_ebx(1);
		apicid = (ebx >> 24) & 0xff;
		ht_num = apicid & 1;
	} else
#endif
		ht_num = 0;

	/*
	 * performance counters are shared resources
	 * assign each hyperthread its own set
	 * (re-use the ESCR0 register, seems safe
	 * and keeps the cccr_val the same)
	 */
	if (!ht_num) {
		/* logical cpu 0 */
		perfctr_msr = MSR_P4_IQ_PERFCTR0;
		evntsel_msr = MSR_P4_CRU_ESCR0;
		cccr_msr = MSR_P4_IQ_CCCR0;
		cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
	} else {
		/* logical cpu 1 */
		perfctr_msr = MSR_P4_IQ_PERFCTR1;
		evntsel_msr = MSR_P4_CRU_ESCR0;
		cccr_msr = MSR_P4_IQ_CCCR1;
		cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
	}

	evntsel = P4_ESCR_EVENT_SELECT(0x3F)
		| P4_ESCR_OS
		| P4_ESCR_USR;

	cccr_val |= P4_CCCR_THRESHOLD(15)
		 |  P4_CCCR_COMPLEMENT
		 |  P4_CCCR_COMPARE
		 |  P4_CCCR_REQUIRED;

	wrmsr(evntsel_msr, evntsel, 0);
	wrmsr(cccr_msr, cccr_val, 0);
	write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz);
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	cccr_val |= P4_CCCR_ENABLE;
	wrmsr(cccr_msr, cccr_val, 0);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = cccr_msr;
	return 1;
}

static void stop_p4_watchdog(void)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	wrmsr(wd->cccr_msr, 0, 0);
	wrmsr(wd->evntsel_msr, 0, 0);
}

static int p4_reserve(void)
{
	if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0))
		return 0;
#ifdef CONFIG_SMP
	if (smp_num_siblings > 1 &&
	    !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1))
		goto fail1;
#endif
	if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0))
		goto fail2;
	/* RED-PEN why is ESCR1 not reserved here? */
	return 1;
 fail2:
#ifdef CONFIG_SMP
	if (smp_num_siblings > 1)
		release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
 fail1:
#endif
	release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
	return 0;
}

static void p4_unreserve(void)
{
#ifdef CONFIG_SMP
	if (smp_num_siblings > 1)
		release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
#endif
	release_evntsel_nmi(MSR_P4_CRU_ESCR0);
	release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
}

static void p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
{
	unsigned dummy;
	/*
	 * P4 quirks:
	 * - An overflown perfctr will assert its interrupt
	 *   until the OVF flag in its CCCR is cleared.
	 * - LVTPC is masked on interrupt and must be
	 *   unmasked by the LVTPC handler.
	 */
	rdmsrl(wd->cccr_msr, dummy);
	dummy &= ~P4_CCCR_OVF;
	wrmsrl(wd->cccr_msr, dummy);
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	/* start the cycle over again */
	write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
}

static const struct wd_ops p4_wd_ops = {
	.reserve = p4_reserve,
	.unreserve = p4_unreserve,
	.setup = setup_p4_watchdog,
	.rearm = p4_rearm,
	.stop = stop_p4_watchdog,
	/* RED-PEN this is wrong for the other sibling */
	.perfctr = MSR_P4_BPU_PERFCTR0,
	.evntsel = MSR_P4_BSU_ESCR0,
	.checkbit = 1ULL << 39,
};

/*
 * Watchdog using the Intel architected PerfMon.
 * Used for Core2 and hopefully all future Intel CPUs.
 */
#define ARCH_PERFMON_NMI_EVENT_SEL	ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
#define ARCH_PERFMON_NMI_EVENT_UMASK	ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK

static struct wd_ops intel_arch_wd_ops;

static int setup_intel_arch_watchdog(unsigned nmi_hz)
{
	unsigned int ebx;
	union cpuid10_eax eax;
	unsigned int unused;
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/*
	 * Check whether the Architectural PerfMon supports
	 * Unhalted Core Cycles Event or not.
	 * NOTE: Corresponding bit = 0 in ebx indicates event present.
	 */
	cpuid(10, &(eax.full), &ebx, &unused, &unused);
	if ((eax.split.mask_length <
			(ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
	    (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
		return 0;

	perfctr_msr = wd_ops->perfctr;
	evntsel_msr = wd_ops->evntsel;

	wrmsrl(perfctr_msr, 0UL);

	evntsel = ARCH_PERFMON_EVENTSEL_INT
		| ARCH_PERFMON_EVENTSEL_OS
		| ARCH_PERFMON_EVENTSEL_USR
		| ARCH_PERFMON_NMI_EVENT_SEL
		| ARCH_PERFMON_NMI_EVENT_UMASK;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	nmi_hz = adjust_for_32bit_ctr(nmi_hz);
	write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz);
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;	/* unused */
	intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
	return 1;
}

static struct wd_ops intel_arch_wd_ops __read_mostly = {
	.reserve = single_msr_reserve,
	.unreserve = single_msr_unreserve,
	.setup = setup_intel_arch_watchdog,
	.rearm = p6_rearm,
	.stop = single_msr_stop_watchdog,
	.perfctr = MSR_ARCH_PERFMON_PERFCTR1,
	.evntsel = MSR_ARCH_PERFMON_EVENTSEL1,
};

static struct wd_ops coreduo_wd_ops = {
	.reserve = single_msr_reserve,
	.unreserve = single_msr_unreserve,
	.setup = setup_intel_arch_watchdog,
	.rearm = p6_rearm,
	.stop = single_msr_stop_watchdog,
	.perfctr = MSR_ARCH_PERFMON_PERFCTR0,
	.evntsel = MSR_ARCH_PERFMON_EVENTSEL0,
};

static void probe_nmi_watchdog(void)
{
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 &&
		    boot_cpu_data.x86 != 16)
			return;
		wd_ops = &k7_wd_ops;
		break;
	case X86_VENDOR_INTEL:
		/*
		 * Work around Core Duo (Yonah) errata AE49 where perfctr1
		 * doesn't have a working enable bit.
		 */
		if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) {
			wd_ops = &coreduo_wd_ops;
			break;
		}
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
			wd_ops = &intel_arch_wd_ops;
			break;
		}
		switch (boot_cpu_data.x86) {
		case 6:
			if (boot_cpu_data.x86_model > 0xd)
				return;
			wd_ops = &p6_wd_ops;
			break;
		case 15:
			if (boot_cpu_data.x86_model > 0x4)
				return;
			wd_ops = &p4_wd_ops;
			break;
		default:
			return;
		}
		break;
	}
}

/* Interface to nmi.c */

int lapic_watchdog_init(unsigned nmi_hz)
{
	if (!wd_ops) {
		probe_nmi_watchdog();
		if (!wd_ops)
			return -1;

		if (!wd_ops->reserve()) {
			printk(KERN_ERR
				"NMI watchdog: cannot reserve perfctrs\n");
			return -1;
		}
	}

	if (!(wd_ops->setup(nmi_hz))) {
		printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n",
			raw_smp_processor_id());
		return -1;
	}

	return 0;
}

void lapic_watchdog_stop(void)
{
	if (wd_ops)
		wd_ops->stop();
}

unsigned lapic_adjust_nmi_hz(unsigned hz)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
	    wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1)
		hz = adjust_for_32bit_ctr(hz);
	return hz;
}

int lapic_wd_event(unsigned nmi_hz)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
	u64 ctr;

	rdmsrl(wd->perfctr_msr, ctr);
	if (ctr & wd_ops->checkbit) {	/* perfctr still running? */
		return 0;
	}
	wd_ops->rearm(wd, nmi_hz);
	return 1;
}

int lapic_watchdog_ok(void)
{
	return wd_ops != NULL;
}
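For reference: the arming helpers called throughout this excerpt (write_watchdog_counter, write_watchdog_counter32 and adjust_for_32bit_ctr) are defined earlier in this file and are not shown above. The idea is that the watchdog programs the performance counter with a negative start value, -(cpu_khz * 1000 / nmi_hz), so that counting core cycles makes it overflow, and thereby raise the NMI, roughly nmi_hz times per second. The following is only a sketch of those helpers under that assumption; the debug printout via descr is omitted and details may differ between kernel versions:

/*
 * Sketch of the counter-arming helpers defined earlier in this file
 * (not part of this excerpt); shown here only to clarify the call sites.
 */
static void write_watchdog_counter(unsigned int perfctr_msr,
				   const char *descr, unsigned nmi_hz)
{
	u64 count = (u64)cpu_khz * 1000;	/* core cycles per second */

	do_div(count, nmi_hz);			/* cycles per watchdog tick */
	wrmsrl(perfctr_msr, 0 - count);		/* overflow after 'count' cycles */
}

/*
 * 32-bit variant for P6/ARCH_PERFMON counters; its callers first run
 * nmi_hz through adjust_for_32bit_ctr(), which raises nmi_hz when
 * cpu_khz * 1000 / nmi_hz would not fit in 31 bits.
 */
static void write_watchdog_counter32(unsigned int perfctr_msr,
				     const char *descr, unsigned nmi_hz)
{
	u64 count = (u64)cpu_khz * 1000;

	do_div(count, nmi_hz);
	wrmsr(perfctr_msr, (u32)(-count), 0);	/* write low 32 bits only */
}

This negative-start scheme is also why lapic_wd_event() tests wd_ops->checkbit (the counter's high bit): while that bit is still set the counter has not yet overflowed, so the NMI came from elsewhere and the handler returns without rearming.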