📄 mce.c
字号:
/*
 * Machine check handler.
 * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs.
 * Rest from unknown author(s).
 * 2004 Andi Kleen. Rewrote most of it.
 */
#include <linux/init.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/rcupdate.h>
#include <linux/kallsyms.h>
#include <linux/sysdev.h>
#include <linux/miscdevice.h>
#include <linux/fs.h>
#include <linux/cpu.h>
#include <linux/percpu.h>
#include <linux/ctype.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/mce.h>
#include <asm/kdebug.h>
#include <asm/uaccess.h>

#define MISC_MCELOG_MINOR 227
#define NR_BANKS 5

static int mce_dont_init;

/* 0: always panic, 1: panic if deadlock possible, 2: try to avoid panic,
   3: never panic or exit (for testing only) */
static int tolerant = 1;
/* Number of banks in use; set by mce_init(), clamped to NR_BANKS. */
static int banks;
/* Per-bank value; do_machine_check() skips any bank whose entry is zero. */
static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL };
/* Bit 0 is set by mce_log() and cleared again by mcheck_timer(). */
static unsigned long console_logged;
static int notify_user;
/* When non-zero, the MSR that mce_get_rip() reads the RIP from. */
static int rip_msr;
/* Selects the error_code mce_init() passes to do_machine_check(). */
static int mce_bootlog = 1;

/*
 * Lockless MCE logging infrastructure.
 * This avoids deadlocks on printk locks without having to break locks. Also
 * separate MCEs from kernel messages to avoid bogus bug reports.
 */

struct mce_log mcelog = {
	MCE_LOG_SIGNATURE,
	MCE_LOG_LEN,
};

/*
 * Copy *mce into the next free slot of the global mcelog.
 *
 * A slot is claimed by advancing mcelog.next with cmpxchg(); if another
 * writer got there first the whole search is retried. Slots still marked
 * ->finished are skipped. Once the log is full the record is dropped and
 * MCE_OVERFLOW is set in mcelog.flags.
 *
 * Side effects: clears mce->finished in the caller's record and, on the
 * first successful log since console_logged was last cleared, sets
 * notify_user.
 */
void mce_log(struct mce *mce)
{
	unsigned next, entry;

	mce->finished = 0;
	wmb();
	for (;;) {
		entry = rcu_dereference(mcelog.next);
		/* The rmb forces the compiler to reload next in each
		   iteration */
		rmb();
		for (;;) {
			/* When the buffer fills up discard new entries. Assume
			   that the earlier errors are the more interesting. */
			if (entry >= MCE_LOG_LEN) {
				set_bit(MCE_OVERFLOW, &mcelog.flags);
				return;
			}
			/* Old left over entry. Skip. */
			if (mcelog.entry[entry].finished) {
				entry++;
				continue;
			}
			break;
		}
		smp_rmb();
		next = entry + 1;
		if (cmpxchg(&mcelog.next, entry, next) == entry)
			break;
	}
	memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
	wmb();
	/* Mark the slot complete only after its contents have been written. */
	mcelog.entry[entry].finished = 1;
	wmb();

	if (!test_and_set_bit(0, &console_logged))
		notify_user = 1;
}

/*
 * Print one machine check record at KERN_EMERG level.
 *
 * The RIP line is printed only when m->rip is non-zero, and is tagged
 * " !INEXACT!" when MCG_STATUS_EIPV is clear in m->mcgstatus. ADDR and
 * MISC are printed only when non-zero.
 */
static void print_mce(struct mce *m)
{
	printk(KERN_EMERG "\n"
	       KERN_EMERG
	       "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
	       m->cpu, m->mcgstatus, m->bank, m->status);
	if (m->rip) {
		printk(KERN_EMERG
		       "RIP%s %02x:<%016Lx> ",
		       !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
		       m->cs, m->rip);
		if (m->cs == __KERNEL_CS)
			print_symbol("{%s}", m->rip);
		printk("\n");
	}
	printk(KERN_EMERG "TSC %Lx ", m->tsc);
	if (m->addr)
		printk("ADDR %Lx ", m->addr);
	if (m->misc)
		printk("MISC %Lx ", m->misc);
	printk("\n");
}

/*
 * Dump the relevant machine check records and panic.
 *
 * @msg:    panic message.
 * @backup: record to print if no logged entry carries the same tsc
 *          (may be NULL).
 * @start:  tsc value taken at handler entry; logged entries whose tsc is
 *          before it are not printed.
 *
 * With tolerant >= 3 only a "Fake panic" line is printed and the function
 * returns.
 */
static void mce_panic(char *msg, struct mce *backup, unsigned long start)
{
	int i;

	oops_begin();
	for (i = 0; i < MCE_LOG_LEN; i++) {
		unsigned long tsc = mcelog.entry[i].tsc;

		if (time_before(tsc, start))
			continue;
		print_mce(&mcelog.entry[i]);
		/* Already printed from the log; don't print it twice. */
		if (backup && mcelog.entry[i].tsc == backup->tsc)
			backup = NULL;
	}
	if (backup)
		print_mce(backup);
	if (tolerant >= 3)
		printk("Fake panic: %s\n", msg);
	else
		/* Never hand a non-literal string to panic() as the format. */
		panic("%s", msg);
}

/*
 * Return non-zero when both X86_FEATURE_MCE and X86_FEATURE_MCA are set
 * in the capability bits of CPU *c.
 */
static int mce_available(struct cpuinfo_x86 *c)
{
	return test_bit(X86_FEATURE_MCE, &c->x86_capability) &&
	       test_bit(X86_FEATURE_MCA, &c->x86_capability);
}

/*
 * Fill in m->rip and m->cs.
 *
 * They are taken from regs when regs is non-NULL and MCG_STATUS_RIPV is
 * set in m->mcgstatus, and zeroed otherwise. When rip_msr is set, the RIP
 * is then overwritten from that MSR, cs is cleared and MCG_STATUS_EIPV is
 * forced on in m->mcgstatus.
 */
static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
{
	if (regs && (m->mcgstatus & MCG_STATUS_RIPV)) {
		m->rip = regs->rip;
		m->cs = regs->cs;
	} else {
		m->rip = 0;
		m->cs = 0;
	}
	if (rip_msr) {
		/* Assume the RIP in the MSR is exact. Is this true? */
		m->mcgstatus |= MCG_STATUS_EIPV;
		rdmsrl(rip_msr, m->rip);
		m->cs = 0;
	}
}

/*
 * The actual machine check handler
 *
 * @regs:       register state of the interrupted context, or NULL when
 *              called from the polling timer or from mce_init(). With
 *              regs == NULL the banks are only read, logged and cleared;
 *              no panic or do_exit() is attempted.
 * @error_code: >= 0  timestamp each record with the TSC and log it;
 *              -1    log without a timestamp (m.tsc stays 0);
 *              -2    do not log, only clear the bank status registers.
 */
void do_machine_check(struct pt_regs * regs, long error_code)
{
	struct mce m, panicm;
	int nowayout = (tolerant < 1);
	int kill_it = 0;
	u64 mcestart = 0;
	int i;
	int panicm_found = 0;

	if (regs)
		notify_die(DIE_NMI, "machine check", regs, error_code, 255, SIGKILL);
	if (!banks)
		return;

	memset(&m, 0, sizeof(struct mce));
	m.cpu = hard_smp_processor_id();
	rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
	if (!(m.mcgstatus & MCG_STATUS_RIPV))
		kill_it = 1;

	/* Entry timestamp; mce_panic() uses it to skip older log entries. */
	rdtscll(mcestart);
	barrier();

	for (i = 0; i < banks; i++) {
		if (!bank[i])
			continue;

		m.misc = 0;
		m.addr = 0;
		m.bank = i;
		m.tsc = 0;

		rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status);
		if ((m.status & MCI_STATUS_VAL) == 0)
			continue;

		if (m.status & MCI_STATUS_EN) {
			/* In theory _OVER could be a nowayout too, but
			   assume any overflowed errors were not fatal. */
			nowayout |= !!(m.status & MCI_STATUS_PCC);
			kill_it |= !!(m.status & MCI_STATUS_UC);
		}

		if (m.status & MCI_STATUS_MISCV)
			rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc);
		if (m.status & MCI_STATUS_ADDRV)
			rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr);

		mce_get_rip(&m, regs);
		if (error_code >= 0)
			rdtscll(m.tsc);
		/* Clear the bank status once its contents have been captured. */
		wrmsrl(MSR_IA32_MC0_STATUS + i*4, 0);
		if (error_code != -2)
			mce_log(&m);

		/* Did this bank cause the exception? */
		/* Assume that the bank with uncorrectable errors did it,
		   and that there is only a single one. */
		if ((m.status & MCI_STATUS_UC) && (m.status & MCI_STATUS_EN)) {
			panicm = m;
			panicm_found = 1;
		}

		add_taint(TAINT_MACHINE_CHECK);
	}

	/* Never do anything final in the polling timer */
	if (!regs)
		goto out;

	/* If we didn't find an uncorrectable error, pick
	   the last one (shouldn't happen, just being safe). */
	if (!panicm_found)
		panicm = m;
	if (nowayout)
		mce_panic("Machine check", &panicm, mcestart);
	if (kill_it) {
		int user_space = 0;

		if (m.mcgstatus & MCG_STATUS_RIPV)
			user_space = panicm.rip && (panicm.cs & 3);

		/* When the machine was in user space and the CPU didn't get
		   confused it's normally not necessary to panic, unless you
		   are paranoid (tolerant == 0)

		   RED-PEN could be more tolerant for MCEs in idle,
		   but most likely they occur at boot anyways, where
		   it is best to just halt the machine. */
		if ((!user_space && (panic_on_oops || tolerant < 2)) ||
		    (unsigned)current->pid <= 1)
			mce_panic("Uncorrected machine check", &panicm, mcestart);

		/* do_exit takes an awful lot of locks and has a
		   slight risk of deadlocking. If you don't want that
		   don't set tolerant >= 2 */
		if (tolerant < 3)
			do_exit(SIGBUS);
	}

 out:
	/* Last thing done in the machine check exception to clear state. */
	wrmsrl(MSR_IA32_MCG_STATUS, 0);
}

/*
 * Periodic polling timer for "silent" machine check errors.
 */

static int check_interval = 5 * 60; /* 5 minutes */
static void mcheck_timer(void *data);
static DECLARE_WORK(mcheck_work, mcheck_timer, NULL);

/*
 * Per-CPU callback for the polling timer: run the machine check handler
 * without register state on the current CPU if mce_available() says it
 * has machine check support.
 */
static void mcheck_check_cpu(void *info)
{
	if (mce_available(&current_cpu_data))
		do_machine_check(NULL, 0);
}

/*
 * Work function: poll every CPU, re-arm itself check_interval seconds
 * from now, and print a one-line notice if mce_log() recorded anything
 * since the last notice.
 */
static void mcheck_timer(void *data)
{
	on_each_cpu(mcheck_check_cpu, NULL, 1, 1);
	schedule_delayed_work(&mcheck_work, check_interval * HZ);

	/*
	 * It's ok to read stale data here for notify_user and
	 * console_logged as we'll simply get the updated versions
	 * on the next mcheck_timer execution and atomic operations
	 * on console_logged act as synchronization for notify_user
	 * writes.
	 */
	if (notify_user && console_logged) {
		notify_user = 0;
		clear_bit(0, &console_logged);
		printk(KERN_INFO "Machine check events logged\n");
	}
}

/*
 * Schedule the first polling run at init time. A check_interval of zero
 * leaves polling off.
 */
static __init int periodic_mcheck_init(void)
{
	if (check_interval)
		schedule_delayed_work(&mcheck_work, check_interval*HZ);
	return 0;
}
__initcall(periodic_mcheck_init);

/*
 * Initialize Machine Checks for a CPU.
 */
static void mce_init(void *dummy)
{
	u64 cap;
	int i;

	rdmsrl(MSR_IA32_MCG_CAP, cap);
	banks = cap & 0xff;
	if (banks > NR_BANKS) {
		/* NOTE(review): this prints the bank count read from the MSR,
		   not the NR_BANKS limit actually used below - confirm which
		   value the message is meant to report. */
		printk(KERN_INFO "MCE: warning: using only %d banks\n", banks);
		banks = NR_BANKS;
	}
	/* Use accurate RIP reporting if available. */
	if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9)
		rip_msr = MSR_IA32_MCG_EIP;

	/* Log the machine checks left over from the previous reset.
	   This also clears all registers */
	do_machine_check(NULL, mce_bootlog ? -1 : -2);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -