📄 mce_64.c

📁 linux 内核源代码
💻 C
📖 第 1 页 / 共 2 页
字号:
12 下一页
/*
 * Machine check handler.
 * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs.
 * Rest from unknown author(s).
 * 2004 Andi Kleen. Rewrote most of it.
 */
#include <linux/init.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/rcupdate.h>
#include <linux/kallsyms.h>
#include <linux/sysdev.h>
#include <linux/miscdevice.h>
#include <linux/fs.h>
#include <linux/capability.h>
#include <linux/cpu.h>
#include <linux/percpu.h>
#include <linux/poll.h>
#include <linux/thread_info.h>
#include <linux/ctype.h>
#include <linux/kmod.h>
#include <linux/kdebug.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/mce.h>
#include <asm/uaccess.h>
#include <asm/smp.h>
#include <asm/idle.h>

#define MISC_MCELOG_MINOR 227
#define NR_BANKS 6

atomic_t mce_entry;

static int mce_dont_init;

/*
 * Tolerant levels:
 *   0: always panic on uncorrected errors, log corrected errors
 *   1: panic or SIGBUS on uncorrected errors, log corrected errors
 *   2: SIGBUS or log uncorrected errors (if possible), log corrected errors
 *   3: never panic or SIGBUS, log all errors (for testing only)
 */
static int tolerant = 1;
static int banks;
static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL };
static unsigned long notify_user;
static int rip_msr;
static int mce_bootlog = 1;
static atomic_t mce_events;

static char trigger[128];
static char *trigger_argv[2] = { trigger, NULL };

static DECLARE_WAIT_QUEUE_HEAD(mce_wait);

/*
 * Lockless MCE logging infrastructure.
 * This avoids deadlocks on printk locks without having to break locks. Also
 * separate MCEs from kernel messages to avoid bogus bug reports.
 */

struct mce_log mcelog = {
	MCE_LOG_SIGNATURE,
	MCE_LOG_LEN,
};

/*
 * Append one record to the global mcelog buffer.
 *
 * @mce: record to copy into the log; its 'finished' field is cleared
 *       before the copy.
 *
 * A slot is claimed by advancing mcelog.next with cmpxchg(), retrying if
 * another writer got there first.  Slots whose 'finished' flag is still
 * set are skipped.  If no slot below MCE_LOG_LEN is free the record is
 * dropped and MCE_OVERFLOW is set in mcelog.flags.  On success bit 0 of
 * notify_user is set so that mce_notify_user() will report the event.
 */
void mce_log(struct mce *mce)
{
	unsigned next, entry;

	atomic_inc(&mce_events);
	mce->finished = 0;
	wmb();
	for (;;) {
		entry = rcu_dereference(mcelog.next);
		for (;;) {
			/* When the buffer fills up discard new entries. Assume
			   that the earlier errors are the more interesting. */
			if (entry >= MCE_LOG_LEN) {
				set_bit(MCE_OVERFLOW, &mcelog.flags);
				return;
			}
			/* Old left over entry. Skip. */
			if (mcelog.entry[entry].finished) {
				entry++;
				continue;
			}
			break;
		}
		smp_rmb();
		next = entry + 1;
		if (cmpxchg(&mcelog.next, entry, next) == entry)
			break;
	}
	memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
	wmb();
	/* Mark the slot complete only after the payload has been written. */
	mcelog.entry[entry].finished = 1;
	wmb();

	set_bit(0, &notify_user);
}

/*
 * Print one machine check record to the kernel log at KERN_EMERG level.
 *
 * The RIP line is printed only when m->rip is non-zero, and the ADDR and
 * MISC values only when they are non-zero.
 */
static void print_mce(struct mce *m)
{
	printk(KERN_EMERG "\n"
	       KERN_EMERG "HARDWARE ERROR\n"
	       KERN_EMERG
	       "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
	       m->cpu, m->mcgstatus, m->bank, m->status);
	if (m->rip) {
		printk(KERN_EMERG "RIP%s %02x:<%016Lx> ",
		       !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
		       m->cs, m->rip);
		if (m->cs == __KERNEL_CS)
			print_symbol("{%s}", m->rip);
		printk("\n");
	}
	printk(KERN_EMERG "TSC %Lx ", m->tsc);
	if (m->addr)
		printk("ADDR %Lx ", m->addr);
	if (m->misc)
		printk("MISC %Lx ", m->misc);
	printk("\n");
	printk(KERN_EMERG "This is not a software problem!\n");
	printk(KERN_EMERG "Run through mcelog --ascii to decode "
	       "and contact your hardware vendor\n");
}

/*
 * Print the logged machine checks and panic.
 *
 * @msg:    panic message.
 * @backup: record to print if no printed log entry carries the same TSC
 *          value; may be NULL.
 * @start:  TSC value; log entries with an earlier TSC are not printed.
 */
static void mce_panic(char *msg, struct mce *backup, unsigned long start)
{
	int i;

	oops_begin();
	for (i = 0; i < MCE_LOG_LEN; i++) {
		unsigned long tsc = mcelog.entry[i].tsc;

		if (time_before(tsc, start))
			continue;
		print_mce(&mcelog.entry[i]);
		/* The backup record made it into the log; don't print twice. */
		if (backup && mcelog.entry[i].tsc == backup->tsc)
			backup = NULL;
	}
	if (backup)
		print_mce(backup);
	/* Pass msg as an argument so it is never interpreted as a format. */
	panic("%s", msg);
}

/* Return non-zero if the CPU advertises both the MCE and MCA features. */
static int mce_available(struct cpuinfo_x86 *c)
{
	return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
}

/*
 * Fill in m->rip and m->cs for a record.
 *
 * The values come from @regs when @regs is non-NULL and MCG_STATUS_RIPV is
 * set in m->mcgstatus; otherwise both are zeroed.  If rip_msr is set, the
 * RIP is instead read from that MSR, MCG_STATUS_EIPV is forced on and
 * m->cs is cleared.
 */
static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
{
	if (regs && (m->mcgstatus & MCG_STATUS_RIPV)) {
		m->rip = regs->rip;
		m->cs = regs->cs;
	} else {
		m->rip = 0;
		m->cs = 0;
	}
	if (rip_msr) {
		/* Assume the RIP in the MSR is exact. Is this true? */
		m->mcgstatus |= MCG_STATUS_EIPV;
		rdmsrl(rip_msr, m->rip);
		m->cs = 0;
	}
}

/*
 * The actual machine check handler
 *
 * @regs:       register state at the time of the exception, or NULL when
 *              called from the polling path (see mcheck_check_cpu()).
 *              With NULL regs nothing "final" (panic, kill) is done.
 * @error_code: a negative value skips reading the TSC into the record;
 *              -2 additionally skips logging the record.
 *
 * If a die notifier returns NOTIFY_STOP the handler leaves immediately
 * without scanning or clearing the banks.
 */
void do_machine_check(struct pt_regs * regs, long error_code)
{
	struct mce m, panicm;
	u64 mcestart = 0;
	int i;
	int panicm_found = 0;
	/*
	 * If no_way_out gets set, there is no safe way to recover from this
	 * MCE.  If tolerant is cranked up, we'll try anyway.
	 */
	int no_way_out = 0;
	/*
	 * If kill_it gets set, there might be a way to recover from this
	 * error.
	 */
	int kill_it = 0;

	atomic_inc(&mce_entry);

	/*
	 * Let die notifiers see the event first and honour NOTIFY_STOP
	 * instead of ignoring the notifier chain's verdict.
	 */
	if (regs &&
	    notify_die(DIE_NMI, "machine check", regs, error_code, 18,
		       SIGKILL) == NOTIFY_STOP)
		goto out2;
	if (!banks)
		goto out2;

	memset(&m, 0, sizeof(struct mce));
	m.cpu = smp_processor_id();
	rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
	/* if the restart IP is not valid, we're done for */
	if (!(m.mcgstatus & MCG_STATUS_RIPV))
		no_way_out = 1;

	rdtscll(mcestart);
	barrier();

	for (i = 0; i < banks; i++) {
		if (!bank[i])
			continue;

		m.misc = 0;
		m.addr = 0;
		m.bank = i;
		m.tsc = 0;

		rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status);
		if ((m.status & MCI_STATUS_VAL) == 0)
			continue;

		if (m.status & MCI_STATUS_EN) {
			/* if PCC was set, there's no way out */
			no_way_out |= !!(m.status & MCI_STATUS_PCC);
			/*
			 * If this error was uncorrectable and there was
			 * an overflow, we're in trouble.  If no overflow,
			 * we might get away with just killing a task.
			 */
			if (m.status & MCI_STATUS_UC) {
				if (tolerant < 1 || m.status & MCI_STATUS_OVER)
					no_way_out = 1;
				kill_it = 1;
			}
		}

		if (m.status & MCI_STATUS_MISCV)
			rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc);
		if (m.status & MCI_STATUS_ADDRV)
			rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr);

		mce_get_rip(&m, regs);
		if (error_code >= 0)
			rdtscll(m.tsc);
		if (error_code != -2)
			mce_log(&m);

		/* Did this bank cause the exception? */
		/* Assume that the bank with uncorrectable errors did it,
		   and that there is only a single one. */
		if ((m.status & MCI_STATUS_UC) && (m.status & MCI_STATUS_EN)) {
			panicm = m;
			panicm_found = 1;
		}

		add_taint(TAINT_MACHINE_CHECK);
	}

	/* Never do anything final in the polling timer */
	if (!regs)
		goto out;

	/* If we didn't find an uncorrectable error, pick
	   the last one (shouldn't happen, just being safe). */
	if (!panicm_found)
		panicm = m;

	/*
	 * If we have decided that we just CAN'T continue, and the user
	 *  has not set tolerant to an insane level, give up and die.
	 */
	if (no_way_out && tolerant < 3)
		mce_panic("Machine check", &panicm, mcestart);

	/*
	 * If the error seems to be unrecoverable, something should be
	 * done.  Try to kill as little as possible.  If we can kill just
	 * one task, do that.  If the user has set the tolerance very
	 * high, don't try to do anything at all.
	 */
	if (kill_it && tolerant < 3) {
		int user_space = 0;

		/*
		 * If the EIPV bit is set, it means the saved IP is the
		 * instruction which caused the MCE.
		 */
		if (m.mcgstatus & MCG_STATUS_EIPV)
			user_space = panicm.rip && (panicm.cs & 3);

		/*
		 * If we know that the error was in user space, send a
		 * SIGBUS.  Otherwise, panic if tolerance is low.
		 *
		 * do_exit() takes an awful lot of locks and has a slight
		 * risk of deadlocking.
		 */
		if (user_space) {
			do_exit(SIGBUS);
		} else if (panic_on_oops || tolerant < 2) {
			mce_panic("Uncorrected machine check",
				&panicm, mcestart);
		}
	}

	/* notify userspace ASAP */
	set_thread_flag(TIF_MCE_NOTIFY);

 out:
	/* the last thing we do is clear state */
	for (i = 0; i < banks; i++)
		wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
	wrmsrl(MSR_IA32_MCG_STATUS, 0);
 out2:
	atomic_dec(&mce_entry);
}

#ifdef CONFIG_X86_MCE_INTEL
/***
 * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog
 * @cpu: The CPU on which the event occurred.
 * @status: Event status information
 *
 * This function should be called by the thermal interrupt after the
 * event has been processed and the decision was made to log the event
 * further.
 *
 * The status parameter will be saved to the 'status' field of 'struct mce'
 * and historically has been the register value of the
 * MSR_IA32_THERMAL_STATUS (Intel) msr.
 */
void mce_log_therm_throt_event(unsigned int cpu, __u64 status)
{
	struct mce m;

	memset(&m, 0, sizeof(m));
	m.cpu = cpu;
	m.bank = MCE_THERMAL_BANK;
	m.status = status;
	rdtscll(m.tsc);
	mce_log(&m);
}
#endif /* CONFIG_X86_MCE_INTEL */

/*
 * Periodic polling timer for "silent" machine check errors.  If the
 * poller finds an MCE, poll 2x faster.  When the poller finds no more
 * errors, poll 2x slower (up to check_interval seconds).
 */

static int check_interval = 5 * 60; /* 5 minutes */
static int next_interval; /* in jiffies */
static void mcheck_timer(struct work_struct *work);
static DECLARE_DELAYED_WORK(mcheck_work, mcheck_timer);

/*
 * Per-CPU poll callback: run the handler in polling mode (NULL regs) if
 * the current CPU supports machine checks.  @info is unused.
 */
static void mcheck_check_cpu(void *info)
{
	if (mce_available(&current_cpu_data))
		do_machine_check(NULL, 0);
}

/*
 * Delayed-work handler: poll every CPU, adapt the polling interval and
 * re-arm the work item.
 */
static void mcheck_timer(struct work_struct *work)
{
	on_each_cpu(mcheck_check_cpu, NULL, 1, 1);

	/*
	 * Alert userspace if needed.  If we logged an MCE, reduce the
	 * polling interval, otherwise increase the polling interval.
	 */
	if (mce_notify_user()) {
		next_interval = max(next_interval/2, HZ/100);
	} else {
		next_interval = min(next_interval * 2,
				(int)round_jiffies_relative(check_interval*HZ));
	}

	schedule_delayed_work(&mcheck_work, next_interval);
}

/*
 * This is only called from process context.  This is where we do
 * anything we need to alert userspace about new MCEs.  This is called
 * directly from the poller and also from entry.S and idle, thanks to
 * TIF_MCE_NOTIFY.
 *
 * Returns 1 if bit 0 of notify_user was set (and has now been cleared),
 * 0 otherwise.
 */
int mce_notify_user(void)
{
	clear_thread_flag(TIF_MCE_NOTIFY);
	if (test_and_clear_bit(0, &notify_user)) {
		static unsigned long last_print;
		unsigned long now = jiffies;

		wake_up_interruptible(&mce_wait);
		if (trigger[0])
			call_usermodehelper(trigger, trigger_argv, NULL,
						UMH_NO_WAIT);

		/* Print the notice at most once per check_interval seconds. */
		if (time_after_eq(now, last_print + (check_interval*HZ))) {
			last_print = now;
			printk(KERN_INFO "Machine check events logged\n");
		}

		return 1;
	}
	return 0;
}

/* see if the idle task needs to notify userspace */
static int
mce_idle_callback(struct notifier_block *nfb, unsigned long action, void *junk)
{
	/* IDLE_END should be safe - interrupts are back on */
	if (action == IDLE_END && test_thread_flag(TIF_MCE_NOTIFY))
		mce_notify_user();

	return NOTIFY_OK;
}

static struct notifier_block mce_idle_notifier = {
	.notifier_call = mce_idle_callback,
};

/*
 * Boot-time setup: start the polling work item (unless the interval is
 * zero) and register the idle notifier.  Always returns 0.
 */
static __init int periodic_mcheck_init(void)
{
	next_interval = check_interval * HZ;
	if (next_interval)
		schedule_delayed_work(&mcheck_work,
				      round_jiffies_relative(next_interval));
	idle_notifier_register(&mce_idle_notifier);
	return 0;
}
__initcall(periodic_mcheck_init);

/*
 * Initialize Machine Checks for a CPU.
 */
static void mce_init(void *dummy)
{
	u64 cap;
	int i;

	rdmsrl(MSR_IA32_MCG_CAP, cap);
	banks = cap & 0xff;
	if (banks > NR_BANKS) {
		printk(KERN_INFO "MCE: warning: using only %d banks\n", banks);
		banks = NR_BANKS;
	}
	/* Use accurate RIP reporting if available. */
	if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9)
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -