/* mca.c — source listing (web-viewer title chrome removed) */
/* * File: mca.c * Purpose: Generic MCA handling layer * * Updated for latest kernel * Copyright (C) 2003 Hewlett-Packard Co * David Mosberger-Tang <davidm@hpl.hp.com> * * Copyright (C) 2002 Dell Inc. * Copyright (C) Matt Domsch (Matt_Domsch@dell.com) * * Copyright (C) 2002 Intel * Copyright (C) Jenna Hall (jenna.s.hall@intel.com) * * Copyright (C) 2001 Intel * Copyright (C) Fred Lewis (frederick.v.lewis@intel.com) * * Copyright (C) 2000 Intel * Copyright (C) Chuck Fleckenstein (cfleck@co.intel.com) * * Copyright (C) 1999, 2004 Silicon Graphics, Inc. * Copyright (C) Vijay Chander(vijay@engr.sgi.com) * * 03/04/15 D. Mosberger Added INIT backtrace support. * 02/03/25 M. Domsch GUID cleanups * * 02/01/04 J. Hall Aligned MCA stack to 16 bytes, added platform vs. CPU * error flag, set SAL default return values, changed * error record structure to linked list, added init call * to sal_get_state_info_size(). * * 01/01/03 F. Lewis Added setup of CMCI and CPEI IRQs, logging of corrected * platform errors, completed code for logging of * corrected & uncorrected machine check errors, and * updated for conformance with Nov. 2000 revision of the * SAL 3.0 spec. * 00/03/29 C. Fleckenstein Fixed PAL/SAL update issues, began MCA bug fixes, logging issues, * added min save state dump, added INIT handler. * * 2003-12-08 Keith Owens <kaos@sgi.com> * smp_call_function() must not be called from interrupt context (can * deadlock on tasklist_lock). Use keventd to call smp_call_function(). * * 2004-02-01 Keith Owens <kaos@sgi.com> * Avoid deadlock when using printk() for MCA and INIT records. * Delete all record printing code, moved to salinfo_decode in user space. * Mark variables and functions static where possible. * Delete dead variables and functions. * Reorder to remove the need for forward declarations and to consolidate * related code. * * 2005-08-12 Keith Owens <kaos@sgi.com> * Convert MCA/INIT handlers to use per event stacks and SAL/OS state. 
 *
 * 2005-10-07 Keith Owens <kaos@sgi.com>
 *	      Add notify_die() hooks.
 */
#include <linux/config.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/smp_lock.h>
#include <linux/bootmem.h>
#include <linux/acpi.h>
#include <linux/timer.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/smp.h>
#include <linux/workqueue.h>

#include <asm/delay.h>
#include <asm/kdebug.h>
#include <asm/machvec.h>
#include <asm/meminit.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/system.h>
#include <asm/sal.h>
#include <asm/mca.h>

#include <asm/irq.h>
#include <asm/hw_irq.h>

#include "entry.h"

/* Debug printk is compiled in only when IA64_MCA_DEBUG_INFO is defined;
 * otherwise IA64_MCA_DEBUG() expands to nothing. */
#if defined(IA64_MCA_DEBUG_INFO)
# define IA64_MCA_DEBUG(fmt...)	printk(fmt)
#else
# define IA64_MCA_DEBUG(fmt...)
#endif

/* Used by mca_asm.S */
u32				ia64_mca_serialize;
DEFINE_PER_CPU(u64, ia64_mca_data); /* == __per_cpu_mca[smp_processor_id()] */
DEFINE_PER_CPU(u64, ia64_mca_per_cpu_pte); /* PTE to map per-CPU area */
DEFINE_PER_CPU(u64, ia64_mca_pal_pte);	    /* PTE to map PAL code */
DEFINE_PER_CPU(u64, ia64_mca_pal_base);    /* vaddr PAL code granule */

unsigned long __per_cpu_mca[NR_CPUS];

/* In mca_asm.S */
extern void			ia64_os_init_dispatch_monarch (void);
extern void			ia64_os_init_dispatch_slave (void);

/* CPU currently acting as MCA/INIT monarch; -1 when no event in progress. */
static int monarch_cpu = -1;

static ia64_mc_info_t		ia64_mc_info;

/* Polling intervals for corrected-error records (in jiffies). */
#define MAX_CPE_POLL_INTERVAL (15*60*HZ) /* 15 minutes */
#define MIN_CPE_POLL_INTERVAL (2*60*HZ)  /* 2 minutes */
#define CMC_POLL_INTERVAL     (1*60*HZ)  /* 1 minute */
#define CPE_HISTORY_LENGTH    5
#define CMC_HISTORY_LENGTH    5

static struct timer_list cpe_poll_timer;
static struct timer_list cmc_poll_timer;
/*
 * This variable tells whether we are currently in polling mode.
 * Start with this in the wrong state so we won't play w/ timers
 * before the system is ready.
 */
static int cmc_polling_enabled = 1;

/*
 * Clearing this variable prevents CPE polling from getting activated
 * in mca_late_init.
Use it if your system doesn't provide a CPEI, * but encounters problems retrieving CPE logs. This should only be * necessary for debugging. */static int cpe_poll_enabled = 1;extern void salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe);static int mca_init;static void inlineia64_mca_spin(const char *func){ printk(KERN_EMERG "%s: spinning here, not returning to SAL\n", func); while (1) cpu_relax();}/* * IA64_MCA log support */#define IA64_MAX_LOGS 2 /* Double-buffering for nested MCAs */#define IA64_MAX_LOG_TYPES 4 /* MCA, INIT, CMC, CPE */typedef struct ia64_state_log_s{ spinlock_t isl_lock; int isl_index; unsigned long isl_count; ia64_err_rec_t *isl_log[IA64_MAX_LOGS]; /* need space to store header + error log */} ia64_state_log_t;static ia64_state_log_t ia64_state_log[IA64_MAX_LOG_TYPES];#define IA64_LOG_ALLOCATE(it, size) \ {ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)] = \ (ia64_err_rec_t *)alloc_bootmem(size); \ ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)] = \ (ia64_err_rec_t *)alloc_bootmem(size);}#define IA64_LOG_LOCK_INIT(it) spin_lock_init(&ia64_state_log[it].isl_lock)#define IA64_LOG_LOCK(it) spin_lock_irqsave(&ia64_state_log[it].isl_lock, s)#define IA64_LOG_UNLOCK(it) spin_unlock_irqrestore(&ia64_state_log[it].isl_lock,s)#define IA64_LOG_NEXT_INDEX(it) ia64_state_log[it].isl_index#define IA64_LOG_CURR_INDEX(it) 1 - ia64_state_log[it].isl_index#define IA64_LOG_INDEX_INC(it) \ {ia64_state_log[it].isl_index = 1 - ia64_state_log[it].isl_index; \ ia64_state_log[it].isl_count++;}#define IA64_LOG_INDEX_DEC(it) \ ia64_state_log[it].isl_index = 1 - ia64_state_log[it].isl_index#define IA64_LOG_NEXT_BUFFER(it) (void *)((ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)]))#define IA64_LOG_CURR_BUFFER(it) (void *)((ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)]))#define IA64_LOG_COUNT(it) ia64_state_log[it].isl_count/* * ia64_log_init * Reset the OS ia64 log buffer * Inputs : info_type (SAL_INFO_TYPE_{MCA,INIT,CMC,CPE}) * Outputs : 
 None
 */
static void
ia64_log_init(int sal_info_type)
{
	u64	max_size = 0;

	/* Start with buffer 0 as "next"; buffer 1 becomes "current". */
	IA64_LOG_NEXT_INDEX(sal_info_type) = 0;
	IA64_LOG_LOCK_INIT(sal_info_type);

	// SAL will tell us the maximum size of any error record of this type
	max_size = ia64_sal_get_state_info_size(sal_info_type);
	if (!max_size)
		/* alloc_bootmem() doesn't like zero-sized allocations! */
		return;

	// set up OS data structures to hold error info
	IA64_LOG_ALLOCATE(sal_info_type, max_size);
	memset(IA64_LOG_CURR_BUFFER(sal_info_type), 0, max_size);
	memset(IA64_LOG_NEXT_BUFFER(sal_info_type), 0, max_size);
}

/*
 * ia64_log_get
 *
 *	Get the current MCA log from SAL and copy it into the OS log buffer.
 *
 *  Inputs  :   info_type   (SAL_INFO_TYPE_{MCA,INIT,CMC,CPE})
 *              irq_safe    whether you can use printk at this point
 *  Outputs :   size        (total record length)
 *              *buffer     (ptr to error record)
 *
 */
static u64
ia64_log_get(int sal_info_type, u8 **buffer, int irq_safe)
{
	sal_log_record_header_t     *log_buffer;
	u64                         total_len = 0;
	int                         s;	/* flags for IA64_LOG_LOCK/UNLOCK */

	IA64_LOG_LOCK(sal_info_type);

	/* Get the process state information */
	log_buffer = IA64_LOG_NEXT_BUFFER(sal_info_type);

	total_len = ia64_sal_get_state_info(sal_info_type, (u64 *)log_buffer);

	if (total_len) {
		/* Record retrieved: flip the buffers so this one becomes
		 * "current" and the other buffer catches the next event. */
		IA64_LOG_INDEX_INC(sal_info_type);
		IA64_LOG_UNLOCK(sal_info_type);
		if (irq_safe) {
			IA64_MCA_DEBUG("%s: SAL error record type %d retrieved. "
				       "Record length = %ld\n", __FUNCTION__, sal_info_type, total_len);
		}
		*buffer = (u8 *) log_buffer;
		return total_len;
	} else {
		IA64_LOG_UNLOCK(sal_info_type);
		return 0;
	}
}

/*
 * ia64_mca_log_sal_error_record
 *
 * This function retrieves a specified error record type from SAL
 * and wakes up any processes waiting for error records.
 *
 * Inputs  :   sal_info_type   (Type of error record MCA/CMC/CPE)
 *             FIXME: remove MCA and irq_safe.
 */
static void
ia64_mca_log_sal_error_record(int sal_info_type)
{
	u8 *buffer;
	sal_log_record_header_t *rh;
	u64 size;
	/* MCA records are fetched in machine-check context where printk
	 * would be unsafe; everything else is irq_safe. */
	int irq_safe = sal_info_type != SAL_INFO_TYPE_MCA;
#ifdef IA64_MCA_DEBUG_INFO
	static const char * const rec_name[] = { "MCA", "INIT", "CMC", "CPE" };
#endif

	size = ia64_log_get(sal_info_type, &buffer, irq_safe);
	if (!size)
		return;

	salinfo_log_wakeup(sal_info_type, buffer, size, irq_safe);

	if (irq_safe)
		IA64_MCA_DEBUG("CPU %d: SAL log contains %s error record\n",
			smp_processor_id(),
			sal_info_type < ARRAY_SIZE(rec_name) ? rec_name[sal_info_type] : "UNKNOWN");

	/* Clear logs from corrected errors in case there's no user-level logger */
	rh = (sal_log_record_header_t *)buffer;
	if (rh->severity == sal_log_severity_corrected)
		ia64_sal_clear_state_info(sal_info_type);
}

/*
 * platform dependent error handling
 */
#ifndef PLATFORM_MCA_HANDLERS

#ifdef CONFIG_ACPI

int cpe_vector = -1;

/*
 * ia64_mca_cpe_int_handler
 *
 *	Interrupt handler for the Corrected Platform Error vector.  Logs
 *	the CPE record, then rate-limits: if more than CPE_HISTORY_LENGTH
 *	interrupts arrive within one second of each other, the vector is
 *	disabled and the driver falls back to timer-based polling.
 */
static irqreturn_t
ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs)
{
	static unsigned long	cpe_history[CPE_HISTORY_LENGTH];
	static int		index;
	static DEFINE_SPINLOCK(cpe_history_lock);

	IA64_MCA_DEBUG("%s: received interrupt vector = %#x on CPU %d\n",
		       __FUNCTION__, cpe_irq, smp_processor_id());

	/* SAL spec states this should run w/ interrupts enabled */
	local_irq_enable();

	/* Get the CPE error record and log it */
	ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CPE);

	spin_lock(&cpe_history_lock);
	if (!cpe_poll_enabled && cpe_vector >= 0) {

		int i, count = 1; /* we know 1 happened now */
		unsigned long now = jiffies;

		/* Count how many previous CPEs landed within the last second. */
		for (i = 0; i < CPE_HISTORY_LENGTH; i++) {
			if (now - cpe_history[i] <= HZ)
				count++;
		}

		IA64_MCA_DEBUG(KERN_INFO "CPE threshold %d/%d\n", count, CPE_HISTORY_LENGTH);
		if (count >= CPE_HISTORY_LENGTH) {
			/* Storm detected: switch to polling mode and mask
			 * the CPE vector.  Lock is dropped before the irq
			 * call, so we must return without unlocking again. */
			cpe_poll_enabled = 1;
			spin_unlock(&cpe_history_lock);
			disable_irq_nosync(local_vector_to_irq(IA64_CPE_VECTOR));

			/*
			 * Corrected errors will still be corrected, but
			 * make sure there's a log somewhere that indicates
			 * something is generating more than we can handle.
			 */
			printk(KERN_WARNING "WARNING: Switching to polling CPE handler; error records may be lost\n");

			mod_timer(&cpe_poll_timer, jiffies + MIN_CPE_POLL_INTERVAL);

			/* lock already released, get out now */
			return IRQ_HANDLED;
		} else {
			/* No storm yet: remember this event's timestamp in
			 * the circular history buffer. */
			cpe_history[index++] = now;
			if (index == CPE_HISTORY_LENGTH)
				index = 0;
		}
	}
	spin_unlock(&cpe_history_lock);
	return IRQ_HANDLED;
}

#endif /* CONFIG_ACPI */

#ifdef CONFIG_ACPI

/*
 * ia64_mca_register_cpev
 *
 *  Register the corrected platform error vector with SAL.
 *
 *  Inputs
 *      cpev        Corrected Platform Error Vector number
 *
 *  Outputs
 *      None
 */
static void
ia64_mca_register_cpev (int cpev)
{
	/* Register the CPE interrupt vector with SAL */
	struct ia64_sal_retval isrv;

	isrv = ia64_sal_mc_set_params(SAL_MC_PARAM_CPE_INT, SAL_MC_PARAM_MECHANISM_INT, cpev, 0, 0);
	if (isrv.status) {
		/* Non-zero SAL status: leave the vector unregistered and
		 * report the failure; CPE will rely on polling instead. */
		printk(KERN_ERR "Failed to register Corrected Platform "
		       "Error interrupt vector with SAL (status %ld)\n", isrv.status);
		return;
	}

	IA64_MCA_DEBUG("%s: corrected platform error "
		       "vector %#x registered\n", __FUNCTION__, cpev);
}
#endif /* CONFIG_ACPI */

#endif /* PLATFORM_MCA_HANDLERS */

/*
 * ia64_mca_cmc_vector_setup
 *
 *  Setup the corrected machine check vector register in the processor.
 *  (The interrupt is masked on boot. ia64_mca_late_init unmask this.)
 *  This function is invoked on a per-processor basis.
 *
 * Inputs
 *      None
 *
 * Outputs
 *	None
 */
void
ia64_mca_cmc_vector_setup (void)
{
	cmcv_reg_t	cmcv;

	cmcv.cmcv_regval	= 0;
	cmcv.cmcv_mask		= 1;        /* Mask/disable interrupt at first */
	cmcv.cmcv_vector	= IA64_CMC_VECTOR;
	ia64_setreg(_IA64_REG_CR_CMCV, cmcv.cmcv_regval);

	IA64_MCA_DEBUG("%s: CPU %d corrected "
		       "machine check vector %#x registered.\n",
		       __FUNCTION__, smp_processor_id(), IA64_CMC_VECTOR);

	IA64_MCA_DEBUG("%s: CPU %d CMCV = %#016lx\n",
		       __FUNCTION__, smp_processor_id(), ia64_getreg(_IA64_REG_CR_CMCV));
}

/*
 * ia64_mca_cmc_vector_disable
 *
 *  Mask the corrected machine check vector register in the processor.
 *  This function is invoked on a per-processor basis.
* * Inputs * dummy(unused) * * Outputs * None */static voidia64_mca_cmc_vector_disable (void *dummy){ cmcv_reg_t cmcv; cmcv.cmcv_regval = ia64_getreg(_IA64_REG_CR_CMCV);
/* (end of captured listing — web-viewer keyboard-shortcut legend removed;
 * the original source file continues beyond this point) */