mca.c
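/*
 * Editor's note (not part of mca.c): the RNAT copy loop below leans on
 * the ia64 RSE helpers.  For reference, they look roughly like the
 * definitions in include/asm-ia64/rse.h, sketched here from memory (not
 * verbatim): every 64th slot of the register backing store (slot number
 * 0x3f) holds the RNAT collection bits for the preceding 63 slots,
 * which is why the two stacks' different alignments force a
 * register-by-register copy.
 */
static __inline__ unsigned long
ia64_rse_slot_num (unsigned long *addr)
{
	/* bits 3..8 of a backing store address give the slot number */
	return (((unsigned long) addr) >> 3) & 0x3f;
}

static __inline__ int
ia64_rse_is_rnat_slot (unsigned long *addr)
{
	/* slot 0x3f is where the RSE spills ar.rnat */
	return ia64_rse_slot_num(addr) == 0x3f;
}

static __inline__ unsigned long *
ia64_rse_rnat_addr (unsigned long *slot_addr)
{
	/* address of the RNAT slot covering slot_addr */
	return (unsigned long *) ((unsigned long) slot_addr | (0x3f << 3));
}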
 * are in the current RBS, copy them back to the original stack.  The
 * copy must be done register by register because the original bspstore
 * and the current one have different alignments, so the saved RNAT
 * data occurs at different places.
 *
 * mca_asm does cover, so the old_bsp already includes all registers at
 * the time of MCA/INIT.  It also does flushrs, so all registers before
 * this function have been written to backing store on the MCA/INIT
 * stack.
 */
	new_rnat = ia64_get_rnat(ia64_rse_rnat_addr(new_bspstore));
	old_rnat = regs->ar_rnat;
	while (slots--) {
		if (ia64_rse_is_rnat_slot(new_bspstore)) {
			new_rnat = ia64_get_rnat(new_bspstore++);
		}
		if (ia64_rse_is_rnat_slot(old_bspstore)) {
			*old_bspstore++ = old_rnat;
			old_rnat = 0;
		}
		nat = (new_rnat >> ia64_rse_slot_num(new_bspstore)) & 1UL;
		old_rnat &= ~(1UL << ia64_rse_slot_num(old_bspstore));
		old_rnat |= (nat << ia64_rse_slot_num(old_bspstore));
		*old_bspstore++ = *new_bspstore++;
	}
	old_sw->ar_bspstore = (unsigned long)old_bspstore;
	old_sw->ar_rnat = old_rnat;

	sos->prev_task = previous_current;
	return previous_current;

no_mod:
	printk(KERN_INFO "cpu %d, %s %s, original stack not modified\n",
			smp_processor_id(), type, msg);
	return previous_current;
}

/* The monarch/slave interaction is based on monarch_cpu and requires that all
 * slaves have entered rendezvous before the monarch leaves.  If any cpu has
 * not entered rendezvous yet then wait a bit.  The assumption is that any
 * slave that has not rendezvoused after a reasonable time is never going to do
 * so.  In this context, slave includes cpus that respond to the MCA rendezvous
 * interrupt, as well as cpus that receive the INIT slave event.
 */

static void
ia64_wait_for_slaves(int monarch)
{
	int c, wait = 0;

	for_each_online_cpu(c) {
		if (c == monarch)
			continue;
		if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) {
			udelay(1000);		/* short wait first */
			wait = 1;
			break;
		}
	}
	if (!wait)
		return;
	for_each_online_cpu(c) {
		if (c == monarch)
			continue;
		if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) {
			udelay(5*1000000);	/* wait 5 seconds for slaves (arbitrary) */
			break;
		}
	}
}
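/*
 * Editor's illustration (not part of mca.c): the two-phase wait above,
 * reduced to portable C.  The monarch polls per-slave checkin flags:
 * one quick pass with a millisecond delay, then a single long,
 * arbitrary wait before giving up on stragglers.  NSLAVES and checkin[]
 * are invented for this sketch; the kernel tracks the same state in
 * ia64_mc_info.imi_rendez_checkin[].
 */
#include <stdatomic.h>
#include <unistd.h>

#define NSLAVES 4
static atomic_int checkin[NSLAVES];	/* 0 = NOTDONE, 1 = rendezvoused */

static void
wait_for_slaves_sketch(int monarch)
{
	int c, wait = 0;

	for (c = 0; c < NSLAVES; c++) {
		if (c == monarch)
			continue;
		if (!atomic_load(&checkin[c])) {
			usleep(1000);	/* short wait first */
			wait = 1;
			break;
		}
	}
	if (!wait)
		return;
	for (c = 0; c < NSLAVES; c++) {
		if (c == monarch)
			continue;
		if (!atomic_load(&checkin[c])) {
			sleep(5);	/* 5 seconds for stragglers (arbitrary) */
			break;
		}
	}
}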
/*
 * ia64_mca_handler
 *
 *	This is the uncorrectable machine check handler called from the
 *	OS_MCA dispatch code, which is in turn called from SAL_CHECK().
 *	This is the place where the core of OS MCA handling is done.
 *	Right now the logs are extracted and displayed in a well-defined
 *	format.  This handler code is supposed to be run only on the
 *	monarch processor.  Once the monarch is done with MCA handling,
 *	further MCA logging is enabled by clearing logs.
 *	The monarch also has the duty of sending wakeup-IPIs to pull the
 *	slave processors out of the rendezvous spinloop.
 */
void
ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
		 struct ia64_sal_os_state *sos)
{
	pal_processor_state_info_t *psp = (pal_processor_state_info_t *)
		&sos->proc_state_param;
	int recover, cpu = smp_processor_id();
	task_t *previous_current;

	oops_in_progress = 1;	/* FIXME: make printk NMI/MCA/INIT safe */
	previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA");
	monarch_cpu = cpu;
	if (notify_die(DIE_MCA_MONARCH_ENTER, "MCA", regs, 0, 0, 0)
			== NOTIFY_STOP)
		ia64_mca_spin(__FUNCTION__);
	ia64_wait_for_slaves(cpu);

	/* Wakeup all the processors which are spinning in the rendezvous loop.
	 * They will leave SAL, then spin in the OS with interrupts disabled
	 * until this monarch cpu leaves the MCA handler.  That gets control
	 * back to the OS so we can backtrace the other cpus; backtracing
	 * while they spin in SAL does not work.
	 */
	ia64_mca_wakeup_all();
	if (notify_die(DIE_MCA_MONARCH_PROCESS, "MCA", regs, 0, 0, 0)
			== NOTIFY_STOP)
		ia64_mca_spin(__FUNCTION__);

	/* Get the MCA error record and log it */
	ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA);

	/* Recover if a TLB check is the only error in this SAL record */
	recover = (psp->tc && !(psp->cc || psp->bc || psp->rc || psp->uc))
		/* other error recovery */
		|| (ia64_mca_ucmc_extension
			&& ia64_mca_ucmc_extension(
				IA64_LOG_CURR_BUFFER(SAL_INFO_TYPE_MCA),
				sos));

	if (recover) {
		sal_log_record_header_t *rh = IA64_LOG_CURR_BUFFER(SAL_INFO_TYPE_MCA);
		rh->severity = sal_log_severity_corrected;
		ia64_sal_clear_state_info(SAL_INFO_TYPE_MCA);
		sos->os_status = IA64_MCA_CORRECTED;
	}
	if (notify_die(DIE_MCA_MONARCH_LEAVE, "MCA", regs, 0, 0, recover)
			== NOTIFY_STOP)
		ia64_mca_spin(__FUNCTION__);

	set_curr_task(cpu, previous_current);
	monarch_cpu = -1;
}

static DECLARE_WORK(cmc_disable_work, ia64_mca_cmc_vector_disable_keventd, NULL);
static DECLARE_WORK(cmc_enable_work, ia64_mca_cmc_vector_enable_keventd, NULL);

/*
 * ia64_mca_cmc_int_handler
 *
 *	This is the corrected machine check interrupt handler.
 *	Right now the logs are extracted and displayed in a well-defined
 *	format.
 *
 * Inputs
 *	interrupt number
 *	client data arg ptr
 *	saved registers ptr
 *
 * Outputs
 *	None
 */
static irqreturn_t
ia64_mca_cmc_int_handler(int cmc_irq, void *arg, struct pt_regs *ptregs)
{
	static unsigned long	cmc_history[CMC_HISTORY_LENGTH];
	static int		index;
	static DEFINE_SPINLOCK(cmc_history_lock);

	IA64_MCA_DEBUG("%s: received interrupt vector = %#x on CPU %d\n",
		       __FUNCTION__, cmc_irq, smp_processor_id());

	/* SAL spec states this should run w/ interrupts enabled */
	local_irq_enable();

	/* Get the CMC error record and log it */
	ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CMC);

	spin_lock(&cmc_history_lock);
	if (!cmc_polling_enabled) {
		int i, count = 1; /* we know 1 happened now */
		unsigned long now = jiffies;

		for (i = 0; i < CMC_HISTORY_LENGTH; i++) {
			if (now - cmc_history[i] <= HZ)
				count++;
		}

		IA64_MCA_DEBUG(KERN_INFO "CMC threshold %d/%d\n", count, CMC_HISTORY_LENGTH);
		if (count >= CMC_HISTORY_LENGTH) {

			cmc_polling_enabled = 1;
			spin_unlock(&cmc_history_lock);
			/* If we're being hit with CMC interrupts, we won't
			 * ever execute the schedule_work() below.  Need to
			 * disable CMC interrupts on this processor now.
			 */
			ia64_mca_cmc_vector_disable(NULL);
			schedule_work(&cmc_disable_work);

			/*
			 * Corrected errors will still be corrected, but
			 * make sure there's a log somewhere that indicates
			 * something is generating more than we can handle.
			 */
			printk(KERN_WARNING "WARNING: Switching to polling CMC handler; error records may be lost\n");

			mod_timer(&cmc_poll_timer, jiffies + CMC_POLL_INTERVAL);

			/* lock already released, get out now */
			return IRQ_HANDLED;
		} else {
			cmc_history[index++] = now;
			if (index == CMC_HISTORY_LENGTH)
				index = 0;
		}
	}
	spin_unlock(&cmc_history_lock);
	return IRQ_HANDLED;
}
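/*
 * Editor's illustration (not part of mca.c): the storm detection in
 * ia64_mca_cmc_int_handler above, as a pure function.  When HISTORY_LEN
 * events fall within one WINDOW of the newest one, the caller should
 * switch from interrupt mode to polling.  HISTORY_LEN and WINDOW are
 * invented stand-ins for CMC_HISTORY_LENGTH and HZ.
 */
#define HISTORY_LEN	8
#define WINDOW		1000	/* same units as the timestamps passed in */

static unsigned long event_history[HISTORY_LEN];
static int event_index;

static int
cmc_storm_detected(unsigned long now)
{
	int i, count = 1;	/* count the event happening now */

	for (i = 0; i < HISTORY_LEN; i++)
		if (now - event_history[i] <= WINDOW)
			count++;
	if (count >= HISTORY_LEN)
		return 1;	/* storm: caller switches to polling */
	event_history[event_index++] = now;
	if (event_index == HISTORY_LEN)
		event_index = 0;
	return 0;
}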
/*
 * ia64_mca_cmc_int_caller
 *
 *	Triggered by sw interrupt from CMC polling routine.  Calls
 *	real interrupt handler and either triggers a sw interrupt
 *	on the next cpu or does cleanup at the end.
 *
 * Inputs
 *	interrupt number
 *	client data arg ptr
 *	saved registers ptr
 * Outputs
 *	handled
 */
static irqreturn_t
ia64_mca_cmc_int_caller(int cmc_irq, void *arg, struct pt_regs *ptregs)
{
	static int start_count = -1;
	unsigned int cpuid;

	cpuid = smp_processor_id();

	/* If first cpu, update count */
	if (start_count == -1)
		start_count = IA64_LOG_COUNT(SAL_INFO_TYPE_CMC);

	ia64_mca_cmc_int_handler(cmc_irq, arg, ptregs);

	for (++cpuid ; cpuid < NR_CPUS && !cpu_online(cpuid) ; cpuid++);

	if (cpuid < NR_CPUS) {
		platform_send_ipi(cpuid, IA64_CMCP_VECTOR, IA64_IPI_DM_INT, 0);
	} else {
		/* If no log record, switch out of polling mode */
		if (start_count == IA64_LOG_COUNT(SAL_INFO_TYPE_CMC)) {

			printk(KERN_WARNING "Returning to interrupt driven CMC handler\n");
			schedule_work(&cmc_enable_work);
			cmc_polling_enabled = 0;

		} else {

			mod_timer(&cmc_poll_timer, jiffies + CMC_POLL_INTERVAL);
		}

		start_count = -1;
	}

	return IRQ_HANDLED;
}

/*
 * ia64_mca_cmc_poll
 *
 *	Poll for Corrected Machine Checks (CMCs)
 *
 * Inputs   : dummy(unused)
 * Outputs  : None
 *
 */
static void
ia64_mca_cmc_poll (unsigned long dummy)
{
	/* Trigger a CMC interrupt cascade */
	platform_send_ipi(first_cpu(cpu_online_map), IA64_CMCP_VECTOR, IA64_IPI_DM_INT, 0);
}

/*
 * ia64_mca_cpe_int_caller
 *
 *	Triggered by sw interrupt from CPE polling routine.  Calls
 *	real interrupt handler and either triggers a sw interrupt
 *	on the next cpu or does cleanup at the end.
 *
 * Inputs
 *	interrupt number
 *	client data arg ptr
 *	saved registers ptr
 * Outputs
 *	handled
 */
#ifdef CONFIG_ACPI

static irqreturn_t
ia64_mca_cpe_int_caller(int cpe_irq, void *arg, struct pt_regs *ptregs)
{
	static int start_count = -1;
	static int poll_time = MIN_CPE_POLL_INTERVAL;
	unsigned int cpuid;

	cpuid = smp_processor_id();

	/* If first cpu, update count */
	if (start_count == -1)
		start_count = IA64_LOG_COUNT(SAL_INFO_TYPE_CPE);

	ia64_mca_cpe_int_handler(cpe_irq, arg, ptregs);

	for (++cpuid ; cpuid < NR_CPUS && !cpu_online(cpuid) ; cpuid++);

	if (cpuid < NR_CPUS) {
		platform_send_ipi(cpuid, IA64_CPEP_VECTOR, IA64_IPI_DM_INT, 0);
	} else {
		/*
		 * If a log was recorded, increase our polling frequency,
		 * otherwise, backoff or return to interrupt mode.
		 */
		if (start_count != IA64_LOG_COUNT(SAL_INFO_TYPE_CPE)) {
			poll_time = max(MIN_CPE_POLL_INTERVAL, poll_time / 2);
		} else if (cpe_vector < 0) {
			poll_time = min(MAX_CPE_POLL_INTERVAL, poll_time * 2);
		} else {
			poll_time = MIN_CPE_POLL_INTERVAL;

			printk(KERN_WARNING "Returning to interrupt driven CPE handler\n");
			enable_irq(local_vector_to_irq(IA64_CPE_VECTOR));
			cpe_poll_enabled = 0;
		}

		if (cpe_poll_enabled)
			mod_timer(&cpe_poll_timer, jiffies + poll_time);
		start_count = -1;
	}

	return IRQ_HANDLED;
}
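/*
 * Editor's illustration (not part of mca.c): the adaptive interval from
 * ia64_mca_cpe_int_caller above, as a pure function.  Fresh log records
 * halve the interval (poll faster); quiet polls double it; both clamped.
 * The switch back to interrupt mode when cpe_vector is valid is left
 * out.  The min/max arguments stand in for MIN_CPE_POLL_INTERVAL and
 * MAX_CPE_POLL_INTERVAL.
 */
static unsigned long
next_poll_interval(unsigned long cur, int new_records,
		   unsigned long min, unsigned long max)
{
	if (new_records)
		cur /= 2;	/* errors arriving: poll more often */
	else
		cur *= 2;	/* quiet: back off */
	if (cur < min)
		cur = min;
	if (cur > max)
		cur = max;
	return cur;
}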
/*
 * ia64_mca_cpe_poll
 *
 *	Poll for Corrected Platform Errors (CPEs), trigger interrupt
 *	on first cpu, from there it will trickle through all the cpus.
 *
 * Inputs   : dummy(unused)
 * Outputs  : None
 *
 */
static void
ia64_mca_cpe_poll (unsigned long dummy)
{
	/* Trigger a CPE interrupt cascade */
	platform_send_ipi(first_cpu(cpu_online_map), IA64_CPEP_VECTOR, IA64_IPI_DM_INT, 0);
}

#endif /* CONFIG_ACPI */

static int
default_monarch_init_process(struct notifier_block *self, unsigned long val, void *data)
{
	int c;
	struct task_struct *g, *t;
	if (val != DIE_INIT_MONARCH_PROCESS)
		return NOTIFY_DONE;
	printk(KERN_ERR "Processes interrupted by INIT -");
	for_each_online_cpu(c) {
		struct ia64_sal_os_state *s;
		t = __va(__per_cpu_mca[c] + IA64_MCA_CPU_INIT_STACK_OFFSET);
		s = (struct ia64_sal_os_state *)((char *)t + MCA_SOS_OFFSET);
		g = s->prev_task;
		if (g) {
			if (g->pid)
				printk(" %d", g->pid);
			else
				printk(" %d (cpu %d task 0x%p)", g->pid,
				       task_cpu(g), g);
		}
	}
	printk("\n\n");
	if (read_trylock(&tasklist_lock)) {
		do_each_thread (g, t) {
			printk("\nBacktrace of pid %d (%s)\n", t->pid, t->comm);
			show_stack(t, NULL);
		} while_each_thread (g, t);
		read_unlock(&tasklist_lock);
	}
	return NOTIFY_DONE;
}

/*
 * C portion of the OS INIT handler
 *
 * Called from ia64_os_init_dispatch
 *
 * Inputs: pointer to pt_regs where processor info was saved.  SAL/OS state for
 * this event.  This code is used for both monarch and slave INIT events, see
 * sos->monarch.
 *
 * All INIT events switch to the INIT stack and change the previous process to
 * blocked status.  If one of the INIT events is the monarch then we are
 * probably processing the nmi button/command.  Use the monarch cpu to dump all
 * the processes.  The slave INIT events all spin until the monarch cpu
 * returns.  We can also get INIT slave events for MCA, in which case the MCA
 * process is the monarch.
 */
void
ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
		  struct ia64_sal_os_state *sos)
{
	static atomic_t slaves;
	static atomic_t monarchs;
	task_t *previous_current;
	int cpu = smp_processor_id();

	oops_in_progress = 1;	/* FIXME: make printk NMI/MCA/INIT safe */
	console_loglevel = 15;	/* make sure printks make it to console */

	printk(KERN_INFO "Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld\n",
		sos->proc_state_param, cpu, sos->monarch);
	salinfo_log_wakeup(SAL_INFO_TYPE_INIT, NULL, 0, 0);

	previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "INIT");
	sos->os_status = IA64_INIT_RESUME;

	/* FIXME: Workaround for broken proms that drive all INIT events as
	 * slaves.  The last slave that enters is promoted to be a monarch.
	 * Remove this code in September 2006; that gives platforms a year to
	 * fix their proms and get their customers updated.
	 */
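/*
 * Editor's illustration (not part of mca.c): the monarch/slave INIT
 * protocol described above, in miniature with C11 atomics.  Slaves spin
 * until a monarch announces itself and again until it finishes; the
 * monarch does the real work (waiting for slaves, dumping processes).
 * All names here are invented for the sketch; the kernel version uses
 * its own barriers and per-cpu state.
 */
#include <stdatomic.h>

static atomic_int monarch_cpu_sketch = -1;	/* -1: no monarch active */

static void
init_slave_sketch(void)
{
	while (atomic_load(&monarch_cpu_sketch) == -1)
		;	/* wait for a monarch to show up */
	while (atomic_load(&monarch_cpu_sketch) != -1)
		;	/* spin until the monarch is done */
}

static void
init_monarch_sketch(int cpu)
{
	atomic_store(&monarch_cpu_sketch, cpu);
	/* ... wait for slaves to check in, dump process state ... */
	atomic_store(&monarch_cpu_sketch, -1);	/* release the slaves */
}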