📄 pcibr_error.c

📁 linux-2.4.29操作系统的源码
💻 C
📖 第 1 页 / 共 4 页
字号:
			"\t    Address Holding Link Side Error Reg: 0x%lx\n",			bridge->p_addr_lkerr_64);		}		break;	    case BRIDGE_ISR_SSRAM_PERR:	    /* bit16	SSRAM_PERR */		if (IS_BRIDGE_SOFT(pcibr_soft)) {		    printk(			"\t    Bridge SSRAM Parity Error Register: 0x%x\n",			bridge->b_ram_perr);		}		break;	    case BRIDGE_ISR_PCI_ABORT:	    /* bit15	PCI_ABORT */	    case BRIDGE_ISR_PCI_PARITY:	    /* bit14	PCI_PARITY */	    case BRIDGE_ISR_PCI_SERR:	    /* bit13	PCI_SERR */	    case BRIDGE_ISR_PCI_PERR:	    /* bit12	PCI_PERR */	    case BRIDGE_ISR_PCI_MST_TIMEOUT:/* bit11	PCI_MASTER_TOUT */	    case BRIDGE_ISR_PCI_RETRY_CNT:  /* bit10	PCI_RETRY_CNT */	    case BRIDGE_ISR_GIO_B_ENBL_ERR: /* bit08	GIO BENABLE_ERR */		printk( 		    "\t    PCI Error Upper Address Register: 0x%lx\n"		    "\t    PCI Error Lower Address Register: 0x%lx\n"		    "\t    PCI Error Address: 0x%lx\n",		    (uint64_t) bridge->b_pci_err_upper,		    (uint64_t) bridge->b_pci_err_lower,		    (((uint64_t) bridge->b_pci_err_upper << 32) |		    bridge->b_pci_err_lower));		break;	    case BRIDGE_ISR_XREAD_REQ_TIMEOUT: /* bit09	XREAD_REQ_TOUT */		addr = (((uint64_t)(bridge->b_wid_resp_upper & 0xFFFF) << 32)		    | bridge->b_wid_resp_lower);		printk(		    "\t    Bridge Response Buf Error Upper Addr Reg: 0x%x\n"		    "\t    Bridge Response Buf Error Lower Addr Reg: 0x%x\n"		    "\t    dev-num %d buff-num %d addr 0x%lx\n",		    bridge->b_wid_resp_upper, bridge->b_wid_resp_lower,		    ((bridge->b_wid_resp_upper >> 20) & 0x3),		    ((bridge->b_wid_resp_upper >> 16) & 0xF),		    addr);		break;	    }	}    }    /* We read the INT_MULT register as a 64bit picreg_t for PIC and a     * 32bit bridgereg_t for BRIDGE, but always process the result as a     * 64bit value so the code can be "common" for both PIC and BRIDGE...     
*/    if (IS_PIC_SOFT(pcibr_soft)) {	mult_int_64 = (bridge->p_mult_int_64 & ~BRIDGE_ISR_INT_MSK);	mult_int = (uint64_t)mult_int_64;	number_bits = PCIBR_ISR_MAX_ERRS_PIC;    } else {	mult_int_32 = (bridge->b_mult_int & ~BRIDGE_ISR_INT_MSK);	mult_int = ((uint64_t)mult_int_32) & 0xffffffff;	number_bits = PCIBR_ISR_MAX_ERRS_BRIDGE;    }    if (IS_XBRIDGE_OR_PIC_SOFT(pcibr_soft)&&(mult_int & ~BRIDGE_ISR_INT_MSK)) {	printk( "    %s Multiple Interrupt Register is 0x%lx\n",		IS_PIC_SOFT(pcibr_soft) ? "PIC" : "XBridge", mult_int);	for (i = PCIBR_ISR_ERR_START; i < number_bits; i++) {	    if (mult_int & (1ull << i))		printk( "\t%s\n", pcibr_isr_errs[i]);	}    }}static uint32_tpcibr_errintr_group(uint32_t error){    uint32_t              group = BRIDGE_IRR_MULTI_CLR;    if (error & BRIDGE_IRR_PCI_GRP)	group |= BRIDGE_IRR_PCI_GRP_CLR;    if (error & BRIDGE_IRR_SSRAM_GRP)	group |= BRIDGE_IRR_SSRAM_GRP_CLR;    if (error & BRIDGE_IRR_LLP_GRP)	group |= BRIDGE_IRR_LLP_GRP_CLR;    if (error & BRIDGE_IRR_REQ_DSP_GRP)	group |= BRIDGE_IRR_REQ_DSP_GRP_CLR;    if (error & BRIDGE_IRR_RESP_BUF_GRP)	group |= BRIDGE_IRR_RESP_BUF_GRP_CLR;    if (error & BRIDGE_IRR_CRP_GRP)	group |= BRIDGE_IRR_CRP_GRP_CLR;    return group;}/* pcibr_pioerr_check(): *	Check to see if this pcibr has a PCI PIO *	TIMEOUT error; if so, bump the timeout-count *	on any piomaps that could cover the address. 
*/
/*
 *	soft:	per-bridge soft state; supplies the register base
 *		(bs_base) and the per-slot info/piomap lists.
 *
 *	Reads the interrupt status register and, only when
 *	BRIDGE_ISR_PCIBUS_PIOERR is set, reconstructs the failing
 *	PCI address from the PCI error upper/lower registers and
 *	increments bp_toc[0] on every piomap whose range contains it.
 *	Returns nothing.
 */
static void
pcibr_pioerr_check(pcibr_soft_t soft)
{
    bridge_t		   *bridge;
    uint64_t              int_status;
    bridgereg_t             int_status_32;
    picreg_t                int_status_64;
    bridgereg_t		    pci_err_lower;
    bridgereg_t		    pci_err_upper;
    iopaddr_t		    pci_addr;
    pciio_slot_t	    slot;
    pcibr_piomap_t	    map;
    iopaddr_t		    base;
    size_t		    size;
    unsigned		    win;
    int			    func;

    bridge = soft->bs_base;

    /* We read the INT_STATUS register as a 64bit picreg_t for PIC and a
     * 32bit bridgereg_t for BRIDGE, but always process the result as a
     * 64bit value so the code can be "common" for both PIC and BRIDGE...
     */
    if (IS_PIC_SOFT(soft)) {
        int_status_64 = (bridge->p_int_status_64 & ~BRIDGE_ISR_INT_MSK);
        int_status = (uint64_t)int_status_64;
    } else {
        int_status_32 = (bridge->b_int_status & ~BRIDGE_ISR_INT_MSK);
        int_status = ((uint64_t)int_status_32) & 0xffffffff;
    }

    if (int_status & BRIDGE_ISR_PCIBUS_PIOERR) {
	pci_err_lower = bridge->b_pci_err_lower;
	pci_err_upper = bridge->b_pci_err_upper;

	/* Assemble the 64-bit error address: masked upper register
	 * in bits 63..32, lower register in bits 31..0.
	 */
	pci_addr = pci_err_upper & BRIDGE_ERRUPPR_ADDRMASK;
	pci_addr = (pci_addr << 32) | pci_err_lower;

	/* Visit slots from the highest index down to 0. */
	slot = PCIBR_NUM_SLOTS(soft);

	while (slot-- > 0) {
	    int 		nfunc = soft->bs_slot[slot].bss_ninfo;
	    pcibr_info_h	pcibr_infoh = soft->bs_slot[slot].bss_infos;

	    for (func = 0; func < nfunc; func++) {
		pcibr_info_t 	pcibr_info = pcibr_infoh[func];

		/* Skip function entries with no info attached. */
		if (!pcibr_info)
		    continue;

		for (map = pcibr_info->f_piomap;
		        map != NULL; map = map->bp_next) {
		    base = map->bp_pciaddr;
		    size = map->bp_mapsz;

		    /* "win" is unsigned, so a space below
		     * PCIIO_SPACE_WIN(0) wraps to a large value
		     * and fails the "< 6" test below.
		     */
		    win = map->bp_space - PCIIO_SPACE_WIN(0);
		    if (win < 6)
			base += soft->bs_slot[slot].bss_window[win].bssw_base;
		    else if (map->bp_space == PCIIO_SPACE_ROM)
			base += pcibr_info->f_rbase;

		    /* Count the error against this map when the
		     * address lies in [base, base + size).
		     */
		    if ((pci_addr >= base) && (pci_addr < (base + size)))
			atomic_inc(&map->bp_toc[0]);
		}
	    }
	}
    }
}

/*
 * PCI Bridge Error interrupt handler.
*      This gets invoked, whenever a PCI bridge sends an error interrupt. *      Primarily this servers two purposes. *              - If an error can be handled (typically a PIO read/write *                error, we try to do it silently. *              - If an error cannot be handled, we die violently. *      Interrupt due to PIO errors: *              - Bridge sends an interrupt, whenever a PCI operation *                done by the bridge as the master fails. Operations could *                be either a PIO read or a PIO write. *                PIO Read operation also triggers a bus error, and it's *                We primarily ignore this interrupt in that context.. *                For PIO write errors, this is the only indication. *                and we have to handle with the info from here. * *                So, there is no way to distinguish if an interrupt is *                due to read or write error!. */voidpcibr_error_intr_handler(int irq, void *arg, struct pt_regs *ep){    pcibr_soft_t            pcibr_soft;    bridge_t               *bridge;    uint64_t              int_status;    uint64_t              err_status;    bridgereg_t             int_status_32;    picreg_t                int_status_64;    int			    number_bits;    int                     i;    uint64_t		    disable_errintr_mask = 0;    nasid_t		    nasid;#if PCIBR_SOFT_LIST    /*     * Defensive code for linked pcibr_soft structs     */    {	extern pcibr_list_p	pcibr_list;	pcibr_list_p            entry;	entry = pcibr_list;	while (1) {	    if (entry == NULL) {		panic("pcibr_error_intr_handler:\tmy parameter (0x%p) is not a pcibr_soft!", arg);	    }	    if ((intr_arg_t) entry->bl_soft == arg)		break;	    entry = entry->bl_next;	}    }#endif /* PCIBR_SOFT_LIST */    pcibr_soft = (pcibr_soft_t) arg;    bridge = pcibr_soft->bs_base;    /*     * pcibr_error_intr_handler gets invoked whenever bridge encounters     * an error situation, and the interrupt for that error is enabled.     
* This routine decides if the error is fatal or not, and takes     * action accordingly.     *     * In the case of PIO read/write timeouts, there is no way     * to know if it was a read or write request that timed out.     * If the error was due to a "read", a bus error will also occur     * and the bus error handling code takes care of it.      * If the error is due to a "write", the error is currently logged      * by this routine. For SN1 and SN0, if fire-and-forget mode is      * disabled, a write error response xtalk packet will be sent to      * the II, which will cause an II error interrupt. No write error      * recovery actions of any kind currently take place at the pcibr      * layer! (e.g., no panic on unrecovered write error)     *     * Prior to reading the Bridge int_status register we need to ensure     * that there are no error bits set in the lower layers (hubii)     * that have disabled PIO access to the widget. If so, there is nothing     * we can do until the bits clear, so we setup a timeout and try again     * later.     */    nasid = NASID_GET(bridge);    if (hubii_check_widget_disabled(nasid, pcibr_soft->bs_xid)) {	DECLARE_WAIT_QUEUE_HEAD(wq);	sleep_on_timeout(&wq, BRIDGE_PIOERR_TIMEOUT*HZ );  /* sleep */	pcibr_soft->bs_errinfo.bserr_toutcnt++;	/* Let's go recursive */	return(pcibr_error_intr_handler(irq, arg, ep));    }    /* We read the INT_STATUS register as a 64bit picreg_t for PIC and a     * 32bit bridgereg_t for BRIDGE, but always process the result as a     * 64bit value so the code can be "common" for both PIC and BRIDGE...     
*/    if (IS_PIC_SOFT(pcibr_soft)) {        int_status_64 = (bridge->p_int_status_64 & ~BRIDGE_ISR_INT_MSK);        int_status = (uint64_t)int_status_64;        number_bits = PCIBR_ISR_MAX_ERRS_PIC;    } else {        int_status_32 = (bridge->b_int_status & ~BRIDGE_ISR_INT_MSK);        int_status = ((uint64_t)int_status_32) & 0xffffffff;        number_bits = PCIBR_ISR_MAX_ERRS_BRIDGE;    }    PCIBR_DEBUG_ALWAYS((PCIBR_DEBUG_INTR_ERROR, pcibr_soft->bs_conn,		"pcibr_error_intr_handler: int_status=0x%x\n", int_status));    /* int_status is which bits we have to clear;     * err_status is the bits we haven't handled yet.     */    err_status = int_status & ~BRIDGE_ISR_MULTI_ERR;    if (!(int_status & ~BRIDGE_ISR_INT_MSK)) {	/*	 * No error bit set!!.	 */	return;    }    /*     * If we have a PCIBUS_PIOERR, hand it to the logger.     */    if (int_status & BRIDGE_ISR_PCIBUS_PIOERR) {	pcibr_pioerr_check(pcibr_soft);    }    if (err_status) {	struct bs_errintr_stat_s *bs_estat = pcibr_soft->bs_errintr_stat;	for (i = PCIBR_ISR_ERR_START; i < number_bits; i++, bs_estat++) {	    if (err_status & (1ull << i)) {		uint32_t              errrate = 0;		uint32_t              errcount = 0;		uint32_t              errinterval = 0, current_tick = 0;		int                     llp_tx_retry_errors = 0;		int                     is_llp_tx_retry_intr = 0;		bs_estat->bs_errcount_total++;		current_tick = jiffies;		errinterval = (current_tick - bs_estat->bs_lasterr_timestamp);		errcount = (bs_estat->bs_errcount_total -			    bs_estat->bs_lasterr_snapshot);		/* LLP interrrupt errors are only valid on BUS0 of the PIC */		if (pcibr_soft->bs_busnum == 0)		    is_llp_tx_retry_intr = (BRIDGE_ISR_LLP_TX_RETRY==(1ull << i));		/* Check for the divide by zero condition while		 * calculating the error rates.		 
*/		if (errinterval) {		    errrate = errcount / errinterval;		    /* If able to calculate error rate		     * on a LLP transmitter retry interrupt, check		     * if the error rate is nonzero and we have seen		     * a certain minimum number of errors.		     *		     * NOTE : errcount is being compared to		     * PCIBR_ERRTIME_THRESHOLD to make sure that we are not		     * seeing cases like x error interrupts per y ticks for		     * very low x ,y (x > y ) which could result in a		     * rate > 100/tick.		     */		    if (is_llp_tx_retry_intr &&			errrate &&			(errcount >= PCIBR_ERRTIME_THRESHOLD)) {			llp_tx_retry_errors = 1;		    }		} else {		    errrate = 0;		    /* Since we are not able to calculate the		     * error rate check if we exceeded a certain		     * minimum number of errors for LLP transmitter		     * retries. Note that this can only happen		     * within the first tick after the last snapshot.		     */		    if (is_llp_tx_retry_intr &&			(errcount >= PCIBR_ERRINTR_DISABLE_LEVEL)) {			llp_tx_retry_errors = 1;		    }		}		/*		 * If a non-zero error rate (which is equivalent to		 * to 100 errors/tick at least) for the LLP transmitter		 * retry interrupt was seen, check if we should print		 * a warning message.		 */		if (llp_tx_retry_errors) {		    static uint32_t       last_printed_rate;		    if (errrate > last_printed_rate) {			last_printed_rate = errrate;			/* Print the warning only if the error rate			 * for the transmitter retry interrupt			 * exceeded the previously printed rate.			 */			printk(KERN_WARNING				"%s: %s, Excessive error interrupts : %d/tick\n",				pcibr_soft->bs_name,				pcibr_isr_errs[i],				errrate);		    }		    /*		     * Update snapshot, and time		     */		    bs_estat->bs_lasterr_timestamp = current_tick;		    bs_estat->bs_lasterr_snapshot =			bs_estat->bs_errcount_total;		}		/*		 * If the error rate is high enough, print the error rate.		 
*/		if (errinterval > PCIBR_ERRTIME_THRESHOLD) {		    if (errrate > PCIBR_ERRRATE_THRESHOLD) {			printk(KERN_NOTICE "%s: %s, Error rate %d/tick",				pcibr_soft->bs_name,				pcibr_isr_errs[i],				errrate);			/*			 * Update snapshot, and time			 */			bs_estat->bs_lasterr_timestamp = current_tick;			bs_estat->bs_lasterr_snapshot =			    bs_estat->bs_errcount_total;		    }		}		/* PIC BRINGUP WAR (PV# 856155):		 * Dont disable PCI_X_ARB_ERR interrupts, we need the		 * interrupt inorder to clear the DEV_BROKE bits in		 * b_arb register to re-enable the device.		 */		if (IS_PIC_SOFT(pcibr_soft) &&				!(err_status & PIC_ISR_PCIX_ARB_ERR) &&				PCIBR_WAR_ENABLED(PV856155, pcibr_soft)) {		if (bs_estat->bs_errcount_total > PCIBR_ERRINTR_DISABLE_LEVEL) {		    /*		     * We have seen a fairly large number of errors of		     * this type. Let's disable the interrupt. But flash		     * a message about the interrupt being disabled.		     */		    printk(KERN_NOTICE			    "%s Disabling error interrupt type %s. Error count %d",			    pcibr_soft->bs_name,			    pcibr_isr_errs[i],			    bs_estat->bs_errcount_total);		    disable_errintr_mask |= (1ull << i);		}		} /* PIC: WAR for PV 856155 end-of-if */	    }	}    }    if (disable_errintr_mask) {	unsigned s;	/*	 * Disable some high frequency errors as they	 * could eat up too much cpu time.	 */	s = pcibr_lock(pcibr_soft);	if (IS_PIC_SOFT(pcibr_soft)) {	    bridge->p_int_enable_64 &= (picreg_t)(~disable_errintr_mask);	} else {	    bridge->b_int_enable &= (bridgereg_t)(~disable_errintr_mask);	}	pcibr_unlock(pcibr_soft, s);    }    /*     * If we leave the PROM cacheable, T5 might     * try to do a cache line sized writeback to it,     * which will cause a BRIDGE_ISR_INVLD_ADDR.     
*/    if ((err_status & BRIDGE_ISR_INVLD_ADDR) &&	(0x00000000 == bridge->b_wid_err_upper) &&	(0x00C00000 == (0xFFC00000 & bridge->b_wid_err_lower)) &&	(0x00402000 == (0x00F07F00 & bridge->b_wid_err_cmdword))) {	err_status &= ~BRIDGE_ISR_INVLD_ADDR;    }    /*     * The bridge bug (PCIBR_LLP_CONTROL_WAR), where the llp_config or control registers     * need to be read back after being written, affects an MP     * system since there could be small windows between writing     * the register and reading it back on one cpu while another     * cpu is fielding an interrupt. If we run into this scenario,     * workaround the problem by ignoring the error. (bug 454474)     * pcibr_llp_control_war_cnt keeps an approximate number of     * times we saw this problem on a system.
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -