📄 pcibr_error.c
字号:
"\t Address Holding Link Side Error Reg: 0x%lx\n", bridge->p_addr_lkerr_64); } break; case BRIDGE_ISR_SSRAM_PERR: /* bit16 SSRAM_PERR */ if (IS_BRIDGE_SOFT(pcibr_soft)) { printk( "\t Bridge SSRAM Parity Error Register: 0x%x\n", bridge->b_ram_perr); } break; case BRIDGE_ISR_PCI_ABORT: /* bit15 PCI_ABORT */ case BRIDGE_ISR_PCI_PARITY: /* bit14 PCI_PARITY */ case BRIDGE_ISR_PCI_SERR: /* bit13 PCI_SERR */ case BRIDGE_ISR_PCI_PERR: /* bit12 PCI_PERR */ case BRIDGE_ISR_PCI_MST_TIMEOUT:/* bit11 PCI_MASTER_TOUT */ case BRIDGE_ISR_PCI_RETRY_CNT: /* bit10 PCI_RETRY_CNT */ case BRIDGE_ISR_GIO_B_ENBL_ERR: /* bit08 GIO BENABLE_ERR */ printk( "\t PCI Error Upper Address Register: 0x%lx\n" "\t PCI Error Lower Address Register: 0x%lx\n" "\t PCI Error Address: 0x%lx\n", (uint64_t) bridge->b_pci_err_upper, (uint64_t) bridge->b_pci_err_lower, (((uint64_t) bridge->b_pci_err_upper << 32) | bridge->b_pci_err_lower)); break; case BRIDGE_ISR_XREAD_REQ_TIMEOUT: /* bit09 XREAD_REQ_TOUT */ addr = (((uint64_t)(bridge->b_wid_resp_upper & 0xFFFF) << 32) | bridge->b_wid_resp_lower); printk( "\t Bridge Response Buf Error Upper Addr Reg: 0x%x\n" "\t Bridge Response Buf Error Lower Addr Reg: 0x%x\n" "\t dev-num %d buff-num %d addr 0x%lx\n", bridge->b_wid_resp_upper, bridge->b_wid_resp_lower, ((bridge->b_wid_resp_upper >> 20) & 0x3), ((bridge->b_wid_resp_upper >> 16) & 0xF), addr); break; } } } /* We read the INT_MULT register as a 64bit picreg_t for PIC and a * 32bit bridgereg_t for BRIDGE, but always process the result as a * 64bit value so the code can be "common" for both PIC and BRIDGE... */ if (IS_PIC_SOFT(pcibr_soft)) { mult_int_64 = (bridge->p_mult_int_64 & ~BRIDGE_ISR_INT_MSK); mult_int = (uint64_t)mult_int_64; number_bits = PCIBR_ISR_MAX_ERRS_PIC; } else { mult_int_32 = (bridge->b_mult_int & ~BRIDGE_ISR_INT_MSK); mult_int = ((uint64_t)mult_int_32) & 0xffffffff; number_bits = PCIBR_ISR_MAX_ERRS_BRIDGE; } if (IS_XBRIDGE_OR_PIC_SOFT(pcibr_soft)&&(mult_int & ~BRIDGE_ISR_INT_MSK)) { printk( " %s Multiple Interrupt Register is 0x%lx\n", IS_PIC_SOFT(pcibr_soft) ? "PIC" : "XBridge", mult_int); for (i = PCIBR_ISR_ERR_START; i < number_bits; i++) { if (mult_int & (1ull << i)) printk( "\t%s\n", pcibr_isr_errs[i]); } }}static uint32_tpcibr_errintr_group(uint32_t error){ uint32_t group = BRIDGE_IRR_MULTI_CLR; if (error & BRIDGE_IRR_PCI_GRP) group |= BRIDGE_IRR_PCI_GRP_CLR; if (error & BRIDGE_IRR_SSRAM_GRP) group |= BRIDGE_IRR_SSRAM_GRP_CLR; if (error & BRIDGE_IRR_LLP_GRP) group |= BRIDGE_IRR_LLP_GRP_CLR; if (error & BRIDGE_IRR_REQ_DSP_GRP) group |= BRIDGE_IRR_REQ_DSP_GRP_CLR; if (error & BRIDGE_IRR_RESP_BUF_GRP) group |= BRIDGE_IRR_RESP_BUF_GRP_CLR; if (error & BRIDGE_IRR_CRP_GRP) group |= BRIDGE_IRR_CRP_GRP_CLR; return group;}/* pcibr_pioerr_check(): * Check to see if this pcibr has a PCI PIO * TIMEOUT error; if so, bump the timeout-count * on any piomaps that could cover the address. */static voidpcibr_pioerr_check(pcibr_soft_t soft){ bridge_t *bridge; uint64_t int_status; bridgereg_t int_status_32; picreg_t int_status_64; bridgereg_t pci_err_lower; bridgereg_t pci_err_upper; iopaddr_t pci_addr; pciio_slot_t slot; pcibr_piomap_t map; iopaddr_t base; size_t size; unsigned win; int func; bridge = soft->bs_base; /* We read the INT_STATUS register as a 64bit picreg_t for PIC and a * 32bit bridgereg_t for BRIDGE, but always process the result as a * 64bit value so the code can be "common" for both PIC and BRIDGE... */ if (IS_PIC_SOFT(soft)) { int_status_64 = (bridge->p_int_status_64 & ~BRIDGE_ISR_INT_MSK); int_status = (uint64_t)int_status_64; } else { int_status_32 = (bridge->b_int_status & ~BRIDGE_ISR_INT_MSK); int_status = ((uint64_t)int_status_32) & 0xffffffff; } if (int_status & BRIDGE_ISR_PCIBUS_PIOERR) { pci_err_lower = bridge->b_pci_err_lower; pci_err_upper = bridge->b_pci_err_upper; pci_addr = pci_err_upper & BRIDGE_ERRUPPR_ADDRMASK; pci_addr = (pci_addr << 32) | pci_err_lower; slot = PCIBR_NUM_SLOTS(soft); while (slot-- > 0) { int nfunc = soft->bs_slot[slot].bss_ninfo; pcibr_info_h pcibr_infoh = soft->bs_slot[slot].bss_infos; for (func = 0; func < nfunc; func++) { pcibr_info_t pcibr_info = pcibr_infoh[func]; if (!pcibr_info) continue; for (map = pcibr_info->f_piomap; map != NULL; map = map->bp_next) { base = map->bp_pciaddr; size = map->bp_mapsz; win = map->bp_space - PCIIO_SPACE_WIN(0); if (win < 6) base += soft->bs_slot[slot].bss_window[win].bssw_base; else if (map->bp_space == PCIIO_SPACE_ROM) base += pcibr_info->f_rbase; if ((pci_addr >= base) && (pci_addr < (base + size))) atomic_inc(&map->bp_toc[0]); } } } }}/* * PCI Bridge Error interrupt handler. * This gets invoked, whenever a PCI bridge sends an error interrupt. * Primarily this servers two purposes. * - If an error can be handled (typically a PIO read/write * error, we try to do it silently. * - If an error cannot be handled, we die violently. * Interrupt due to PIO errors: * - Bridge sends an interrupt, whenever a PCI operation * done by the bridge as the master fails. Operations could * be either a PIO read or a PIO write. * PIO Read operation also triggers a bus error, and it's * We primarily ignore this interrupt in that context.. * For PIO write errors, this is the only indication. * and we have to handle with the info from here. * * So, there is no way to distinguish if an interrupt is * due to read or write error!. */voidpcibr_error_intr_handler(int irq, void *arg, struct pt_regs *ep){ pcibr_soft_t pcibr_soft; bridge_t *bridge; uint64_t int_status; uint64_t err_status; bridgereg_t int_status_32; picreg_t int_status_64; int number_bits; int i; uint64_t disable_errintr_mask = 0; nasid_t nasid;#if PCIBR_SOFT_LIST /* * Defensive code for linked pcibr_soft structs */ { extern pcibr_list_p pcibr_list; pcibr_list_p entry; entry = pcibr_list; while (1) { if (entry == NULL) { panic("pcibr_error_intr_handler:\tmy parameter (0x%p) is not a pcibr_soft!", arg); } if ((intr_arg_t) entry->bl_soft == arg) break; entry = entry->bl_next; } }#endif /* PCIBR_SOFT_LIST */ pcibr_soft = (pcibr_soft_t) arg; bridge = pcibr_soft->bs_base; /* * pcibr_error_intr_handler gets invoked whenever bridge encounters * an error situation, and the interrupt for that error is enabled. * This routine decides if the error is fatal or not, and takes * action accordingly. * * In the case of PIO read/write timeouts, there is no way * to know if it was a read or write request that timed out. * If the error was due to a "read", a bus error will also occur * and the bus error handling code takes care of it. * If the error is due to a "write", the error is currently logged * by this routine. For SN1 and SN0, if fire-and-forget mode is * disabled, a write error response xtalk packet will be sent to * the II, which will cause an II error interrupt. No write error * recovery actions of any kind currently take place at the pcibr * layer! (e.g., no panic on unrecovered write error) * * Prior to reading the Bridge int_status register we need to ensure * that there are no error bits set in the lower layers (hubii) * that have disabled PIO access to the widget. If so, there is nothing * we can do until the bits clear, so we setup a timeout and try again * later. */ nasid = NASID_GET(bridge); if (hubii_check_widget_disabled(nasid, pcibr_soft->bs_xid)) { DECLARE_WAIT_QUEUE_HEAD(wq); sleep_on_timeout(&wq, BRIDGE_PIOERR_TIMEOUT*HZ ); /* sleep */ pcibr_soft->bs_errinfo.bserr_toutcnt++; /* Let's go recursive */ return(pcibr_error_intr_handler(irq, arg, ep)); } /* We read the INT_STATUS register as a 64bit picreg_t for PIC and a * 32bit bridgereg_t for BRIDGE, but always process the result as a * 64bit value so the code can be "common" for both PIC and BRIDGE... */ if (IS_PIC_SOFT(pcibr_soft)) { int_status_64 = (bridge->p_int_status_64 & ~BRIDGE_ISR_INT_MSK); int_status = (uint64_t)int_status_64; number_bits = PCIBR_ISR_MAX_ERRS_PIC; } else { int_status_32 = (bridge->b_int_status & ~BRIDGE_ISR_INT_MSK); int_status = ((uint64_t)int_status_32) & 0xffffffff; number_bits = PCIBR_ISR_MAX_ERRS_BRIDGE; } PCIBR_DEBUG_ALWAYS((PCIBR_DEBUG_INTR_ERROR, pcibr_soft->bs_conn, "pcibr_error_intr_handler: int_status=0x%x\n", int_status)); /* int_status is which bits we have to clear; * err_status is the bits we haven't handled yet. */ err_status = int_status & ~BRIDGE_ISR_MULTI_ERR; if (!(int_status & ~BRIDGE_ISR_INT_MSK)) { /* * No error bit set!!. */ return; } /* * If we have a PCIBUS_PIOERR, hand it to the logger. */ if (int_status & BRIDGE_ISR_PCIBUS_PIOERR) { pcibr_pioerr_check(pcibr_soft); } if (err_status) { struct bs_errintr_stat_s *bs_estat = pcibr_soft->bs_errintr_stat; for (i = PCIBR_ISR_ERR_START; i < number_bits; i++, bs_estat++) { if (err_status & (1ull << i)) { uint32_t errrate = 0; uint32_t errcount = 0; uint32_t errinterval = 0, current_tick = 0; int llp_tx_retry_errors = 0; int is_llp_tx_retry_intr = 0; bs_estat->bs_errcount_total++; current_tick = jiffies; errinterval = (current_tick - bs_estat->bs_lasterr_timestamp); errcount = (bs_estat->bs_errcount_total - bs_estat->bs_lasterr_snapshot); /* LLP interrrupt errors are only valid on BUS0 of the PIC */ if (pcibr_soft->bs_busnum == 0) is_llp_tx_retry_intr = (BRIDGE_ISR_LLP_TX_RETRY==(1ull << i)); /* Check for the divide by zero condition while * calculating the error rates. */ if (errinterval) { errrate = errcount / errinterval; /* If able to calculate error rate * on a LLP transmitter retry interrupt, check * if the error rate is nonzero and we have seen * a certain minimum number of errors. * * NOTE : errcount is being compared to * PCIBR_ERRTIME_THRESHOLD to make sure that we are not * seeing cases like x error interrupts per y ticks for * very low x ,y (x > y ) which could result in a * rate > 100/tick. */ if (is_llp_tx_retry_intr && errrate && (errcount >= PCIBR_ERRTIME_THRESHOLD)) { llp_tx_retry_errors = 1; } } else { errrate = 0; /* Since we are not able to calculate the * error rate check if we exceeded a certain * minimum number of errors for LLP transmitter * retries. Note that this can only happen * within the first tick after the last snapshot. */ if (is_llp_tx_retry_intr && (errcount >= PCIBR_ERRINTR_DISABLE_LEVEL)) { llp_tx_retry_errors = 1; } } /* * If a non-zero error rate (which is equivalent to * to 100 errors/tick at least) for the LLP transmitter * retry interrupt was seen, check if we should print * a warning message. */ if (llp_tx_retry_errors) { static uint32_t last_printed_rate; if (errrate > last_printed_rate) { last_printed_rate = errrate; /* Print the warning only if the error rate * for the transmitter retry interrupt * exceeded the previously printed rate. */ printk(KERN_WARNING "%s: %s, Excessive error interrupts : %d/tick\n", pcibr_soft->bs_name, pcibr_isr_errs[i], errrate); } /* * Update snapshot, and time */ bs_estat->bs_lasterr_timestamp = current_tick; bs_estat->bs_lasterr_snapshot = bs_estat->bs_errcount_total; } /* * If the error rate is high enough, print the error rate. */ if (errinterval > PCIBR_ERRTIME_THRESHOLD) { if (errrate > PCIBR_ERRRATE_THRESHOLD) { printk(KERN_NOTICE "%s: %s, Error rate %d/tick", pcibr_soft->bs_name, pcibr_isr_errs[i], errrate); /* * Update snapshot, and time */ bs_estat->bs_lasterr_timestamp = current_tick; bs_estat->bs_lasterr_snapshot = bs_estat->bs_errcount_total; } } /* PIC BRINGUP WAR (PV# 856155): * Dont disable PCI_X_ARB_ERR interrupts, we need the * interrupt inorder to clear the DEV_BROKE bits in * b_arb register to re-enable the device. */ if (IS_PIC_SOFT(pcibr_soft) && !(err_status & PIC_ISR_PCIX_ARB_ERR) && PCIBR_WAR_ENABLED(PV856155, pcibr_soft)) { if (bs_estat->bs_errcount_total > PCIBR_ERRINTR_DISABLE_LEVEL) { /* * We have seen a fairly large number of errors of * this type. Let's disable the interrupt. But flash * a message about the interrupt being disabled. */ printk(KERN_NOTICE "%s Disabling error interrupt type %s. Error count %d", pcibr_soft->bs_name, pcibr_isr_errs[i], bs_estat->bs_errcount_total); disable_errintr_mask |= (1ull << i); } } /* PIC: WAR for PV 856155 end-of-if */ } } } if (disable_errintr_mask) { unsigned s; /* * Disable some high frequency errors as they * could eat up too much cpu time. */ s = pcibr_lock(pcibr_soft); if (IS_PIC_SOFT(pcibr_soft)) { bridge->p_int_enable_64 &= (picreg_t)(~disable_errintr_mask); } else { bridge->b_int_enable &= (bridgereg_t)(~disable_errintr_mask); } pcibr_unlock(pcibr_soft, s); } /* * If we leave the PROM cacheable, T5 might * try to do a cache line sized writeback to it, * which will cause a BRIDGE_ISR_INVLD_ADDR. */ if ((err_status & BRIDGE_ISR_INVLD_ADDR) && (0x00000000 == bridge->b_wid_err_upper) && (0x00C00000 == (0xFFC00000 & bridge->b_wid_err_lower)) && (0x00402000 == (0x00F07F00 & bridge->b_wid_err_cmdword))) { err_status &= ~BRIDGE_ISR_INVLD_ADDR; } /* * The bridge bug (PCIBR_LLP_CONTROL_WAR), where the llp_config or control registers * need to be read back after being written, affects an MP * system since there could be small windows between writing * the register and reading it back on one cpu while another * cpu is fielding an interrupt. If we run into this scenario, * workaround the problem by ignoring the error. (bug 454474) * pcibr_llp_control_war_cnt keeps an approximate number of * times we saw this problem on a system.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -