📄 kn01.c
字号:
pa = vatophys(ep[EF_BADVADDR]); if ( (int)pa != -1 && (btop((int)pa) < physmem) ) { /* * Note that we must save anything "interesting" * from the exception frame, since isolatepar() * may cause additional bus errors which will * stomp on the exception frame in locore. */ vaddr = ep[EF_BADVADDR]; epc = ep[EF_EPC]; memreg = isolatepar(pa, vaddr); ep[EF_BADVADDR] = vaddr; ep[EF_EPC] = epc; /* * If we get 3 or more in 1 second then disable logging * them for 15 minutes. The variable "kn01translog" * is set by the kn01transenable routine. */ if (((memreg >> TYPEOFF) & TRANSPAR) == TRANSPAR) { if (kn01translog) { currtime = time.tv_sec; if (currtime == trans_errcnt.trans_prev) { kn01translog = 0; mprintf("High rate of transient parity memory errors, logging disabled for 15 minutes\n"); trans_errcnt.trans_last = 0; currtime = 0; } pmaxlogmempkt(EL_PRIHIGH, ep, memreg, pa); trans_errcnt.trans_prev = trans_errcnt.trans_last; trans_errcnt.trans_last = currtime; } return(0); } if (SHAREDPG(pa)) { pmaxlogmempkt(EL_PRISEVERE, ep, memreg, pa); pmaxconsprint(MEMPKT, ep, memreg, pa); panic("memory parity error in shared page"); } else { pmaxlogmempkt(EL_PRIHIGH, ep, memreg, pa); printf("pid %d (%s) was killed on memory parity error\n", p->p_pid, u.u_comm); uprintf("pid %d (%s) was killed on memory parity error\n", p->p_pid, u.u_comm); } } else { uprintf("pid %d (%s) was killed on bus error\n", p->p_pid, u.u_comm); } } else { /* * Kernel mode errors. * They all panic, its just a matter of what we log * and what panic message we issue. */ switch (code) { case EXC_DBE: case EXC_IBE: /* * Figure out if its a memory parity error * or a read bus timeout error */ pa = vatophys(ep[EF_BADVADDR]); if ( (int)pa != -1 && (btop((int)pa) < physmem) ) { /* * Note that we must save anything "interesting" * from the exception frame, since isolatepar() * may cause additional bus errors which will * stomp on the exception frame in locore. */ vaddr = ep[EF_BADVADDR]; epc = ep[EF_EPC]; memreg = isolatepar(pa, vaddr); ep[EF_BADVADDR] = vaddr; ep[EF_EPC] = epc; pmaxlogmempkt(EL_PRISEVERE, ep, memreg, pa); pmaxconsprint(MEMPKT, ep, memreg, pa); panic("memory parity error in kernel mode"); } else { pmaxlogesrpkt(ep, EL_PRISEVERE); pmaxconsprint(ESRPKT, ep, 0, 0); panic("bus timeout"); } break; case EXC_CPU: pmaxlogesrpkt(ep, EL_PRISEVERE); pmaxconsprint(ESRPKT, ep, 0, 0); panic("coprocessor unusable"); break; case EXC_RADE: case EXC_WADE: pmaxlogesrpkt(ep, EL_PRISEVERE); pmaxconsprint(ESRPKT, ep, 0, 0); panic("unaligned access"); break; default: pmaxlogesrpkt(ep, EL_PRISEVERE); pmaxconsprint(ESRPKT, ep, 0, 0); panic("trap"); break; } } /* * Default user-mode action is to terminate the process */ *signo = SIGBUS; return(0);}unsigned sbe_addr;int memintr_cnt = 0;/* * Bus timeout on write. * Caused by memory failure, or write to a non-existent address * * This does not happen synchronously (buffered write), * therefor we are not in process context and cannot terminate * a user process. We must crash the system. * * Video interrupt is on the same interrupt line so it comes here too. */kn01memintr(ep) u_int *ep; /* exception frame ptr */{ register volatile short *pm_csr; register short pmcsr; caddr_t pa; /* the physical addr of error */ pm_csr = PM_CSR_ADDR; pmcsr = *pm_csr; memintr_cnt++; if (pmcsr & PM_CSR_MEMERR) { /* * clear the pending bus error and save the address away * for post-mortems. */ *pm_csr = PM_CSR_MEMERR|pmcsr|0x00ff; sbe_addr = *(volatile unsigned *)PHYS_TO_K1(SBE_ADDR); /* * Figure out if its a failed write to memory. * or a write to a bad address. */ pa = vatophys(ep[EF_BADVADDR]); if ((int)pa != -1 && (btop((int)pa) < physmem) ) { pmaxlogesrpkt(ep, EL_PRISEVERE); pmaxconsprint(ESRPKT, ep, 0, 0); panic("memintr, memory failure"); } else { pmaxlogesrpkt(ep, EL_PRISEVERE); pmaxconsprint(ESRPKT, ep, 0, 0); panic("memintr, write timeout"); } } else { *pm_csr = PM_CSR_VINT|pmcsr|0x00ff; } return(0);}/* * Log Error & Status Registers to the error log buffer */pmaxlogesrpkt(ep, priority) register u_int *ep; /* exception frame ptr */ int priority; /* for pkt priority */{ struct el_rec *elrp; elrp = ealloc(sizeof(struct el_esrpmax), priority); if (elrp != NULL) { LSUBID(elrp,ELCT_ESRPMAX,EL_UNDEF,EL_UNDEF,EL_UNDEF,EL_UNDEF,EL_UNDEF); elrp->el_body.elesrpmax.esr_cause = ep[EF_CAUSE]; elrp->el_body.elesrpmax.esr_epc = ep[EF_EPC]; elrp->el_body.elesrpmax.esr_status = ep[EF_SR]; elrp->el_body.elesrpmax.esr_badva = ep[EF_BADVADDR]; elrp->el_body.elesrpmax.esr_sp = ep[EF_SP]; EVALID(elrp); }}/* * Log a memory error packet, so uerf can find it as a main memory error. */pmaxlogmempkt(priority, ep, memreg, pa) int priority; /* pkt priority: panic: severe; else: high */ register u_int *ep; /* exception frame ptr */ unsigned memreg; /* assorted parity error info */ int pa; /* physical addr where memory err occured */{ struct el_rec *elrp; register struct el_mem *mrp; elrp = ealloc(EL_MEMSIZE, priority); if (elrp != NULL) { LSUBID(elrp,ELCT_MEM,EL_UNDEF,ELMCNTR_PMAX,EL_UNDEF,EL_UNDEF,EL_UNDEF); mrp = &elrp->el_body.elmem; mrp->elmem_cnt = 1; mrp->elmemerr.cntl = 1; mrp->elmemerr.type = ELMETYP_PAR; mrp->elmemerr.numerr = 1; mrp->elmemerr.regs[0] = memreg; mrp->elmemerr.regs[1] = pa; mrp->elmemerr.regs[2] = ep[EF_EPC];; mrp->elmemerr.regs[3] = ep[EF_BADVADDR];; EVALID(elrp); }}/* * Print error packet to the console. * This is only done when we are about to panic on the error. * * Note: side-effect. * If console is a graphics device, printstate is changed to force * kernel printfs directly to the screen. */pmaxconsprint(pkt, ep, memreg, pa) int pkt; /* error pkt: Error & Stat Regs / memory pkt */ register u_int *ep; /* exception frame ptr */ unsigned memreg; /* For MEMPKT: assorted parity error info */ unsigned pa; /* For MEMPKT: physical addr of error */ { register int i; int ws_disp; int simm; int byte; /* * If console is a graphics device, * force printf messages directly to screen. */ printstate |= PANICPRINT; switch (pkt) { case ESRPKT: cprintf("\nException condition\n"); cprintf("\tCause reg\t= 0x%x\n", ep[EF_CAUSE]); cprintf("\tException PC\t= 0x%x\n", ep[EF_EPC]); cprintf("\tStatus reg\t= 0x%x\n", ep[EF_SR]); cprintf("\tBad virt addr\t= 0x%x\n", ep[EF_BADVADDR]); break; case MEMPKT: cprintf("\nMemory Parity Error\n"); simm = (memreg >> SIMMOFF) & 0xf; cprintf("\tSIMM (module number)\t= %d\n", simm); if (((memreg >> TYPEOFF) & HARDPAR) == HARDPAR) cprintf("\tHard error\t\n"); else if (((memreg >> TYPEOFF) & SOFTPAR) == SOFTPAR) cprintf("\tSoft error\t\n"); else cprintf("\tTransient error\t\n"); if (simm & 0x1) { /* odd simm: low half word */ if ((memreg >> BYTEOFF) & 0x1) byte = 1; else byte = 0; } else { /* even simm: high half word */ if ((memreg >> BYTEOFF) & 0x1) byte = 3; else byte = 2; } cprintf("\tByte in error (0-3)\t= %d\n", byte); cprintf("\t%s bit error\n", ((memreg >> DPOFF) & 0x1) ? "Parity" : "Data"); cprintf("\tTransient errors for this SIMM\t= %d\n", tcount[simm]); cprintf("\tSoft errors for this SIMM\t= %d\n", scount[simm]); cprintf("\tHard errors for this SIMM\t= %d\n", hcount[simm]); cprintf("\tPhysical address of error\t= 0x%x\n", pa); cprintf("\tException PC\t\t\t= 0x%x\n", ep[EF_EPC]); cprintf("\tVirtual address of error\t= 0x%x\n", ep[EF_BADVADDR]); break; default: cprintf("bad consprint\n"); break; }}/* * Isolate a memory parity error to which SIMM is in error. * This routine is machine specific, in that it "knows" how the memory * is laid out, i.e. how to convert a physical address to a module number. * * Block faults from occuring while we isolate the parity error by using * "nofault" facility thru the bbadaddr routine. */unsignedisolatepar(pa, va) register caddr_t pa; /* the phys addr to convert to a SIMM */ caddr_t va; /* the virtual addr of the error */ { register int i; /* loop index */ register char *addr; /* increment thru the word w/ parity error */ register char *k1_addr; /* kseg1 addr. for mem test writes */ unsigned memreg; /* collection of memory error info */ int odd; /* true if its the odd numbered SIMM */ int simm; /* which simm had the error */ register int allzeros; /* true if parity err occurs on all 0's write */ register int allones; /* true if parity err occurs on all 1's write */ register int oneone; /* true if parity err occurs on 1 1 write */ int dp; /* 0 for data bit, 1 for parity bit */ int type; /* error type: transient, soft, hard */ int byte; /* 0 for low byte; 1 for high byte in word */ /* * Round address down to long word, & clear flags. */ addr = (char *)((int)va & (~0x3)); type = 0; dp = 0; /* * Do badaddr probe on addr (a few times), * to see if it was only a transient. */ parityerr = 0; for (i = 0; i < 4; i++) { if (bbadaddr(addr, 4)) { parityerr = 1; break; } } if (!parityerr) { type = TRANSPAR; byte = 0; odd = 1; goto getsimm; } /* * Isolate the parity error to which SIMM is in error (which byte in * the word) and isolate the type of error: soft or hard, data bit * or parity bit. * * This is done by writing (& reading) each byte in the word first * with all 0's then with all 1's (0xff) then with one 1 (0x1). * * use k1_addr in order not to get TLBMOD exception when writing * shared memory space */ k1_addr = (char *)(PHYS_TO_K1(((int)pa & (~0x3)))); /* lw addr */ for (i = 0; i < 4; i++, addr += 1, k1_addr += 1) { allzeros = 0; *k1_addr = 0x00; if (bbadaddr(addr, 1)) allzeros = 1; allones = 0; *k1_addr = 0xff; if (bbadaddr(addr, 1)) allones = 1; oneone = 0; *k1_addr = 0x1; if (bbadaddr(addr, 1)) oneone = 1; /* * If all 3 reads caused the error then this is the wrong * byte, go on to the next byte */ if (allzeros && allones && oneone) continue; /* * If only one of the allones/allzeros patterns caused a * parity error, then we have a hard data bit stuck to * zero or one. */ if ((allzeros && !allones && !oneone) || (allones && !allzeros && !oneone)) { type = HARDPAR; break; } /* * If only the "oneone" (0x1) pattern caused a parity error, * then we have a parity bit stuck to zero. * If only the "oneone" (0x1) pattern did NOT cause a parity * error then we have a parity bit stuck to one. */ if ((oneone && !allzeros && !allones) || (allzeros && allones && !oneone)) { type = HARDPAR; dp = 1; break; } /* * If no parity error on all 3 patterns then we had a soft * parity error in one of the data bits or in the parity bit * of this byte. */ if (!allzeros && !allones && !oneone) { type = SOFTPAR; break; } } /* * If i is 0 or 1, parity error is on the odd SIMM. * If i is 2 or 3, parity error is on the even SIMM. * Also record high or low byte position in half-word. */ switch (i) { case 0: byte = 0; odd = 1; break; case 1: byte = 1; odd = 1; break; case 2: byte = 0; odd = 0; break; case 3: default: byte = 1; odd = 0; break; }getsimm: /* * Record which SIMM: 4 Mbytes per SIMM */ if ((int)pa < 4*1024*1024) if (odd) simm = 1; else simm = 2; else if ((int)pa < 8*1024*1024) if (odd) simm = 3; else simm = 4; else if ((int)pa < 12*1024*1024) if (odd) simm = 5; else simm = 6; else if ((int)pa < 16*1024*1024) if (odd) simm = 7; else simm = 8; else if ((int)pa < 20*1024*1024) if (odd) simm = 9; else simm = 10; else if (odd) simm = 11; else simm = 12; /* * Increment error counts */ switch (type) { case TRANSPAR: default: tcount[simm]++; if (tcount[simm] > 255) { mprintf("Transient parity error count on simm # %d reached 255, reset to zero.\n", simm); tcount[simm] = 0; } break; case SOFTPAR: scount[simm]++; break; case HARDPAR: hcount[simm]++; break; } memreg = MEMREGFMT(simm, type, byte, dp, tcount[simm], scount[simm], hcount[simm]); return(memreg);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -