📄 kn02.c
字号:
chksyn_plus |= (0 << CPLUS_EOFF); chksyn_plus |= (0 << CPLUS_MOFF); } else { chksyn_plus |= (kn02memerrs[module] << CPLUS_EOFF); chksyn_plus |= (module << CPLUS_MOFF); } chksyn_plus &= ~CPLUS_VALID; if (((chksyn & CHKSYN_VLDLO) && (chksyn & CHKSYN_SNGLO)) || ((chksyn & CHKSYN_VLDHI) && (chksyn & CHKSYN_SNGHI))) { /* * Single bit ECC error (CRD) * If we get 3 or more in 1 second then disable logging * them for 15 minutes. The variable "kn02stopcrdlog" * is cleared by the kn02crdenable routine. */ if (kn02crdlog) { currtime = time.tv_sec; if (currtime == crd_errcnt.crd_prev) { kn02crdlog = 0; mprintf("High rate of corrected single-bit ECC errors, logging disabled for 15 minutes\n"); crd_errcnt.crd_last = 0; currtime = 0; } kn02logmempkt(EL_PRIHIGH, ep, ELMETYP_CRD, kn02csr, erradr, chksyn_plus); crd_errcnt.crd_prev = crd_errcnt.crd_last; crd_errcnt.crd_last = currtime; } /* * Scrub the single bit error */ *(u_int *)PHYS_TO_K0(pa) = *(u_int *)PHYS_TO_K0(pa); *(u_int *)PHYS_TO_K1(KN02ERR_ADDR) = 0; wbflush(); } else { /* * Multibit error, panic. */ kn02logmempkt(EL_PRISEVERE, ep, ELMETYP_RDS, kn02csr, erradr, chksyn_plus); kn02consprint(MEMPKT, ep, kn02csr, erradr, chksyn_plus); *(u_int *)PHYS_TO_K1(KN02ERR_ADDR) = 0; kn02eccpanic = 1; wbflush(); panic("multibit memory ECC error"); } break; case ERR_DMAWOVR: case ERR_DMAROVR: kn02logesrpkt(EL_PRISEVERE, ep, kn02csr, erradr); kn02consprint(ESR_INTR_PKT, ep, kn02csr, erradr, 0); *(u_int *)PHYS_TO_K1(KN02ERR_ADDR) = 0; wbflush(); panic("DMA overrun"); break; case ERR_WTMO: /* * if we are still processing an previous interrupt * then simply crash. we don't queue these interrupts. */ if (CURRENT_CPUDATA->cpu_wto_event) { kn02logesrpkt(EL_PRISEVERE, ep, kn02csr, erradr); kn02consprint(ESR_INTR_PKT, ep, kn02csr, erradr, 0); *(u_int *)PHYS_TO_K1(KN02ERR_ADDR) = 0; wbflush(); panic("CPU write timeout"); } else { /* * capture error information in kn02consinfo. * softnet() interrupt will print this info * if panicing on the console. */ pcons = &kn02consinfo; pcons->pkt_type = ESR_INTR_PKT; pcons->pkt.intrp.cause = ep[EF_CAUSE]; pcons->pkt.intrp.sr = ep[EF_SR]; pcons->pkt.intrp.sp = ep[EF_SP]; pcons->pkt.intrp.csr = kn02csr; pcons->pkt.intrp.erradr = erradr; /* * capture log information in kn02log_errinfo. * softnet() interrupt will log this info * if panicing in the error log buffer. */ plog = &kn02log_errinfo; plog->pkt_type = ESR_INTR_PKT; plog->cause = ep[EF_CAUSE]; plog->sr = ep[EF_SR]; plog->sp = ep[EF_SP]; plog->epc = ep[EF_EPC]; plog->badvaddr = ep[EF_BADVADDR]; plog->csr = kn02csr; plog->erradr = erradr; CURRENT_CPUDATA->cpu_consinfo = (char *) &kn02consinfo; CURRENT_CPUDATA->cpu_log_errinfo = (char *) &kn02log_errinfo; CURRENT_CPUDATA->cpu_wto_pfn = btop((erradr & ERR_ADDR) << 2); CURRENT_CPUDATA->cpu_wto_event = 1; *(u_int *)PHYS_TO_K1(KN02ERR_ADDR) = 0; wbflush(); setsoftnet(); } break; case ERR_UKN: default: kn02logesrpkt(EL_PRISEVERE, ep, kn02csr, erradr); kn02consprint(ESR_BUS_PKT, ep, kn02csr, erradr, 0); cprintf("\tChecksyn register\t= 0x%x\n", chksyn); *(u_int *)PHYS_TO_K1(KN02ERR_ADDR) = 0; wbflush(); panic("Unknown memory error interrupt"); break; } return(0);}/* * Routine to handle trap errors: user-mode ibe & dbe, & all kernel mode traps. * We try to recover from user-mode errors and panic on kernel mode errors. * * Entry conditions: * kn02erradr and kn02chksyn are set in locore on bus errors * (VEC_dbe and VEC_ibe) to be a copy of the hardware registers. */kn02trap_error(ep, code, sr, cause, signo) register u_int *ep; /* exception frame ptr */ u_int code; /* trap code (trap type) */ u_int sr, cause; /* status and cause regs */ int *signo; /* set if we want to kill process */{ caddr_t pa; /* the physical addr of the error */ register struct proc *p; /* ptr to current proc struct */ register u_int kn02csr; /* copy of csr reg */ register u_int erradr; /* copy of erradr reg */ register u_int chksyn; /* copy of chksyn reg */ register u_int chksyn_plus; /* chksyn + pc valid bit & err count */ int errtype; /* local record of error type */ int module; /* module number with error */ int kn02_physmem; /* adjusted physmem value */ p = u.u_procp; kn02csr = *(u_int *)PHYS_TO_K1(KN02CSR_ADDR); erradr = kn02erradr; chksyn = kn02chksyn; if ((erradr & ERR_VALID) && ((erradr & ERR_TYPE) == ERR_RECC) && (((chksyn & CHKSYN_VLDLO) && ((chksyn & CHKSYN_SNGLO) == 0))|| ((chksyn & CHKSYN_VLDHI) && ((chksyn & CHKSYN_SNGHI) == 0)))) errtype = ERR_RECC; /* Multibit memory read ECC */ else if ((erradr & ERR_VALID) && ((erradr & ERR_TYPE) == ERR_RTMO)) errtype = ERR_RTMO; /* CPU read timeout */ else errtype = ERR_UKN; /* * If nvram present adjust physmem value to include the 1Mbyte * of NVRAM which is located in memory. physmem naturally doesn't know * about the NVRAM because it is not marked in the bitmap like * regular memory. Note physmem is in pages, so 1MB is 256 pages. * Also, there is a small window before kn02_nvram_found is set in * the configure() routine, if we take an ECC error in this window, * it will be handled properly, but logged incorrectly, so a * multi-bit ECC error would be logged as taking place on a non * existent memory module, single bits will just be corrected and * logged. * */ if (kn02_nvram_found) kn02_physmem = physmem + 256; else kn02_physmem = physmem; if (USERMODE(sr)) { switch (errtype) { case ERR_RECC: pa = vatophys(ep[EF_BADVADDR]); if ( (int)pa != -1 && (btop((int)pa) < kn02_physmem) ) if (kn02csr & KN02CSR_BNK32M) module = (int)pa / (32*(1024*1024)); else module = (int)pa / ( 8*(1024*1024)); else module = -1; if (module >= 0 && module < MEM_MODULES) { kn02memerrs[module]++; if (kn02memerrs[module] > MAXERRCNT) { kn02memerrs[module] = 0; mprintf("Error count on memory module %d reached %d, resetting count to zero.\n", module, MAXERRCNT); } } else module = -1; erradr = (erradr & (~ERR_COLADDR)) | (((int)erradr -5) & ERR_COLADDR); chksyn_plus = 0; if (chksyn & CHKSYN_VLDLO) { chksyn_plus = (chksyn & CPLUS_CHK); } else chksyn_plus = ((chksyn >> CPLUS_MOFF) & CPLUS_CHK); if (module == -1) chksyn_plus |= (0 << CPLUS_EOFF) | (0 < CPLUS_MOFF) | CPLUS_VALID; else chksyn_plus |= (kn02memerrs[module] << CPLUS_EOFF) | (module < CPLUS_MOFF) | CPLUS_VALID; if (SHAREDPG(pa)) { kn02logmempkt(EL_PRISEVERE, ep, ELMETYP_RDS, kn02csr, erradr, chksyn_plus); kn02consprint(MEMPKT, ep, kn02csr, erradr, chksyn_plus); kn02eccpanic = 1; if (module == -1) panic("multibit ECC error reported on non-existent memory module"); else panic("multibit memory ECC error in shared page"); } else { kn02logmempkt(EL_PRISEVERE, ep, ELMETYP_RDS, kn02csr, erradr, chksyn_plus); printf("pid %d (%s) was killed on multibit memory ECC error\n", p->p_pid, u.u_comm); uprintf("pid %d (%s) was killed on multibit memory ECC error\n", p->p_pid, u.u_comm); } break; case ERR_RTMO: uprintf("pid %d (%s) was killed on CPU read bus timeout\n", p->p_pid, u.u_comm); break; case ERR_UKN: default: uprintf("pid %d (%s) was killed on unknown bus error\n", p->p_pid, u.u_comm); break; } } else { /* * Kernel mode errors. * They all panic, its just a matter of what we log * and what panic message we issue. */ switch (code) { case EXC_DBE: /* * If we are already dumping from an ECC error * and we get a data bus CPU read ECC error, then * just ignore this error. */ if ((errtype == ERR_RECC) && kn02eccpanic) { return(0); } /* fall thru */ case EXC_IBE: switch (errtype) { case ERR_RECC: /* Multibit memory read ECC */ pa = vatophys(ep[EF_BADVADDR]); if ( (int)pa != -1 && (btop((int)pa) < kn02_physmem) ) { if (kn02csr & KN02CSR_BNK32M) module = (int)pa / (32*(1024*1024)); else module = (int)pa / ( 8*(1024*1024)); } else module = -1; if (module >= 0 && module < MEM_MODULES) { kn02memerrs[module]++; if (kn02memerrs[module] > MAXERRCNT) { kn02memerrs[module] = 0; mprintf("Error count on memory module %d reached %d, resetting count to zero.\n", module, MAXERRCNT); } } else module = -1; erradr = (erradr & (~ERR_COLADDR)) | (((int)erradr -5) & ERR_COLADDR); chksyn_plus = 0; if (chksyn & CHKSYN_VLDLO) { chksyn_plus = (chksyn & CPLUS_CHK); } else chksyn_plus = ((chksyn >> CPLUS_MOFF) & CPLUS_CHK); if (module == -1) chksyn_plus |= (0 << CPLUS_EOFF) | (0 < CPLUS_MOFF) | CPLUS_VALID; else chksyn_plus |= (kn02memerrs[module] << CPLUS_EOFF) | (module < CPLUS_MOFF) | CPLUS_VALID; kn02logmempkt(EL_PRISEVERE, ep, ELMETYP_RDS, kn02csr, erradr, chksyn_plus); kn02consprint(MEMPKT, ep, kn02csr, erradr, chksyn_plus); kn02eccpanic = 1; if (module == -1) panic("multibit ECC error reported on non-existent memory module"); else panic("multibit memory ECC error"); break; case ERR_RTMO: kn02logesrpkt(EL_PRISEVERE, ep, kn02csr, erradr); kn02consprint(ESR_BUS_PKT, ep, kn02csr, erradr, 0); panic("CPU read bus timeout"); break; case ERR_UKN: default: kn02logesrpkt(EL_PRISEVERE, ep, kn02csr, erradr); kn02consprint(ESR_BUS_PKT, ep, kn02csr, erradr, 0); panic("Unknown bus timeout"); break; } break; case EXC_CPU: kn02logesrpkt(EL_PRISEVERE, ep, kn02csr, 0); kn02consprint(ESR_BUS_PKT, ep, kn02csr, 0, 0); panic("coprocessor unusable"); break; case EXC_RADE: case EXC_WADE: kn02logesrpkt(EL_PRISEVERE, ep, kn02csr, 0); kn02consprint(ESR_BUS_PKT, ep, kn02csr, 0, 0); panic("unaligned access"); break; default: kn02logesrpkt(EL_PRISEVERE, ep, kn02csr, 0); kn02consprint(ESR_BUS_PKT, ep, kn02csr, 0, 0); panic("trap"); break; } } /* * Default user-mode action is to terminate the process */ *signo = SIGBUS; return(0);}#define KN02_LOG_ESRPKT(elrp, cause,epc,sr,badvaddr,sp,csr,erradr) \ elrp->el_body.elesr.elesr.el_esrkn02.esr_cause = cause; \ elrp->el_body.elesr.elesr.el_esrkn02.esr_epc = epc; \ elrp->el_body.elesr.elesr.el_esrkn02.esr_status = sr; \ elrp->el_body.elesr.elesr.el_esrkn02.esr_badva = badvaddr; \ elrp->el_body.elesr.elesr.el_esrkn02.esr_sp = sp; \ elrp->el_body.elesr.elesr.el_esrkn02.esr_csr = csr; \ elrp->el_body.elesr.elesr.el_esrkn02.esr_erradr = erradr; \/* * Log Error & Status Registers to the error log buffer */kn02logesrpkt(priority, ep, kn02csr, erradr) int priority; /* for pkt priority */ register u_int *ep; /* exception frame ptr */ u_int kn02csr; u_int erradr;{ struct el_rec *elrp; elrp = ealloc(sizeof(struct el_esr), priority); if (elrp != NULL) { LSUBID(elrp,ELCT_ESR,ELESR_kn02,EL_UNDEF,EL_UNDEF,EL_UNDEF,EL_UNDEF); KN02_LOG_ESRPKT(elrp, ep[EF_CAUSE], ep[EF_EPC], ep[EF_SR], ep[EF_BADVADDR], ep[EF_SP], kn02csr, erradr); EVALID(elrp); }}/* * Log a memory error packet, so uerf can find it as a main memory error. */kn02logmempkt(priority, ep, type, kn02csr, erradr, chksyn_plus) int priority; /* pkt priority: panic: severe; else: high */ register u_int *ep; /* exception frame ptr */ int type; /* error type: RDS, CRD, DMAOVR */ u_int kn02csr; /* copy of kn02csr to log */ u_int erradr; /* copy of erradr to log */ u_int chksyn_plus; /* chksyn + error count + pc valid bit */{ struct el_rec *elrp; register struct el_mem *mrp; elrp = ealloc(EL_MEMSIZE, priority); if (elrp != NULL) { LSUBID(elrp,ELCT_MEM,EL_UNDEF,ELMCNTR_kn02,EL_UNDEF,EL_UNDEF,EL_UNDEF); mrp = &elrp->el_body.elmem; mrp->elmem_cnt = 1; mrp->elmemerr.cntl = 1; mrp->elmemerr.type = type; mrp->elmemerr.numerr = 1; mrp->elmemerr.regs[0] = ep[EF_EPC]; mrp->elmemerr.regs[1] = kn02csr; mrp->elmemerr.regs[2] = erradr; mrp->elmemerr.regs[3] = chksyn_plus; EVALID(elrp); }}/* * Logs error information to the error log buffer. * Exported through the cpu switch. */kn02_log_errinfo(p)struct kn02log_errinfo_t *p;{ struct el_rec *elrp; switch (p->pkt_type) { case ESR_INTR_PKT: elrp = ealloc(sizeof(struct el_esr), EL_PRISEVERE); if (elrp != NULL) { LSUBID(elrp,ELCT_ESR,ELESR_kn02,EL_UNDEF,EL_UNDEF,EL_UNDEF,EL_UNDEF); KN02_LOG_ESRPKT(elrp, p->cause, p->epc, p->sr, p->badvaddr, p->sp, p->csr, p->erradr); EVALID(elrp); } break; default: cprintf("bad pkt type\n"); return; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -