📄 ka650.c
字号:
* Called from locore thru the cpu switch in response to a trap at SCB 4 * We recover from any that we can if hardware "retry" is possible. */ka650machcheck (cmcf)caddr_t cmcf;{ register u_int type; register struct mcCVAXframe *mcf; register int *cacheptr; /* ptr to flush 2nd level cache */ register int *cacheend; /* ptr to end of 2nd level cache */ register int recover; /* set to 1 if we can recover from this error */ register u_int time; /* from TODR */ int cpunum; /* 0 for uniprocessor */ int retry; /* set to 1 if the hardware can retry the instr */ int ws_disp; /* type of work station display for ioctl call */ /* * Disable 2nd level cache to protect execution of machine check code. * NOTE: Do NOT put any executable code in this routine before * disabling the cache, the cache may be what caused the mcheck. */ if (cvqrom->cvq7_sysdep != SB_KA640) cvqcb->cvq2_cacr1 &= ~CACR_CEN; /* * Do not allow recursive machine check. * Halt the processor, then a restart should get a dump. */ if (ka650_mchkprog == 0) ka650_mchkprog = 1; else { asm("halt"); } type = ((struct mcframe *) cmcf)->mc_summary; mcf = (struct mcCVAXframe *) cmcf; recover = 0; cpunum = 0; retry = 0; /* * First note the time; then determine if hardware retry is * possible, which will be used for the recoverable cases. */ time = mfpr(TODR); if (((mcf->mc1_psl & PSL_FPD) != 0) || (((mcf->mc1_psl & PSL_FPD) == 0) && ((mcf->mc1_internal_state2 & IS2_VCR) == 0))) retry = 1; switch (type) { case 1: case 2: case 3: case 4: /* * CFPA errors * Re-enable 2nd level cache to log in correct state. * If fewer than 3 errors in 1 time period, try to recover * else we will crash. */ if (cache2_state) { cacheptr = cvqcache->cvq5_cache; cacheend = cvqcache->cvq5_cache + CACHE_SIZE; for (; cacheptr < cacheend; ) { *cacheptr++ = 0; } cvqcb->cvq2_cacr1 |= CACR_CEN; } if (time - cfpa_errcnt.cfpa_prev > TIME_THRESH) { if (retry) recover = 1; } logmck((int *)cmcf, ELMCKT_CVAX, cpunum, recover); if (recover) { cfpa_errcnt.cfpa_prev = cfpa_errcnt.cfpa_last; cfpa_errcnt.cfpa_last = time; } else { ka650consprint(2,type,mcf); } break; case 5: case 6: case 7: case 8: case 9: case 10: /* * These are non-recoverable: * Re-enable 2nd level cache to log in correct state. * Log the mcheck & print mcheck frame to console. */ if (cache2_state) { cacheptr = cvqcache->cvq5_cache; cacheend = cvqcache->cvq5_cache + CACHE_SIZE; for (; cacheptr < cacheend; ) { *cacheptr++ = 0; } cvqcb->cvq2_cacr1 |= CACR_CEN; } logmck((int *)cmcf, ELMCKT_CVAX, cpunum, recover); ka650consprint(2,type,mcf); break; case 0x80: case 0x81: /* * There are several possible causes for this mcheck. * First re-enable the 2nd level cache if its not * a cache error and the cache was enabled before * we entered mcheck (for recovery and so we log it * in the state it was in when mcheck occured). * Check for which error caused the mcheck and take * appropriate action. */ if (((mfpr(MSER) & MSER_MCD) == 0) && cache2_state) { cacheptr = cvqcache->cvq5_cache; cacheend = cvqcache->cvq5_cache + CACHE_SIZE; for (; cacheptr < cacheend; ) { *cacheptr++ = 0; } cvqcb->cvq2_cacr1 |= CACR_CEN; } if ((mfpr(MSER) & MSER_MCD) != 0) { /* * CDAL Bus Parity Err/2nd Level Cache Data Parity Err. * See if 3 within 1 time period. */ if (time - cdal_errcnt.cdal_prev <= TIME_THRESH_C2) { /* * Got 3 errors within 1 time period. * Action depends on prior state of 2nd level cache: * was on: Try to Recover * was off: No Recover (always for ka640) */ if (cache2_state) { /* * Cache was on, 3 errs in 1 time period */ if (retry) recover = 1; else { /* * If console is a graphics device, * force printf messages directly to screen. */ if (ws_display_type) { ws_disp = ws_display_type << 8; (*cdevsw[ws_display_type].d_ioctl)(ws_disp, QD_KERN_UNLOOP, 0, 0); } } printf("2nd Level Cache DISABLED by software on mchk\n"); cache2_state = 0; cdal_errcnt.cdal_last = 0; time = 0; } /* * Note: 3 err/time period w/ cache disabled => panic */ } else { /* * Got fewer than 3 errors within 1 time period. * Action depends on prior state of 2nd level cache * was on: Flush cache & try to recover * was off: try to recover */ if (cache2_state) { /* * Cache was on, < 3 errs in 1 time period * Flush & Re-enable the 2nd level cache */ cacheptr = cvqcache->cvq5_cache; cacheend = cvqcache->cvq5_cache + CACHE_SIZE; for (; cacheptr < cacheend; ) { *cacheptr++ = 0; } cvqcb->cvq2_cacr1 |= CACR_CEN; mprintf("2nd level cache re-enabled by software on mcheck\n"); if (retry) recover = 1; } else { /* * Cache was off, < 3 errs in 1 time period */ if (retry) recover = 1; } } /* * Log the machine check, and the error status regs. * If we can recover update the times, * else print errors on the console. * Last, clear the error bits. */ logmck((int *)cmcf, ELMCKT_CVAX, cpunum, recover); ka650logesrpkt(recover); if (recover) { cdal_errcnt.cdal_prev = cdal_errcnt.cdal_last; cdal_errcnt.cdal_last = time; } else { ka650consprint(2,type,mcf); ka650consprint(3,0,0); } mtpr(MSER,1); } else if ((mfpr(MSER) & MSER_MCC) != 0) { /* * 1st Level Cache Parity Err (CPU disables & flushes). * If recovery is possible, do so, else log and quit. */ if (retry) { recover = 1; logmck((int *)cmcf, ELMCKT_CVAX, cpunum, recover); mtpr(MSER,1); } else { logmck((int *)cmcf, ELMCKT_CVAX, cpunum, recover); ka650consprint(2,type,mcf); mtpr(MSER,1); break; /* out of switch */ } /* * Recovery procedures: */ if (time - cache_errcnt.cache_prev <= TIME_THRESH_C1) { /* * Got 3 errors within 1 time period. * Action depends on prior state of 1st level cache: * Reenable whichever cache sets were NOT on. * And reset the timers to require 3 * errs/time period with the new cache setting. */ switch (cache_state & CADR_SETMASK) { case SET_BOTH: cache_state &= ~(CVAX_SEN2); mtpr (CADR, cache_state); printf("1st Level Cache, Set 2 DISABLED, Set 1 Enabled by software on mchk\n"); cache_errcnt.cache_last = 0; time = 0; break; case SET_ONE: cache_state |= (CVAX_SEN2); cache_state &= ~(CVAX_SEN1); mtpr (CADR, cache_state); printf("1st Level Cache, Set 1 DISABLED, Set 2 Enabled by software on mchk\n"); cache_errcnt.cache_last = 0; time = 0; break; case SET_TWO: cache_state &= ~(CVAX_SEN2); /* * Set I & D stream for 2nd level * cache operation. */ mtpr (CADR, cache_state); printf("1st Level Cache Completely DISABLED by software on mchk\n"); cache_errcnt.cache_last = 0; time = 0; break; case SET_NONE: recover = 0; /* don't recover */ ka650consprint(2,type,mcf); printf("Got a 1st Level Cache Parity Error with the 1st Level Cache Disabled!\n"); break; default: cache_state = (CVAX_CEND | CVAX_CENI | CVAX_SEN1 | CVAX_SEN2); mtpr (CADR, cache_state); break; } } else { /* * Fewer than 3 errs in 1 time period, * reenable whichever cache sets were on. */ switch (cache_state & CADR_SETMASK) { case SET_BOTH: mtpr (CADR, cache_state); mprintf("1st Level Cache, Re-enabled by software on mchk\n"); break; case SET_ONE: mtpr (CADR, cache_state); mprintf("1st Level Cache, Set 1 Re-enabled, Set 2 left Disabled by software on mchk\n"); break; case SET_TWO: mtpr (CADR, cache_state); mprintf("1st Level Cache, Set 2 Re-enabled, Set 1 left Disabled by software on mchk\n"); break; case SET_NONE: recover = 0; /* don't recover */ ka650consprint(2,type,mcf); printf("Got a 1st Level Cache Parity Error with the 1st Level Cache Disabled!\n"); break; default: cache_state = (CVAX_CEND | CVAX_CENI | CVAX_SEN1 | CVAX_SEN2); mtpr (CADR, cache_state); mprintf("1st Level Cache, Re-enabled by software on mchk\n"); break; } } /* * Since we can recover update the times, */ cache_errcnt.cache_prev = cache_errcnt.cache_last; cache_errcnt.cache_last = time; } else if ((cvqmerr->cvq1_dser & DSER_NOGRANT) != 0) { /* * Q-22 Bus No Grant timeout on cpu demand R/W. * If fewer than 3 errs in 1 time period, try to recover * else we will crash. * If mcheck type is 80, no recover due to bug in * pass3 CVAX CPU chip which incorreclty reports * an mcheck 82 as 80. */ if (type == 0x80) recover = 0; else if (time - qngr_errcnt.qngr_prev > TIME_THRESH) { if (retry) recover = 1; } /* * Log the machine check, and the error status regs. * If we can recover update the times, * else print errors on the console. * Last, clear the error bits. */ logmck((int *)cmcf, ELMCKT_CVAX, cpunum, recover); ka650logesrpkt(recover); if (recover) { qngr_errcnt.qngr_prev = qngr_errcnt.qngr_last; qngr_errcnt.qngr_last = time; } else { ka650consprint(2,type,mcf); ka650consprint(3,0,0); } cvqmerr->cvq1_dser |= DSER_CLEAR; } else if ((cvqmerr->cvq1_memcsr16 & MEM_CDAL) != 0) { /* * CDAL Bus Parity Error * Log the mcheck, ESR Packet, & Mem Packet. * * Note: * This is really a machine check 82, but may * be incorrectly reported as 80 due to a bug * in the pass 3 CVAX CPU chip. */ if (time - cdalW_errcnt.cdalW_prev > TIME_THRESH) { if (retry) recover = 1; } logmck((int *)cmcf, ELMCKT_CVAX, cpunum, recover); ka650logesrpkt(recover); ka650logmempkt(recover); if (recover) { cdalW_errcnt.cdalW_prev = cdalW_errcnt.cdalW_last; cdalW_errcnt.cdalW_last = time; } else { ka650consprint(2,type,mcf); ka650consprint(3,0,0); ka650consprint(4,0,0); } cvqmerr->cvq1_memcsr16 |= MEM_EMASK; } else if ((cvqmerr->cvq1_memcsr16 & MEM_RDS) != 0) { /* * Main memory uncorrectable ECC error: * Disable Main Mem error detection & correction * to protect mcheck code & to get core dump. * Log the mcheck, ESR Packet, & Mem Packet. * We can't recover so print errors on the console. * NOTE: logmempkt sets the global var ka650_module * "ka650_module" is set to a coded number! * NOTE: consprint() sets up display for cprintf here! * Last, clear the error bits. */ cvqmerr->cvq1_memcsr17 |= MEM_ERRDIS; logmck((int *)cmcf, ELMCKT_CVAX, cpunum, recover); ka650logesrpkt(recover); ka650logmempkt(recover); ka650consprint(2,type,mcf); ka650consprint(3,0,0); cprintf("Uncorrectable ECC Error detected in Main Memory <%x>\n", ka650_module); ka650consprint(4,0,0); cvqmerr->cvq1_memcsr16 |= MEM_EMASK; cvqmerr->cvq1_dser |= DSER_CLEAR; } else if ((cvqmerr->cvq1_dser & DSER_QNXM) != 0) { /* * Q-22 Bus Non-existent Memory. * If fewer than 3 errs in 1 time period, try to recover * else we will crash. */ if (time - qnxm_errcnt.qnxm_prev > TIME_THRESH) { if (retry) recover = 1; } /* * Log the machine check, and the error status regs. * If we can recover update the times, * else print errors on the console. * Last, clear the error bits. */ logmck((int *)cmcf, ELMCKT_CVAX, cpunum, recover); ka650logesrpkt(recover); if (recover) { qnxm_errcnt.qnxm_prev = qnxm_errcnt.qnxm_last; qnxm_errcnt.qnxm_last = time; } else { ka650consprint(2,type,mcf); ka650consprint(3,0,0); } cvqmerr->cvq1_dser |= DSER_CLEAR; } else if ((cvqmerr->cvq1_dser & DSER_QPE) != 0) { /* * Q-22 Bus device parity error. * If fewer than 3 errs in 1 time period, try to recover * else we will crash. */ if (time - qpe_errcnt.qpe_prev > TIME_THRESH) { if (retry) recover = 1; } /* * Log the machine check, and the error status regs. * If we can recover update the times, * else print errors on the console. * Last, clear the error bits. */ logmck((int *)cmcf, ELMCKT_CVAX, cpunum, recover); ka650logesrpkt(recover); if (recover) { qpe_errcnt.qpe_prev = qpe_errcnt.qpe_last; qpe_errcnt.qpe_last = time; } else { ka650consprint(2,type,mcf); ka650consprint(3,0,0); } cvqmerr->cvq1_dser |= DSER_CLEAR; } else if ((cvqmerr->cvq1_dser & DSER_DNXM) != 0) { /* * DMA NXM * If fewer than 3 errs in 1 time period, try to recover * else we will crash. */ if (time - dnxm_errcnt.dnxm_prev > TIME_THRESH) { if (retry) recover = 1; } /* * Log the machine check, and the error status regs. * If we can recover update the times, * else print errors on the console. * Last, clear the error bits. */ logmck((int *)cmcf, ELMCKT_CVAX, cpunum, recover); ka650logesrpkt(recover); if (recover) { dnxm_errcnt.dnxm_prev = dnxm_errcnt.dnxm_last; dnxm_errcnt.dnxm_last = time; } else { ka650consprint(2,type,mcf); ka650consprint(3,0,0); } cvqmerr->cvq1_dser |= DSER_CLEAR; } else { /* * Undefined Machine check 0x80, 0x81. * Log the mcheck, ESR Packet, & Mem Packet. * We can't recover so print errors on the console. */ logmck((int *)cmcf, ELMCKT_CVAX, cpunum, recover); mprintf("No primary error flag - unspecified error type\n"); ka650logesrpkt(recover); ka650logmempkt(recover); ka650consprint(2,type,mcf); cprintf("No primary error flag - unspecified error type\n"); ka650consprint(3,0,0); ka650consprint(4,0,0); cvqmerr->cvq1_memcsr16 |= MEM_EMASK; } break; case 0x82: case 0x83: /* * Re-enable (after flushing) the 2nd level cache if cache * was enabled before we entered mcheck (we may recover, plus * we log it in the state it was in when mcheck occured). */ if (cache2_state) { cacheptr = cvqcache->cvq5_cache; cacheend = cvqcache->cvq5_cache + CACHE_SIZE; for (; cacheptr < cacheend; ) { *cacheptr++ = 0; } cvqcb->cvq2_cacr1 |= CACR_CEN;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -