/* translator.c */
    ASSERT( start + DO_CALLOUT_LEN == memptr);
    trans->fp_tested = 0;
}

/*-----------------------------------------------------------------------------
 *
 * This section provides support for memory system simulation
 *
 *---------------------------------------------------------------------------*/

/* Input:  Addr in A0
 * Output: Translated addr in SIM_T1
 */
static void Page_D_Cache_Check( TransState *trans, char new_state, int init_reg )
{
    if (embra.inlineQC) {
        /* Compute vpn (word aligned) into SIM_T1 */
        ECsh(srl_op_, SIM_T1, init_reg, 12);
        ECsh(sll_op_, SIM_T1, SIM_T1, 2 );
        /* Use vpn to index into TLB array */
        ECs(addu_op_, SIM_T1, MMU_REG, SIM_T1);
        /* Load the (shifted, K0 offset) physical page */
        ECi(lw_op_, SIM_T1, SIM_T1, 0);
        /* Get the offset from the VA */
        ECi(andi_op_, SIM_T4, init_reg, DEFAULT_PAGESZ-1 );
        /* Correct the cycle count */
        ECi(addiu_op_, A3, G0, trans->cycle_correction);
        if( new_state == MEM_D_EXCLUSIVE ) {
            ECb(bltz_op_, SIM_T1, 9 );   /* was 7 */
        } else {
            /* MEM_SHARED */
            ECi(bne_op_, G0, SIM_T1, 9);
        }
        ECnop;
        /* Insert PC into state structure */
        ECi(ADDR_ADDI_OP, V0, PC_REG,
            COMPOSE_PC(trans) - trans->instrGrp->virt_pc);
        ECi(REG_ST_OP, V0, VSS_BASE, PC_OFF);
        ECi(addiu_op_, A1, G0, new_state);
        /* Because we are in the same segment as TC, and we are jumping to */
        /* an assembly routine, we can just use the bits directly */
        ECj(jal_op_, mem_ref_wrapper);
        ECnop;
        /* ugh - zero t4 for or (below) -BL */
        ECi(ori_op_, SIM_T4, 0, 0);
        VCTARGET;   /* branch target for vcode */
        /* Note this delay slot instruction is needed if we DON'T call out */
        /* Or the offset and physical page together */
        ECs(or_op_, SIM_T1, SIM_T1, SIM_T4);
        /* Clear upper bit */
        ECs(and_op_, SIM_T1, MMUMASK_REG, SIM_T1);
    } else {
        ECi(addiu_op_, A3, G0, trans->cycle_correction);   /* Correct the cycle count */
        ECi(ADDR_ADDI_OP, V0, PC_REG,
            COMPOSE_PC(trans) - trans->instrGrp->virt_pc);
        /* ECi( addiu_op_, A3, G0, cycle_correction );*/
        switch( new_state ){
        default:
        case MEM_I_EXCLUSIVE:
        case MEM_I_SHARED:
            CPUError("MemIExclusive\n");
            ASSERT(0);
            break;
            /* ECj(jal_op_, Em_dynPQCish);
             * break; */
        case MEM_D_SHARED:
            ECj(jal_op_, Em_dynPQCdsh);
            break;
        case MEM_D_EXCLUSIVE:
            ECj(jal_op_, Em_dynPQCdex);
            break;
        }
        ECnop;
    }
}
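/*
 * Illustrative sketch (not part of the translator): roughly what the code
 * emitted by Page_D_Cache_Check computes at run time, written as plain C.
 * The layout assumed here (one word per VPN in the array MMU_REG points at,
 * sign bit set when the page is writable, zero when it is unmapped) is
 * inferred from the emitted instructions above; the helper name and the
 * 4K page size are assumptions, not definitions taken from the rest of Embra.
 */
static unsigned
sketch_page_quick_check(const int *mmu_array,   /* what MMU_REG points at    */
                        unsigned mmu_mask,      /* value held in MMUMASK_REG */
                        unsigned va, int is_write)
{
    unsigned vpn   = va >> 12;                  /* virtual page number       */
    int      entry = mmu_array[vpn];            /* shifted, K0-offset page   */
    unsigned off   = va & (4096 - 1);           /* offset within the page    */

    if ( (is_write && entry >= 0) || (!is_write && entry == 0) ) {
        /* Miss (or write to a non-writable page): the emitted code calls
         * out to mem_ref_wrapper here and lets it service the reference. */
        return 0;
    }
    /* Hit: merge page and offset, then clear the protection bit. */
    return ((unsigned)entry | off) & mmu_mask;
}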
/*
 * Cache_D_Cache_Check:
 *
 * This is the check for cache mode.  We have two methods of simulating the
 * cache, the Virtual Quick Check (VQC) and the regular cache check.
 *
 * If we are using the VQC, we index (by cache line) into the VQC array
 * (pointed to by QC_REG), and get a status byte.
 *
 * We also do the virtual address translation here, indexing into the TLB
 * array (by VPN) and getting the physical page address.
 *
 * If we miss in the TLB or the cache, we call out to phys_mem_ref_wrapper,
 * which raises a TLB miss exception or handles the cache miss.  Then
 * once everything has been handled, we rewind the quick check (we passed
 * in a rewind value telling it the size of this code) by adding an offset
 * (plus a delta for the jr/delay slot) to the ra and doing a jr ra.
 */
static void Cache_D_Cache_Check( TransState *trans, char new_state, int init_reg )
{
    static v_label_type vqc_hit, mmu_or_cache_miss, cache_hit;  /* labels for branches */

#if defined(SIM_MIPS64)
    CPUError("Cache mode doesn't work on 64bit\n");
#endif

    /* allocate labels */
    vqc_hit = v_genlabel();
    mmu_or_cache_miss = v_genlabel();
    cache_hit = v_genlabel();

    if (embra.useVQC){
        SET_LABEL(rewind_dqc );                             /* rewind_dqc => offset of first instruction */
        ECsh( srl_op_, SIM_T1, init_reg, log2SCACHE_LINE_SIZE );  /* Virtual Cache line number -> SIM_T1 */
        ECs( addu_op_, SIM_T2, SIM_T1, QC_REG );            /* (mem_state + (addr>>LOG2SCACHE_LINE_SIZE)) -> SIM_T2 */
        ECi( lb_op_, SIM_T2, SIM_T2, 0 );                   /* Load Status Byte into SIM_T2 */
        ECsh( srl_op_, SIM_T1, init_reg, 12);               /* Compute VPN (word aligned) into SIM_T1 */
        ECsh( sll_op_, SIM_T1, SIM_T1, 2);
        ECs( addu_op_, SIM_T1, MMU_REG, SIM_T1 );           /* Use VPN to index into TLB array */
        ECi( lw_op_, SIM_T4, SIM_T1, 0 );                   /* Load the (shifted, K0 offset) physical page */
        ECi( andi_op_, SIM_T1, init_reg, DEFAULT_PAGESZ-1); /* Get the offset from the VA */
        ECs(or_op_, SIM_T1, SIM_T1, SIM_T4);                /* Or the offset and physical page together -
                                                             * needed if we DON'T call out */
        if( new_state == MEM_D_EXCLUSIVE ) {
            v_bltii( VREGS[SIM_T2], 0, vqc_hit);            /* MEM_D_EXCLUSIVE: write, branch on negative */
        } else {
            v_bneii( VREGS[SIM_T2], 0, vqc_hit);            /* MEM_SHARED: read, go if non-zero */
        }
        ECnop;
        ECi(ADDR_ADDI_OP, V0, PC_REG,                       /* Insert PC, and jump to phys_mem_ref_wrapper */
            COMPOSE_PC(trans) - trans->instrGrp->virt_pc);
        ECi(REG_ST_OP, V0, VSS_BASE, PC_OFF);
        ECi(addiu_op_, A3, G0, trans->cycle_correction);    /* Correct the cycle count */
        ECi(addiu_op_, A1, G0, new_state);
        if( embra.sequential ) {
            ECi(addiu_op_, SIM_T2, G0, 0);                  /* In MPinUP no need to rewind quick check */
        } else {
            ECilab(addiu_op_, SIM_T2, G0, USE_LABEL_VALUE(rewind_dqc));
        }
        ECj(jal_op_, phys_mem_ref_wrapper);                 /* routine is within jump range */
        ECnop;
        v_label(vqc_hit);                                   /* branch target for vcode */
    } else {
        /* !embra.useVQC */
        ECsh(srl_op_, SIM_T1, init_reg, 12);                /* Compute vpn (word aligned) into SIM_T1 */
        ECsh(sll_op_, SIM_T1, SIM_T1, 2 );
        ECs(addu_op_, SIM_T1, MMU_REG, SIM_T1);             /* Use vpn to index into TLB array */
        ECi(lw_op_, SIM_T1, SIM_T1, 0);                     /* Load the (shifted, K0 offset) physical page */
        ECi(andi_op_, SIM_T4, init_reg, DEFAULT_PAGESZ-1 ); /* Get the offset from the VA */
        ECs(or_op_, SIM_T1, SIM_T1, SIM_T4);                /* Or the offset and physical page together
                                                             * - needed if we hit in TLB */
        if( new_state == MEM_D_EXCLUSIVE ) {
            /* Data_Write */
            v_bgeii( VREGS[SIM_T1], 0, mmu_or_cache_miss);  /* write protect -> neg mmu-entry */
        } else {
            /* MEM_SHARED: Data_Read */
            v_beqii( VREGS[SIM_T1], 0, mmu_or_cache_miss);  /* branch on MMU miss */
        }
        ECnop;

        /* MMU hit -> check physarray ( phys addr. is in SIM_T1 ) */
        ECs( and_op_, SIM_T1, MMUMASK_REG, SIM_T1);         /* delete protection bit of MMU entry */
        ECsh( srl_op_, SIM_T2, SIM_T1, log2SCACHE_LINE_SIZE);  /* SIM_T2 = cache line index */
        ECs( addu_op_, SIM_T2, SIM_T2, PA_REG );            /* physical line number: SIM_T2 = PA_REG + SIM_T2 */
        ECi( lb_op_, SIM_T2, SIM_T2, 0 );                   /* Load PA entry byte into SIM_T2 */
        ECi( sll_op_, SIM_T2, SIM_T2, 24);                  /* exclusive entry = 0x80 */
        ECi( ADDR_ADDI_OP, SIM_T4, PC_REG,                  /* update PC - IS THIS NEEDED???? */
            COMPOSE_PC(trans) - trans->instrGrp->virt_pc);
        /* Maybe we need the PC on cache hits? */
        ECi( REG_ST_OP, V0, VSS_BASE, PC_OFF);
        if( new_state == MEM_D_EXCLUSIVE ) {
            v_bltii( VREGS[SIM_T2], 0, cache_hit );         /* MEM_D_EXCLUSIVE: branch on PA WRITE Hit */
        } else {
            v_bneii( VREGS[SIM_T2], 0, cache_hit);          /* MEM_SHARED: branch on PA READ Hit */
        }
        ECnop;

        v_label(mmu_or_cache_miss);                         /* jump here on mmu/cache miss */
        ECi(ADDR_ADDI_OP, SIM_T4, PC_REG,                   /* Update PC ???? */
            COMPOSE_PC(trans) - trans->instrGrp->virt_pc);
        ECi(REG_ST_OP, V0, VSS_BASE, PC_OFF);
        ECi(addiu_op_, A3, G0, trans->cycle_correction);    /* Correct the cycle count */
        ECi(addiu_op_, A1, G0, new_state);
        if( embra.sequential ) {
            ECi(addiu_op_, SIM_T2, G0, 0);                  /* In MPinUP no need to rewind quick check */
        } else {
            ECilab(addiu_op_, SIM_T2, G0, USE_LABEL_VALUE(rewind_dqc));
        }
        ECj(jal_op_, pa_mem_ref_wrapper);                   /* In same segment */
        ECnop;
        v_label(cache_hit);                                 /* jump here on cache hit */
    }
}
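/*
 * Illustrative sketch (not part of the translator): the run-time effect of
 * the VQC lookup emitted above, in plain C.  One status byte is kept per
 * virtual cache line; a negative byte means the line is held exclusive
 * (writable) and any non-zero byte means it is at least shared (readable).
 * The encoding and the helper name are assumptions inferred from the
 * branches and comments above, not Embra's actual declarations.
 */
static int
sketch_vqc_lookup(const signed char *vqc,     /* what QC_REG points at */
                  unsigned log2_line_size,    /* log2SCACHE_LINE_SIZE  */
                  unsigned va, int is_write)
{
    signed char state = vqc[va >> log2_line_size];

    /* A miss makes the emitted code call phys_mem_ref_wrapper, passing the
     * rewind offset (rewind_dqc) so the whole quick check is re-executed
     * once the miss has been serviced. */
    if (is_write)
        return state < 0;       /* need the line exclusive to write */
    return state != 0;          /* shared or exclusive is enough to read */
}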
/* This assumes rs has the base address (without extra field)
 * It returns the translated address in SIM_T1
 */
static int D_Memory_Check( TransState *trans, unsigned instr, char new_state)
{
    int tmp1;
    int dst;

    trans->fp_tested = 0;

    /* Put addr into A_REG0 */
    /* GPR[rs] + offset -> A_REG0 */
    tmp1 = Load( A0, rs(instr) );
    dst = tmp1;

    /* So the deal is -- We need the Base+Offset value in A0 for the callout */
    /* if we don't do it here, then our QC rewind gets hassled */
    /* we could only do it in the callout part of the cache check, but then */
    /* different QC's would be different lengths. */
    /* The solution to that would be for the QC to rewind itself either */
    /* with a jump or passing the rewind offset to the wrapper function */
    if( IMMED(instr) ) {
        ECi(ADDR_ADDI_OP, A0, tmp1, IMMED(instr) );
        dst = A0;
    } else {
        /* Ensure that address gets loaded into A0 for the call to mem_ref */
        ECs(or_op_, A0, G0, tmp1);
        dst = A0;
    }

    switch( embra.emode ) {
    case EMBRA_PAGE:
        Page_D_Cache_Check( trans, new_state, dst);
        break;
    case EMBRA_CACHE:
        Cache_D_Cache_Check( trans, new_state, dst);
        break;
    default:
        CPUError("Embra mode incorrect\n");
    }
    return SIM_T1;
}

static void Check_Timer( int num_cycles, uint pc_val, int bd_slot )
{
    extern int clock_val;               /* In clock.c */
    PC_BD pc = pc_val | bd_slot;        /* bd_slot must be 1 or 0 */

    /* Decrement by number of instructions, allowing processor speed to */
    /* be controlled by timer value */
    /* can fit this in 16 bits. As processor speeds up be careful */
    ECi(addiu_op_, CLOCK_REG, CLOCK_REG, -num_cycles);
    /* Store cycle count for interventions */
    if( embra.emode == EMBRA_CACHE && embra.parallel ) {
        ECi(sw_op_, CLOCK_REG, VSS_BASE, CCD_OFF);
    } else {
        /* Don't allow b in bdelay slot */
        if( num_cycles == 2 ) {
            ECnop;
        }
    }
    ECilab(blez_op_, G0, CLOCK_REG, USE_LABEL( do_periodic ) );
    ECnop;
    /* XXXXX - DANGER - NO translation may begin with a store to the */
    /* simulated state.  If it did, it could be in this branch delay */
    /* slot, and that would cause incorrect behavior */
#ifdef CHAIN_BAR_SYNC
    {
        extern void embra_sync_barrier(void);
        int OK = 1;
        int i;
        for( i = 0; i < TOTAL_CPUS; i++ )
            OK = OK && EMP[i].outOfSlaveLoop;
        if( OK ) {
            ECi(ori_op_, SIM_T4, G0, 1);
            /* This thing is broken */
            ECj(jal_op_, embra_sync_barrier);
            ECi(sw_op_, SIM_T4, VSS_BASE, OUTTC_OFF);
            ECi(sw_op_, G0, VSS_BASE, OUTTC_OFF);
        }
    }
#endif
}
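/*
 * Illustrative sketch (not part of the translator): the countdown scheme
 * that Check_Timer emits, in plain C.  Translated code keeps a cycle budget
 * in CLOCK_REG; each block subtracts its cost and branches to the
 * do_periodic code once the budget is used up.  The variable and handler
 * names below are stand-ins, not symbols defined elsewhere in Embra.
 */
static int sketch_cycle_budget;                    /* stands in for CLOCK_REG */

static void
sketch_check_timer(int num_cycles, void (*do_periodic_handler)(void))
{
    sketch_cycle_budget -= num_cycles;             /* addiu CLOCK_REG, CLOCK_REG, -cycles */
    if (sketch_cycle_budget <= 0)                  /* blez CLOCK_REG, do_periodic         */
        do_periodic_handler();                     /* take clock interrupts, etc.         */
}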
static void
Increment_Memory_Access_Count( int num_D_accesses, int num_I_accesses )
{
    if( embra.emode == EMBRA_CACHE ) {
        if( num_D_accesses ) {
            ECi(addiu_op_, DHIT_REG, DHIT_REG, num_D_accesses );
        }
        if( num_I_accesses ) {
            ECi(addiu_op_, IHIT_REG, IHIT_REG, num_I_accesses );
        }
    }
}

/* The kernel doesn't want to spill fp registers on context switches, */
/* so it sets cp1 unusable, and relies on a coprocessor unusable */
/* exception to tell it that the process is using floating point */
static void
Check_C1_Usable(TransState *trans)
{
    if (trans->fp_tested) {
        /*
         * Simple optimization. Do not check on consecutive
         * FP ops. fp_tested gets cleared on all callouts.
         */
        return;
    }

    /*
     * Determine whether the FP register file needs to be
     * loaded or not. This is the complementary operation
     * to the SPILL_FP_ENABLED callout macro.
     */
    ECi(lw_op_, SIM_T1, VSS_BASE, FPLOADED_OFF);
    ECi(bne_op_, G0, SIM_T1, 3);
    ECi(ori_op_, SIM_T1, G0, 1);
    ECi(sw_op_, SIM_T1, VSS_BASE, FPLOADED_OFF);
    ECj(jal_op_, RestoreFP);
    ECnop;
    VCTARGET;

    /*
     * Check if the OS has enabled the COP1. Generate
     * an exception callout otherwise. Note that the exception callout
     * never returns.
     */
    ECi(REG_LD_OP, SIM_T4, VSS_BASE, CP0_OFF + C0_SR*REG_SIZE);
    /* XXX - this assumes that SR_CU1 has its bit set in the upper 16 */
    ECi(lui_op_, SIM_T1, G0, SR_CU1>>16);
    ECs(and_op_, SIM_T4, SIM_T4, SIM_T1);
    /* XXX - this offset depends on implementation of Do_Callout */
    ECi(bne_op_, G0, SIM_T4, DO_CALLOUT_LEN);
    Do_Callout( trans, CALLOUT_RAISE_C1_UNUSABLE);
    VCTARGET;

    /*
     * Need to do this after the callout since the callout
     * clears fp_tested. That's fine since the callout never
     * returns if it is taken.
     */
    trans->fp_tested = 1;
}

/* A (very simple) pipeline timing model:
 * Assume 1 CPI for straight-line code;
 * branch and other stalls are accounted for
 * elsewhere (???)
 */
uint
Pipe_Time(InstrGrp* thisGrp, int is_delay_slot_instr )
{
    if( is_delay_slot_instr ) {
        ASSERT( thisGrp->GrpLen == 2 );
        return (thisGrp->GrpLen - 1);
    }
    return (thisGrp->GrpLen);
}

/*----------------------------------------------------------------------------
 *
 * Routine: Translate
 *
 * This is the main translation routine.  It calls DecodeInstr, which
 * decodes a basic block of instructions, and then for each instruction
 * it switches on the opcode and writes the appropriate translation.
 * Other code emitted:
 * We emit I-cache checks at the start of a basic block and at
 * I-cache line boundaries.  We check the clock at the start to see if
 * we need to take a clock interrupt.  We decrement the