/* translator.c */
    ASSERT( start + DO_CALLOUT_LEN == memptr);
    trans->fp_tested = 0;
}

/*-----------------------------------------------------------------------------
 *
 * This section provides support for memory system simulation
 *
 *---------------------------------------------------------------------------*/

/* Input:  Addr in A0
 * Output: Translated addr in SIM_T1
 */
static void Page_D_Cache_Check( TransState *trans, char new_state, int init_reg )
{
    if (embra.inlineQC) {
        /* Compute vpn (word aligned) into SIM_T1 */
        ECsh(srl_op_, SIM_T1, init_reg, 12);
        ECsh(sll_op_, SIM_T1, SIM_T1, 2 );
        /* Use vpn to index into TLB array */
        ECs(addu_op_, SIM_T1, MMU_REG, SIM_T1);
        /* Load the (shifted, K0 offset) physical page */
        ECi(lw_op_, SIM_T1, SIM_T1, 0);
        /* Get the offset from the VA */
        ECi(andi_op_, SIM_T4, init_reg, DEFAULT_PAGESZ-1 );
        /* Correct the cycle count */
        ECi(addiu_op_, A3, G0, trans->cycle_correction);
        if( new_state == MEM_D_EXCLUSIVE ) {
            ECb(bltz_op_, SIM_T1, 9 );   /* was 7 */
        } else {
            /* MEM_SHARED */
            ECi(bne_op_, G0, SIM_T1, 9);
        }
        ECnop;
        /* Insert PC into state structure */
        ECi(ADDR_ADDI_OP, V0, PC_REG,
            COMPOSE_PC(trans) - trans->instrGrp->virt_pc);
        ECi(REG_ST_OP, V0, VSS_BASE, PC_OFF);
        ECi(addiu_op_, A1, G0, new_state);
        /* Because we are in the same segment as TC, and we are jumping to */
        /* an assembly routine, we can just use the bits directly */
        ECj(jal_op_, mem_ref_wrapper);
        ECnop;
        /* ugh - zero t4 for or (below) -BL */
        ECi(ori_op_, SIM_T4, 0, 0);
        VCTARGET;   /* branch target for vcode */
        /* Note this delay slot instruction is needed if we DON'T call out */
        /* Or the offset and physical page together */
        ECs(or_op_, SIM_T1, SIM_T1, SIM_T4);
        /* Clear upper bit */
        ECs(and_op_, SIM_T1, MMUMASK_REG, SIM_T1);
    } else {
        ECi(addiu_op_, A3, G0, trans->cycle_correction);   /* Correct the cycle count */
        ECi(ADDR_ADDI_OP, V0, PC_REG,
            COMPOSE_PC(trans) - trans->instrGrp->virt_pc);
        /* ECi( addiu_op_, A3, G0, cycle_correction );*/
        switch( new_state ){
        default:
        case MEM_I_EXCLUSIVE:
        case MEM_I_SHARED:
            CPUError("MemIExclusive\n");
            ASSERT(0);
            break;
            /* ECj(jal_op_, Em_dynPQCish);
             * break; */
        case MEM_D_SHARED:
            ECj(jal_op_, Em_dynPQCdsh);
            break;
        case MEM_D_EXCLUSIVE:
            ECj(jal_op_, Em_dynPQCdex);
            break;
        }
        ECnop;
    }
}
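/*
 * Illustrative sketch (not part of the translator): roughly what the code
 * emitted by Page_D_Cache_Check computes at run time, written as plain C.
 * The layout assumed here (one word per VPN in the array MMU_REG points at,
 * sign bit set when the page is writable, zero when it is unmapped) is
 * inferred from the emitted instructions above; the helper name and the
 * 4K page size are assumptions, not definitions taken from the rest of Embra.
 */
static unsigned
sketch_page_quick_check(const int *mmu_array,   /* what MMU_REG points at    */
                        unsigned mmu_mask,      /* value held in MMUMASK_REG */
                        unsigned va, int is_write)
{
    unsigned vpn   = va >> 12;                  /* virtual page number       */
    int      entry = mmu_array[vpn];            /* shifted, K0-offset page   */
    unsigned off   = va & (4096 - 1);           /* offset within the page    */

    if ( (is_write && entry >= 0) || (!is_write && entry == 0) ) {
        /* Miss (or write to a non-writable page): the emitted code calls
         * out to mem_ref_wrapper here and lets it service the reference. */
        return 0;
    }
    /* Hit: merge page and offset, then clear the protection bit. */
    return ((unsigned)entry | off) & mmu_mask;
}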
/*
 * Cache_D_Cache_Check:
 *
 * This is the check for cache mode.  We have two methods of simulating the
 * cache, the Virtual Quick Check (VQC) and the regular cache check.
 *
 * If we are using the VQC, we index (by cache line) into the VQC array
 * (pointed to by QC_REG), and get a status byte.
 *
 * We also do the virtual address translation here, indexing into the TLB
 * array (by VPN) and getting the physical page address.
 *
 * If we miss in the TLB or the cache, we call out to phys_mem_ref_wrapper,
 * which raises a TLB miss exception or handles the cache miss.  Then
 * once everything has been handled, we rewind the quick check (we passed
 * in a rewind value telling it the size of this code) by adding an offset
 * (plus a delta for the jr/delay slot) to the ra and doing a jr ra.
 */
static void Cache_D_Cache_Check( TransState *trans, char new_state, int init_reg )
{
    static v_label_type vqc_hit, mmu_or_cache_miss, cache_hit;  /* labels for branches */

#if defined(SIM_MIPS64)
    CPUError("Cache mode doesn't work on 64bit\n");
#endif

    /* allocate labels */
    vqc_hit = v_genlabel();
    mmu_or_cache_miss = v_genlabel();
    cache_hit = v_genlabel();

    if (embra.useVQC){
        SET_LABEL(rewind_dqc );                             /* rewind_dqc => offset of first instruction */
        ECsh( srl_op_, SIM_T1, init_reg, log2SCACHE_LINE_SIZE );  /* Virtual Cache line number -> SIM_T1 */
        ECs( addu_op_, SIM_T2, SIM_T1, QC_REG );            /* (mem_state + (addr>>LOG2SCACHE_LINE_SIZE)) -> SIM_T2 */
        ECi( lb_op_, SIM_T2, SIM_T2, 0 );                   /* Load Status Byte into SIM_T2 */
        ECsh( srl_op_, SIM_T1, init_reg, 12);               /* Compute VPN (word aligned) into SIM_T1 */
        ECsh( sll_op_, SIM_T1, SIM_T1, 2);
        ECs( addu_op_, SIM_T1, MMU_REG, SIM_T1 );           /* Use VPN to index into TLB array */
        ECi( lw_op_, SIM_T4, SIM_T1, 0 );                   /* Load the (shifted, K0 offset) physical page */
        ECi( andi_op_, SIM_T1, init_reg, DEFAULT_PAGESZ-1); /* Get the offset from the VA */
        ECs(or_op_, SIM_T1, SIM_T1, SIM_T4);                /* Or the offset and physical page together -
                                                             * needed if we DON'T call out */
        if( new_state == MEM_D_EXCLUSIVE ) {
            v_bltii( VREGS[SIM_T2], 0, vqc_hit);            /* MEM_D_EXCLUSIVE: write, branch on negative */
        } else {
            v_bneii( VREGS[SIM_T2], 0, vqc_hit);            /* MEM_SHARED: read, go if non-zero */
        }
        ECnop;
        ECi(ADDR_ADDI_OP, V0, PC_REG,                       /* Insert PC, and jump to phys_mem_ref_wrapper */
            COMPOSE_PC(trans) - trans->instrGrp->virt_pc);
        ECi(REG_ST_OP, V0, VSS_BASE, PC_OFF);
        ECi(addiu_op_, A3, G0, trans->cycle_correction);    /* Correct the cycle count */
        ECi(addiu_op_, A1, G0, new_state);
        if( embra.sequential ) {
            ECi(addiu_op_, SIM_T2, G0, 0);                  /* In MPinUP no need to rewind quick check */
        } else {
            ECilab(addiu_op_, SIM_T2, G0, USE_LABEL_VALUE(rewind_dqc));
        }
        ECj(jal_op_, phys_mem_ref_wrapper);                 /* routine is within jump range */
        ECnop;
        v_label(vqc_hit);                                   /* branch target for vcode */
    } else {
        /* !embra.useVQC */
        ECsh(srl_op_, SIM_T1, init_reg, 12);                /* Compute vpn (word aligned) into SIM_T1 */
        ECsh(sll_op_, SIM_T1, SIM_T1, 2 );
        ECs(addu_op_, SIM_T1, MMU_REG, SIM_T1);             /* Use vpn to index into TLB array */
        ECi(lw_op_, SIM_T1, SIM_T1, 0);                     /* Load the (shifted, K0 offset) physical page */
        ECi(andi_op_, SIM_T4, init_reg, DEFAULT_PAGESZ-1 ); /* Get the offset from the VA */
        ECs(or_op_, SIM_T1, SIM_T1, SIM_T4);                /* Or the offset and physical page together
                                                             * - needed if we hit in TLB */
        if( new_state == MEM_D_EXCLUSIVE ) {
            /* Data_Write */
            v_bgeii( VREGS[SIM_T1], 0, mmu_or_cache_miss);  /* write protect -> neg mmu-entry */
        } else {
            /* MEM_SHARED: Data_Read */
            v_beqii( VREGS[SIM_T1], 0, mmu_or_cache_miss);  /* branch on MMU miss */
        }
        ECnop;

        /* MMU hit -> check physarray ( phys addr. is in SIM_T1 ) */
        ECs( and_op_, SIM_T1, MMUMASK_REG, SIM_T1);         /* delete protection bit of MMU entry */
        ECsh( srl_op_, SIM_T2, SIM_T1, log2SCACHE_LINE_SIZE);  /* SIM_T2 = cache line index */
        ECs( addu_op_, SIM_T2, SIM_T2, PA_REG );            /* physical line number: SIM_T2 = PA_REG + SIM_T2 */
        ECi( lb_op_, SIM_T2, SIM_T2, 0 );                   /* Load PA entry byte into SIM_T2 */
        ECi( sll_op_, SIM_T2, SIM_T2, 24);                  /* exclusive entry = 0x80 */
        ECi( ADDR_ADDI_OP, SIM_T4, PC_REG,                  /* update PC - IS THIS NEEDED???? */
            COMPOSE_PC(trans) - trans->instrGrp->virt_pc);
        /* Maybe we need the PC on cache hits? */
        ECi( REG_ST_OP, V0, VSS_BASE, PC_OFF);
        if( new_state == MEM_D_EXCLUSIVE ) {
            v_bltii( VREGS[SIM_T2], 0, cache_hit );         /* MEM_D_EXCLUSIVE: branch on PA WRITE Hit */
        } else {
            v_bneii( VREGS[SIM_T2], 0, cache_hit);          /* MEM_SHARED: branch on PA READ Hit */
        }
        ECnop;

        v_label(mmu_or_cache_miss);                         /* jump here on mmu/cache miss */
        ECi(ADDR_ADDI_OP, SIM_T4, PC_REG,                   /* Update PC ???? */
            COMPOSE_PC(trans) - trans->instrGrp->virt_pc);
        ECi(REG_ST_OP, V0, VSS_BASE, PC_OFF);
        ECi(addiu_op_, A3, G0, trans->cycle_correction);    /* Correct the cycle count */
        ECi(addiu_op_, A1, G0, new_state);
        if( embra.sequential ) {
            ECi(addiu_op_, SIM_T2, G0, 0);                  /* In MPinUP no need to rewind quick check */
        } else {
            ECilab(addiu_op_, SIM_T2, G0, USE_LABEL_VALUE(rewind_dqc));
        }
        ECj(jal_op_, pa_mem_ref_wrapper);                   /* In same segment */
        ECnop;
        v_label(cache_hit);                                 /* jump here on cache hit */
    }
}
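/*
 * Illustrative sketch (not part of the translator): the run-time effect of
 * the VQC lookup emitted above, in plain C.  One status byte is kept per
 * virtual cache line; a negative byte means the line is held exclusive
 * (writable) and any non-zero byte means it is at least shared (readable).
 * The encoding and the helper name are assumptions inferred from the
 * branches and comments above, not Embra's actual declarations.
 */
static int
sketch_vqc_lookup(const signed char *vqc,     /* what QC_REG points at */
                  unsigned log2_line_size,    /* log2SCACHE_LINE_SIZE  */
                  unsigned va, int is_write)
{
    signed char state = vqc[va >> log2_line_size];

    /* A miss makes the emitted code call phys_mem_ref_wrapper, passing the
     * rewind offset (rewind_dqc) so the whole quick check is re-executed
     * once the miss has been serviced. */
    if (is_write)
        return state < 0;       /* need the line exclusive to write */
    return state != 0;          /* shared or exclusive is enough to read */
}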
/* This assumes rs has the base address (without extra field)
 * It returns the translated address in SIM_T1
 */
static int D_Memory_Check( TransState *trans, unsigned instr, char new_state)
{
    int tmp1;
    int dst;

    trans->fp_tested = 0;

    /* Put addr into A_REG0 */
    /* GPR[rs] + offset -> A_REG0 */
    tmp1 = Load( A0, rs(instr) );
    dst = tmp1;

    /* So the deal is -- We need the Base+Offset value in A0 for the callout */
    /* if we don't do it here, then our QC rewind gets hassled */
    /* we could only do it in the callout part of the cache check, but then */
    /* different QC's would be different lengths. */
    /* The solution to that would be for the QC to rewind itself either */
    /* with a jump or passing the rewind offset to the wrapper function */
    if( IMMED(instr) ) {
        ECi(ADDR_ADDI_OP, A0, tmp1, IMMED(instr) );
        dst = A0;
    } else {
        /* Ensure that address gets loaded into A0 for the call to mem_ref */
        ECs(or_op_, A0, G0, tmp1);
        dst = A0;
    }

    switch( embra.emode ) {
    case EMBRA_PAGE:
        Page_D_Cache_Check( trans, new_state, dst);
        break;
    case EMBRA_CACHE:
        Cache_D_Cache_Check( trans, new_state, dst);
        break;
    default:
        CPUError("Embra mode incorrect\n");
    }
    return SIM_T1;
}

static void Check_Timer( int num_cycles, uint pc_val, int bd_slot )
{
    extern int clock_val;               /* In clock.c */
    PC_BD pc = pc_val | bd_slot;        /* bd_slot must be 1 or 0 */

    /* Decrement by number of instructions, allowing processor speed to */
    /* be controlled by timer value */
    /* can fit this in 16 bits. As processor speeds up be careful */
    ECi(addiu_op_, CLOCK_REG, CLOCK_REG, -num_cycles);
    /* Store cycle count for interventions */
    if( embra.emode == EMBRA_CACHE && embra.parallel ) {
        ECi(sw_op_, CLOCK_REG, VSS_BASE, CCD_OFF);
    } else {
        /* Don't allow b in bdelay slot */
        if( num_cycles == 2 ) {
            ECnop;
        }
    }
    ECilab(blez_op_, G0, CLOCK_REG, USE_LABEL( do_periodic ) );
    ECnop;
    /* XXXXX - DANGER - NO translation may begin with a store to the */
    /* simulated state.  If it did, it could be in this branch delay */
    /* slot, and that would cause incorrect behavior */
#ifdef CHAIN_BAR_SYNC
    {
        extern void embra_sync_barrier(void);
        int OK = 1;
        int i;
        for( i = 0; i < TOTAL_CPUS; i++ )
            OK = OK && EMP[i].outOfSlaveLoop;
        if( OK ) {
            ECi(ori_op_, SIM_T4, G0, 1);
            /* This thing is broken */
            ECj(jal_op_, embra_sync_barrier);
            ECi(sw_op_, SIM_T4, VSS_BASE, OUTTC_OFF);
            ECi(sw_op_, G0, VSS_BASE, OUTTC_OFF);
        }
    }
#endif
}
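/*
 * Illustrative sketch (not part of the translator): the countdown scheme
 * that Check_Timer emits, in plain C.  Translated code keeps a cycle budget
 * in CLOCK_REG; each block subtracts its cost and branches to the
 * do_periodic code once the budget is used up.  The variable and handler
 * names below are stand-ins, not symbols defined elsewhere in Embra.
 */
static int sketch_cycle_budget;                    /* stands in for CLOCK_REG */

static void
sketch_check_timer(int num_cycles, void (*do_periodic_handler)(void))
{
    sketch_cycle_budget -= num_cycles;             /* addiu CLOCK_REG, CLOCK_REG, -cycles */
    if (sketch_cycle_budget <= 0)                  /* blez CLOCK_REG, do_periodic         */
        do_periodic_handler();                     /* take clock interrupts, etc.         */
}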
static void
Increment_Memory_Access_Count( int num_D_accesses, int num_I_accesses )
{
    if( embra.emode == EMBRA_CACHE ) {
        if( num_D_accesses ) {
            ECi(addiu_op_, DHIT_REG, DHIT_REG, num_D_accesses );
        }
        if( num_I_accesses ) {
            ECi(addiu_op_, IHIT_REG, IHIT_REG, num_I_accesses );
        }
    }
}

/* The kernel doesn't want to spill fp registers on context switches, */
/* so it sets cp1 unusable, and relies on a coprocessor unusable */
/* exception to tell it that the process is using floating point */
static void
Check_C1_Usable(TransState *trans)
{
    if (trans->fp_tested) {
        /*
         * Simple optimization. Do not check on consecutive
         * FP ops. fp_tested gets cleared on all callouts.
         */
        return;
    }

    /*
     * Determine whether the FP register file needs to be
     * loaded or not. This is the complementary operation
     * to the SPILL_FP_ENABLED callout macro.
     */
    ECi(lw_op_, SIM_T1, VSS_BASE, FPLOADED_OFF);
    ECi(bne_op_, G0, SIM_T1, 3);
    ECi(ori_op_, SIM_T1, G0, 1);
    ECi(sw_op_, SIM_T1, VSS_BASE, FPLOADED_OFF);
    ECj(jal_op_, RestoreFP);
    ECnop;
    VCTARGET;

    /*
     * Check if the OS has enabled the COP1. Generate
     * an exception callout otherwise. Note that the exception callout
     * never returns.
     */
    ECi(REG_LD_OP, SIM_T4, VSS_BASE, CP0_OFF + C0_SR*REG_SIZE);
    /* XXX - this assumes that SR_CU1 has its bit set in the upper 16 */
    ECi(lui_op_, SIM_T1, G0, SR_CU1>>16);
    ECs(and_op_, SIM_T4, SIM_T4, SIM_T1);
    /* XXX - this offset depends on implementation of Do_Callout */
    ECi(bne_op_, G0, SIM_T4, DO_CALLOUT_LEN);
    Do_Callout( trans, CALLOUT_RAISE_C1_UNUSABLE);
    VCTARGET;

    /*
     * Need to do this after the callout since the callout
     * clears fp_tested. That's fine since the callout never
     * returns if it is taken.
     */
    trans->fp_tested = 1;
}

/* A (very simple) pipeline timing model:
 * Assume 1 CPI for straight-line code;
 * branch and other stalls are accounted for
 * elsewhere (???)
 */
uint
Pipe_Time(InstrGrp* thisGrp, int is_delay_slot_instr )
{
    if( is_delay_slot_instr ) {
        ASSERT( thisGrp->GrpLen == 2 );
        return (thisGrp->GrpLen - 1);
    }
    return (thisGrp->GrpLen);
}

/*----------------------------------------------------------------------------
 *
 * Routine: Translate
 *
 * This is the main translation routine.  It calls DecodeInstr, which
 * decodes a basic block of instructions, and then for each instruction
 * it switches on the opcode and writes the appropriate translation.
 * Other code emitted:
 * We emit I-cache checks at the start of a basic block and at
 * I-cache line boundaries.  We check the clock at the start to see if
 * we need to take a clock interrupt.  We decrement the