📄 translator.c
字号:
/* * Copyright (C) 1996-1998 by the Board of Trustees * of Leland Stanford Junior University. * * This file is part of the SimOS distribution. * See LICENSE file for terms of the license. * *//*************************************************************************** File: translator.c * This is a major file for Embra** This file takes gets called from the main simulator loop * (continue_run) with a virtual PC from which we read a basic block * of instructions, write them into the translation cache, and then * return the address of the start of the block in the translation * cache. * The main structure of the module is a switch statement that takes * each instruction and writes its translation. * Some optimization, like not emitting a load nop when it is not * needed, has been implemented, but there is much more room for * optimization, especially avoiding uncessesary loads (and stores) * when two adjacent instructions depend on each other * We implement chaining in this module. The idea of chaining is,* instead of jumping back to the main simulator loop at the close of a* basic block, jump directly to the next block. * D_Cache_Check and I_Cache_Check contain the very delicate quick* check code that sees if we need if we need to call out to mem_ref.* These routines also ensure that the line in question is on a TLB* mapped page* When a jump or branch is the last instruction on a page, we want to avoid* using physical addresses since it is possible that the delay slot* instruction is on different physical pages for different processes.* Thus we don't insert blocks that have jumps as their last instruction* into the lookup tables under their physical address, so each ASID has* their own translated version of these blocks.* I assume that all basic blocks are assumed to be less than (1<<15)-1 bytes* $author mencer $* $date 5/21/96 $* ***************************************************************************/#include <unistd.h>#include <stdio.h>#include <string.h>#include <fcntl.h>#include <bstring.h>#include <sys/cachectl.h>#include <sys/signal.h>#include <sys/types.h>#include <sys/mman.h>#include <stdlib.h>#include "simmisc.h"#include "embra.h"#include "annotations.h"#include "translator.h"#include "decoder.h"#include "mem_control.h"#include "qc.h"#include "cp0.h"#include "driver.h"#include "callout.h"#include "main_run.h"#include "tc_coherence.h"#include "stats.h"#include "cpu_interface.h"#include "clock.h"#include "userflush.h"#include "fpu.h"#include "vcode.h"/* note: following included just for the assertion at the beginning * of translate(). */#include "machine_defs.h"#define CHECKREGS 1#if CHECKREGSvoid CheckRegs(int current_target, int current_pc, int physaddr);int check_regs=0;int check_every=1; /* check every basic block? */int nblocks=0;int print_blocks=1;int die_reg=1;uint64 min_cycles=0;#endif/* **************GLOBALS***********/#define memptr v_ip/* Offsets into translated block */int SPECULATIVE_ENTRY = 0; /* Is PC correct? */int CHECK_MAPPING_ENTRY = 0; /* Is physical addr correct? */int SAME_PAGE_ENTRY = 0; /* No checks required *//* XXX These are REAL IMPORTANT *//* This is wrong #define I_QC_LEN (embra.MPinUP?12:10) #define I_PA_LEN (embra.MPinUP?18:16) #define IREF_LEN (embra.useVQC?I_QC_LEN:I_PA_LEN)*/int I_QC_LEN = 0; /* length of I-cache quick check */int I_PA_LEN = 0; /* length of I-cache cache check */#define IREF_LEN (embra.useVQC?I_QC_LEN:I_PA_LEN) /* length of I-cache check *//* What chaining mode are we in? *//* NO_CHAINING - all BB go to dispatch *//* BB_CHAINING - BB link to each other */enum {NO_C=0, BB_C=1} chainMode;/* These function as labels for backward branches *//* Management of labels */static struct { v_label_type cont_run_adj_clock; v_label_type cont_run; v_label_type do_periodic; v_label_type rewind_dqc; v_label_type rewind_iqc; uint* cont_run_adj_clock_addr; uint* cont_run_addr; uint* do_periodic_addr; uint* rewind_dqc_addr; uint* rewind_iqc_addr;} labels;typedef struct { int real; int used;}reg_t;static reg_t prev_store;/* These are function pointers used in the translation process *//* They are setup in Translator_Init */void* dispatchChain;void* dispatchNoChain;void* periodicCallout;/* Turn this to 0 to stop register allocation. Curent max == 8*//* XXX This may blow us away, but I'm trying it right now for * exact compatibility..... */#define NUM_ALLOCATABLE_REGS 4/* Actual space for register allocation structures */alloc_reg_t reg2alloc[32];/* Pointers to register allocation structures, so it can be sorted by *//* register use *//* Array field must be greater than 0 */#if NUM_ALLOCATABLE_REGS > 0 alloc_reg_t* src2alloc[NUM_ALLOCATABLE_REGS];#else/* Not used, just here for the compiler */ alloc_reg_t* src2alloc[1];#endif#if defined(SIM_MIPS32)#define JMP_PC_MASK 0xf0000000#else#define JMP_PC_MASK 0xfffffffff0000000LL#endif/* Always allocate registers in this order. *//* Any regsiters in this list should be saved and restored on a callout. See callout.s */unsigned reg_alloc[] = REGALLOC_LIST;typedef enum {SEQ_FLOW, JMP_FLOW, BRANCH_FLOW, REGINDIRECT_FLOW, BRANCH_TAKEN, BRANCH_UNTAKEN} flow_t;typedef struct TransState { InstrGrp *instrGrp; flow_t flow; VA curPC; int cycle_correction; VA composePC; unsigned branch_instr; int fp_tested; } TransState;/* **************END GLOBALS***********//****************************************************************//* Local Functions *//****************************************************************//* Register Allocation functions *//* ************************************************************* * Some type checking for the register allocation * (the hacked up syle) * *************************************************************/static int Load( int sim, int real );static void Load_Move( int sim, int real );static void Store( int used, int real);/* Use this to move a known register to a possibly allocated register */static void Store_Move( int contents, int store_or_alloc );/* Load value from simulated regs into real regs */static int Preload_Regs(void);static int Set_Destination( int suggestion, int reg );#if defined(SIM_MIPS64)static void Load_64_Bit_Immed( int reg, Reg64 immed );#define Load_Reg_Immed(_reg,_imm) Load_64_Bit_Immed(_reg,_imm);#define LOAD_REG_SIZE 2#endifstatic void Load_Op_Immed( int loadOpCode, int reg, uint addr );/* Allows all forms of chaining */static void Page_Prelude_Chain_Check (InstrGrp *instrGrp, int cycles );static void Cache_Prelude_Chain_Check(InstrGrp *instrGrp, int cycles );/* Emits the chaining jump */static void Transfer_To( TransState *trans,VA newPC );/* Function for manipulating the PC */static void Update_PC( TransState *trans, flow_t flow, VA next_PC);/* Callout functions */static void Do_Callout (TransState *trans, int callout_code );static void Do_Exception_Callout(TransState *trans, int exception_code);/* Quick Check Functions */static void Page_D_Cache_Check (TransState *trans,char new_state,int init_reg);static void Cache_D_Cache_Check (TransState *trans,char new_state,int init_reg);static int D_Memory_Check (TransState *trans,unsigned instr, char new_state);static void I_Memory_Check (TransState *trans,int cpuNum, VA imm);/* Clock maintenence */static void Check_Timer( int num_cycles, uint pc, int bd );/* Pipeline timing model */static uint Pipe_Time(InstrGrp* thisGrp, int is_delay_slot_instr );/****************************************************************//* END local functions *//****************************************************************//* This disables speculative (jr) chaining *//* #define DISABLE_SPECULATIVE_CHAINING *//* extract PC from TransState structure */#define COMPOSE_PC(_tr) ((_tr)->curPC | ((_tr)->flow!=SEQ_FLOW))/* actual mem address of the pointer passed in *//* #define mem_size(x) sizeof(int)*(x) */#define mem_size(x) ((x)<<2)/* * Offset into CPUState of a (32 or 64-bit) register. */#define REG_OFFSET(_x) (((_x)*sizeof(Reg)) + GP_OFF)#if defined(SIM_MIPS64)#define MAX_LONG_CONST 16#define MAX_SIZE_LONG_CONST (MAX_LONG_CONST*8)static struct LongConstTable { Reg64 value; /* Value to be loaded */ TCA memptr; /* Instruction seq do to load */ int regno; /* Where to put value */} longConstTable[MAX_LONG_CONST];static int nextLongConstIndex;#define InitLongConst() {nextLongConstIndex = 0;}#define AddLongConst(_val, _memptr, _regno) { \ ASSERT(nextLongConstIndex < MAX_LONG_CONST); \ longConstTable[nextLongConstIndex].value = (_val); \ longConstTable[nextLongConstIndex].memptr = (_memptr); \ longConstTable[nextLongConstIndex].regno = (_regno); \ nextLongConstIndex++; }static void FillInLongConst(void);#else/* Not used or need in 32bit mode */#define MAX_SIZE_LONG_CONST 0#define InitLongConst()#define AddLongConst(_val, _memptr, _regno) ASSERT(0);#define FillInLongConst()#endif /* ******************************************************************* * Longest translation (expressed in instructions) * longest instruction : ldc1_op : 1 * D_Memory_Check = 23 * Check_C1_Usable = 15 (=9+callout) * Annotation(callout) = 6 * ------------ * 45 * This is the max. possible value, and is checked anyway post-facto * ******************************************************************/#define CHECK_TIMER 3#define INCREMENT_MEM_ACCESS_COUNT 2#define LONGEST_TRANS(grp) (IREF_LEN+SPECULATIVE_ENTRY+CHECK_MAPPING_ENTRY+SAME_PAGE_ENTRY + CHECK_TIMER + INCREMENT_MEM_ACCESS_COUNT + grp * (45))#define _nop 0 /* I am no longer R3000 compatible, so this is near useless */#ifdef R3000_COMPAT/* No load NOPs needed on the R4000 */#define TRAILING_LOAD( u ) prev_load = u;#else#define TRAILING_LOAD( u )#endif/*----------------------------------------------------------------------------- * * This is the register allocation section * *----------------------------------------------------------------------------*//* Allocate registers for vcode */v_reg_type VREGS[32]; /* integer registers */v_reg_type FVREGS[32]; /* floating point registers */v_label_type label; /* universal label to branch to */void VC_Allocate_Regs(void){ int i; /* allocate all of the registers which the emulator uses; we only * have 16 registers available for vcode; (less with x86!!!) Tragically, this doesn't work; instead we have the following repulsive hack. v_getreg(&VREGS[VSS_BASE], V_I, V_TEMP); v_getreg(&VREGS[QC_REG], V_I, V_TEMP); v_getreg(&VREGS[MMU_REG], V_I, V_TEMP); v_getreg(&VREGS[PC_REG], V_I, V_TEMP); v_getreg(&VREGS[CLOCK_REG], V_I, V_TEMP); v_getreg(&VREGS[MMU_REG], V_I, V_TEMP); v_getreg(&VREGS[SIM_T1], V_I, V_TEMP); v_getreg(&VREGS[SIM_T2], V_I, V_TEMP); v_getreg(&VREGS[SIM_T4], V_I, V_TEMP); v_getreg(&VREGS[BRANCHREG], V_I, V_TEMP); v_getreg(&VREGS[SHADOW0], V_I, V_TEMP); v_getreg(&VREGS[SHADOW1], V_I, V_TEMP); v_getreg(&VREGS[SHADOW3], V_I, V_TEMP); v_getreg(&VREGS[SHADOW3], V_I, V_TEMP); */ for (i=0; i<32; i++) VREGS[i].reg=i; /* allocate all floating point registers */ for (i=0; i<32; i++) FVREGS[i].reg=i; /* v_getreg(&FVREGS[i], V_F, V_TEMP); */ } /* This definition of Load allows us to optimize out uneeded loads as *//* we generate code. */static int Load( int sim, int real ){ /* If you want register 0, use the real thing */ if( !real ) return 0; if( reg2alloc[real].alloc_reg ) return reg2alloc[real].alloc_reg; if( prev_store.real == real ) { if( prev_store.used != sim ) { ECs( or_op_, sim, prev_store.used, G0 ); } return sim; } ECi( REG_LD_OP, sim, VSS_BASE, REG_OFFSET(real) ); return sim;}/* This ensures that the current value of simREGS[real] is in register sim */static voidLoad_Move( int sim, int real ){ if( !sim ) return; if( reg2alloc[real].alloc_reg ) { ECs( or_op_, sim, reg2alloc[real].alloc_reg, G0 ); return; } ECi( REG_LD_OP, sim, VSS_BASE, REG_OFFSET(real) );}static int Preload_Regs(void){
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -