dram.cc
来自「M5,一个功能强大的多处理器系统模拟器.很多针对处理器架构,性能的研究都使用它作」· CC 代码 · 共 1,471 行 · 第 1/5 页
CC
1,471 行
.name(name() + ".cycles_between_misses") .desc("cycles between open page misses") .flags(total) ; other_bank_read_access_miss .init(1) .name(name() + ".other_bank_read_access_miss") .desc("read miss count") .flags(total) ; other_bank_write_access_miss .init(1) .name(name() + ".other_bank_write_access_miss") .desc("write miss count") .flags(total) ; // DR TODO for now, only output stats which are involved in power equations total_latency .name(name() + ".total_latency") .desc("total DRAM latency") ; total_arb_latency .name(name() + ".total_arb_latency") .desc("total arbitration latency") ; avg_latency .name(name() + ".avg_latency") .desc("average DRAM latency") ; avg_arb_latency .name(name() + ".avg_arb_latency") .desc("average arbitration DRAM latency") ; bank_access_profile .init(num_banks,num_cpus) .name(name() + "[cpu][bank]") .desc("DRAM bank access profile") ; total_icache_req .name(name() + ".total_icache_req") .desc("total number of requests from icache") ; avg_latency = total_latency / accesses; avg_arb_latency = total_arb_latency / accesses;}// DR DEBUG: assume we have a 500 MHz CPU and 100 MHz RAM// static float cpu_ratio = 5; // ratio between CPU speed and memory bus speed// DR TODO: get this parameter from the simulationstatic char *mem_access_output=NULL; /* latency of access [CPU cycles]*/TickDRAMMemory::calculateLatency(PacketPtr pkt){ bool cmdIsRead = pkt->isRead(); int lat=0, temp=0, current_bank=0; int current_row=0, current_device=0; int was_miss = 0; // determines if there was an active row miss this access //md_addr_t physic_address; /* linear memory address to be accessed */ Addr physic_address; /* linear memory address to be accessed */ int num_blocks=0; int corrected_overlap, /* overlap of consecutive accesses [CPU cycles] */ overlap=0; /* overlap of consecutive accesses [mem bus cycles] */ int adjacent=0; /* 1 indicates that current bank is adjacent to the last accessed one*/ int chunks = (pkt->getSize() + (bus_width - 1)) / bus_width; // 
burst length assert(chunks >0); physic_address = pkt->getAddr(); /////////////////////////////////////////////////////////////////////////// // DR added more stats for power modelling // NOTE: // for DRAM closed-page, automatic precharge after read or write, // i.e. whenever idle // count number of cycles where dram is not busy, use for CKE low signal // calculate as percentage of all clock cycles // if busy, do not add to idle count. Else add cycles since last access/* #define SD_NUM_BANKS (SD_STACK_BASE/SD_BANK_SIZE) */ /* number of banks *//* #define SD_NUM_ROWS (SD_BANK_SIZE/SD_ROW_SIZE) */ /* number of rows per bank *//*delays until data is ready/written to the memory for the SDRAM*/ int SD_T_READ_READ_SROW = cas_lat; /* RAR, row hit, current bank */ int SD_T_READ_WRITE_SROW = cas_lat; /* RAW, row hit, current bank */ int SD_T_WRITE_READ_SROW = war_lat-1; /* WAR, row hit, current bank */ int SD_T_WRITE_WRITE_SROW = 0; /* WAW, row hit, current bank */ int SD_T_READ_READ_SBANK = (pre_lat+act_lat+cas_lat); /* RAR, row miss, current bank */ int SD_T_READ_WRITE_SBANK = (pre_lat+act_lat+cas_lat+(dpl_lat-1)); /* RAW, row miss, current bank */ int SD_T_WRITE_READ_SBANK = (pre_lat+act_lat); /* WAR, row miss, current bank */ int SD_T_WRITE_WRITE_SBANK = (pre_lat+act_lat+(dpl_lat-1)); /* WAW, row miss, current bank */ int SD_T_READ_READ_OBANK = (pre_lat+act_lat+cas_lat); /* RAR, row miss, another bank */ int SD_T_READ_WRITE_OBANK = (pre_lat+act_lat+cas_lat); /* RAW, row miss, another bank */ int SD_T_WRITE_READ_OBANK = (pre_lat+act_lat); /* WAR, row miss, another bank */ int SD_T_WRITE_WRITE_OBANK = (pre_lat+act_lat); /* WAW, row miss, another bank *//* best-case latencies (due to overlap / row hits in another bank) */ int SD_BEST_T_READ_READ_SROW = 0; /* RAR, row hit, current bank */ int SD_BEST_T_READ_READ_SBANK = (act_lat+cas_lat); /* RAR, row miss, current bank */ int SD_BEST_T_WRITE_READ_SBANK = (act_lat); /* WAR, row miss, current bank */ int 
SD_BEST_T_READ_READ_OBANK = 0; /* RAR, row miss/hit, another bank */ int SD_BEST_T_READ_WRITE_OBANK = cas_lat; /* RAW, row miss/hit, another bank */ int SD_BEST_T_WRITE_READ_OBANK = (war_lat -1); /* WAR, row miss/hit, another bank */ int SD_BEST_T_WRITE_WRITE_OBANK = 0; /* WAW, row miss/hit, another bank */ Tick time_since_last_access = curTick-time_last_access; Tick time_last_miss = 0; // used for keeping track of times between activations (page misses) //int was_idle = (curTick > busy_until); bool srow_flag = false; int timing_correction = 0; int was_idle = (curTick > busy_until[current_bank]); cycles_nCKE[0] += was_idle ? MIN(curTick-busy_until[current_bank], time_since_last_access) : 0; // bank is precharged //active_row[current_bank] == DR_NUM_ROWS int all_precharged = 1; int bank_max = num_banks; int row_max = num_rows; if( (mem_type == "SDRAM") && (mem_actpolicy == "closed") ) { // SDRAM does not use the active_row array in closed_page mode // TODO: handle closed page operation } else { // DRDRAM uses the active_row array for( int i = 0; i < bank_max; i++ ) { if( (active_row[current_bank] != row_max)) all_precharged = 0; } } if(all_precharged) { if(was_idle) { cycles_all_precharge_nCKE[0] += MIN(curTick-busy_until[current_bank], time_since_last_access); cycles_all_precharge_CKE[0] += MIN(0, busy_until[current_bank]-time_last_access); } else { cycles_all_precharge_CKE[0] += time_since_last_access; } } else { // some bank is active if(was_idle) { cycles_bank_active_nCKE[0] += MIN(curTick-busy_until[current_bank], time_since_last_access); } else { } } if( cmdIsRead ) { cycles_read_out[0] += chunks; } else { cycles_write_in[0] += chunks; } time_last_access = curTick; //////////////////////////////////////////////////////////////////////////// if ((mem_type == "SDRAM") && (mem_actpolicy == "open")) { /* Split transaction on m5 makes it challenging to */ /* model the DRAM. A single cycle latency is assumed */ /* for dequeueing an address bus request. 
In response to */ /* that, the current DRAM implementation assumes that a */ /* separate DRAM command generator / controller exists per */ /* bank and the dequeued addresses are queued to these */ /* controllers. We can view this as an ideal scenario for */ /* a shared DRAM command generator / controller with */ /* support for overlapping DRAM commands. */ /* Compare DRAM PRE,ACT,CAS etc. latencies, DRAM clock */ /* frequency and the number of banks to determine whether */ /* the ideal scenario with a shared DRAM command generator */ /* is equivalent to having multiple DRAM command generators */ /* per bank */ if ((memctrladdr_type != "interleaved"))/* i.e. mc_type is linear */ { current_bank=physic_address/bank_size; temp=physic_address-current_bank*bank_size;/*address in bank*/ current_row=temp/SD_ROW_SIZE; } else/* mc_type interleaved */ /* This memory controller maps the addresses differently * depending on the row_size, every row is mapped to another * bank. Thus, the text segment uses half of every bank, the heap * the next quarter of each bank, and the stack the rest. 
*/ { num_blocks = physic_address/SD_ROW_SIZE; /* row number */ current_bank=num_blocks%num_banks; current_row=num_blocks/num_banks; } if (mem_access_details == true) { // DR TODO //fprintf(mem_accessfd," %09u %4d %3d\n",physic_address,current_row,current_bank); } else { if (mem_access_output!=0) { //fprintf(mem_accessfd,"\n"); } } total_access++; if (memctrlpipe_enable == true) { overlap=(int)(busy_until[current_bank] - curTick); } else overlap = 0; if (cpu_ratio < 1.0) { corrected_overlap = overlap*((int)(1/cpu_ratio)); /* floor */ } else { corrected_overlap = (int) (overlap/cpu_ratio); } /*fprintf(stderr,"%10.0f %10.0f %4d %4d ",(double)busy_until, (double)curTick, overlap, corrected_overlap); debugging*/ if (cmdIsRead == lastCmdIsRead)/*same command*/ { if (current_bank == last_bank)/*same bank*/ { if (current_row == last_row)/*same row*/ { /* Page Hit */ if (cmdIsRead) { if (corrected_overlap > 0)/*overlapping*/ { /*best case*/ if (corrected_overlap >= cas_lat) { lat=SD_BEST_T_READ_READ_SROW; srow_flag = true; best_case++; full_overlapping++; } else/*in between case*/ { lat = cas_lat-corrected_overlap; srow_flag = true; in_between_case++; partial_overlapping++; } } else { /*worst case*/ lat = SD_T_READ_READ_SROW; srow_flag = true; worst_case++; } same_row_read_access++; srr_after_read++; } else/*write*/ {/*no option case*/ lat = SD_T_WRITE_WRITE_SROW; srow_flag = true; same_row_write_access++; srw_after_write++; worst_case++; } } else /*other row in same bank*/ { /* Page miss */ if (cmdIsRead) { if (corrected_overlap > 0)/*overlapping*/ { if (corrected_overlap >= pre_lat)/*best case*/ { lat = SD_BEST_T_READ_READ_SBANK; best_case++;
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?