dram.cc

来自「M5,一个功能强大的多处理器系统模拟器.很多针对处理器架构,性能的研究都使用它作为模拟平台」· C++ 代码 · 共 1,471 行 · 第 1/5 页
1,471 行
        .name(name() + ".cycles_between_misses")        .desc("cycles between open page misses")        .flags(total)        ;        other_bank_read_access_miss        .init(1)        .name(name() + ".other_bank_read_access_miss")        .desc("read miss count")        .flags(total)        ;        other_bank_write_access_miss        .init(1)        .name(name() + ".other_bank_write_access_miss")        .desc("write miss count")        .flags(total)        ;    // DR TODO for now, only output stats which are involved in power equations        total_latency        .name(name() + ".total_latency")        .desc("total DRAM latency")        ;        total_arb_latency        .name(name() + ".total_arb_latency")        .desc("total arbitration latency")        ;        avg_latency        .name(name() + ".avg_latency")        .desc("average DRAM latency")        ;        avg_arb_latency        .name(name() + ".avg_arb_latency")        .desc("average arbitration DRAM latency")        ;        bank_access_profile        .init(num_banks,num_cpus)        .name(name() + "[cpu][bank]")        .desc("DRAM bank access profile")        ;        total_icache_req        .name(name() + ".total_icache_req")        .desc("total number of requests from icache")        ;        avg_latency = total_latency / accesses;        avg_arb_latency = total_arb_latency / accesses;}// DR DEBUG: assume we have a 500 MHz CPU and 100 MHz RAM// static float cpu_ratio = 5; // ratio between CPU speed and memory bus speed// DR TODO: get this parameter from the simulationstatic char *mem_access_output=NULL;                /* latency of access [CPU cycles]*/TickDRAMMemory::calculateLatency(PacketPtr pkt){  bool cmdIsRead = pkt->isRead();  int lat=0, temp=0, current_bank=0;  int current_row=0, current_device=0;  int was_miss = 0;	// determines if there was an active row miss this access  //md_addr_t physic_address; /* linear memory address to be accessed */  Addr physic_address; /* linear memory address to 
be accessed */  int num_blocks=0;  int corrected_overlap, /* overlap of consecutive accesses [CPU cycles] */    overlap=0;           /* overlap of consecutive accesses [mem bus cycles] */  int adjacent=0; /* 1 indicates that current bank is adjacent to the last accessed  one*/  int chunks = (pkt->getSize() + (bus_width - 1)) / bus_width; // burst length  assert(chunks >0);  physic_address = pkt->getAddr();    ///////////////////////////////////////////////////////////////////////////    // DR added more stats for power modelling    // NOTE:    // for DRAM closed-page, automatic precharge after read or write,    // i.e. whenever idle    // count number of cycles where dram is not busy, use for CKE low signal    // calculate as percentage of all clock cycles    // if busy, do not add to idle count.  Else add cycles since last access/* #define  SD_NUM_BANKS (SD_STACK_BASE/SD_BANK_SIZE)     */      /* number of banks *//* #define  SD_NUM_ROWS (SD_BANK_SIZE/SD_ROW_SIZE)       */       /* number of rows per bank *//*delays until data is ready/written to the memory for the SDRAM*/        int SD_T_READ_READ_SROW = cas_lat; /* RAR, row hit, current bank  */        int SD_T_READ_WRITE_SROW = cas_lat; /* RAW, row hit, current bank  */        int SD_T_WRITE_READ_SROW = war_lat-1; /* WAR, row hit, current bank  */        int SD_T_WRITE_WRITE_SROW = 0; /* WAW, row hit, current bank  */        int SD_T_READ_READ_SBANK = (pre_lat+act_lat+cas_lat); /* RAR, row miss, current bank */        int SD_T_READ_WRITE_SBANK = (pre_lat+act_lat+cas_lat+(dpl_lat-1)); /* RAW, row miss, current bank */        int SD_T_WRITE_READ_SBANK = (pre_lat+act_lat); /* WAR, row miss, current bank */        int SD_T_WRITE_WRITE_SBANK = (pre_lat+act_lat+(dpl_lat-1)); /* WAW, row miss, current bank */        int SD_T_READ_READ_OBANK = (pre_lat+act_lat+cas_lat); /* RAR, row miss, another bank */        int SD_T_READ_WRITE_OBANK = (pre_lat+act_lat+cas_lat); /* RAW, row miss, another bank */        int 
SD_T_WRITE_READ_OBANK = (pre_lat+act_lat); /* WAR, row miss, another bank */        int SD_T_WRITE_WRITE_OBANK = (pre_lat+act_lat); /* WAW, row miss, another bank *//* best-case latencies (due to overlap / row hits in another bank) */        int SD_BEST_T_READ_READ_SROW = 0; /* RAR, row hit, current bank  */        int SD_BEST_T_READ_READ_SBANK = (act_lat+cas_lat); /* RAR, row miss, current bank */        int SD_BEST_T_WRITE_READ_SBANK = (act_lat); /* WAR, row miss, current bank */        int SD_BEST_T_READ_READ_OBANK = 0; /* RAR, row miss/hit, another bank */        int SD_BEST_T_READ_WRITE_OBANK = cas_lat; /* RAW, row miss/hit, another bank */        int SD_BEST_T_WRITE_READ_OBANK = (war_lat -1); /* WAR, row miss/hit, another bank */        int SD_BEST_T_WRITE_WRITE_OBANK = 0; /* WAW, row miss/hit, another bank */    Tick time_since_last_access = curTick-time_last_access;    Tick time_last_miss = 0;	// used for keeping track of times between activations (page misses)    //int was_idle = (curTick > busy_until);        bool srow_flag = false;        int timing_correction = 0;    int was_idle = (curTick > busy_until[current_bank]);    cycles_nCKE[0] += was_idle ? 
MIN(curTick-busy_until[current_bank], time_since_last_access) : 0;    // bank is precharged    //active_row[current_bank] == DR_NUM_ROWS    int all_precharged = 1;    int bank_max = num_banks;    int row_max = num_rows;    if( (mem_type == "SDRAM") && (mem_actpolicy == "closed") ) {        // SDRAM does not use the active_row array in closed_page mode        // TODO: handle closed page operation    } else {		// DRDRAM uses the active_row array        for( int i = 0; i < bank_max; i++ ) {                if( (active_row[current_bank] != row_max)) all_precharged = 0;        }    }    if(all_precharged) {        if(was_idle) {                cycles_all_precharge_nCKE[0] += MIN(curTick-busy_until[current_bank], time_since_last_access);                cycles_all_precharge_CKE[0] += MIN(0, busy_until[current_bank]-time_last_access);        }        else {                cycles_all_precharge_CKE[0] += time_since_last_access;        }    } else { // some bank is active        if(was_idle) {                cycles_bank_active_nCKE[0] += MIN(curTick-busy_until[current_bank], time_since_last_access);        }        else {        }    }    if( cmdIsRead ) {        cycles_read_out[0] += chunks;    } else {        cycles_write_in[0] += chunks;    }    time_last_access = curTick;    ////////////////////////////////////////////////////////////////////////////    if ((mem_type == "SDRAM") && (mem_actpolicy == "open"))      {        /* Split transaction on m5 makes it challenging to  */        /* model the DRAM. A single cycle latency is assumed */        /* for dequeueing an address bus request. In response to  */        /* that, the current DRAM implementation assumes that a */        /* seperate DRAM command generator / controller exists per */        /* bank and the dequeued addresses are queued to these */        /* controllers. 
We can view this as an ideal scenario for */        /* a shared DRAM command generator / controller with */        /* support for overlapping DRAM commands. */        /* Compare DRAM PRE,ACT,CAS etc. latencies, DRAM clock  */        /* frequency and the number of banks to determine whether */        /* the ideal scenario with a shared DRAM command generator */        /* is equivalent to having multiple DRAM command generators */        /* per bank */        if ((memctrladdr_type != "interleaved"))/* i.e. mc_type is linear */          {            current_bank=physic_address/bank_size;            temp=physic_address-current_bank*bank_size;/*address in bank*/            current_row=temp/SD_ROW_SIZE;          }        else/* mc_type interleaved */          /* This memory controller maps the addresses differently           * depending on the row_size, every row is mapped to another           * bank. Thus, the text segment uses half of every bank, the heap           * the next quarter of each bank, and the stack the rest.           
*/          {            num_blocks = physic_address/SD_ROW_SIZE; /* row number */            current_bank=num_blocks%num_banks;            current_row=num_blocks/num_banks;          }        if (mem_access_details == true)          {                // DR TODO            //fprintf(mem_accessfd,"       %09u  %4d   %3d\n",physic_address,current_row,current_bank);          }        else          {            if (mem_access_output!=0)              {                //fprintf(mem_accessfd,"\n");              }          }        total_access++;        if (memctrlpipe_enable == true)          {            overlap=(int)(busy_until[current_bank] - curTick);          }        else overlap = 0;        if (cpu_ratio < 1.0)          {            corrected_overlap = overlap*((int)(1/cpu_ratio)); /* floor */          }        else          {            corrected_overlap = (int) (overlap/cpu_ratio);          }        /*fprintf(stderr,"%10.0f %10.0f %4d %4d ",(double)busy_until, (double)curTick, overlap, corrected_overlap); debugging*/        if (cmdIsRead == lastCmdIsRead)/*same command*/          {            if (current_bank == last_bank)/*same bank*/              {                if (current_row == last_row)/*same row*/                  {                          /* Page Hit */                    if (cmdIsRead)                      {                        if (corrected_overlap > 0)/*overlapping*/                          {                            /*best case*/                            if (corrected_overlap >= cas_lat)                              {                                lat=SD_BEST_T_READ_READ_SROW;                                srow_flag = true;                                best_case++;                                full_overlapping++;                              }                            else/*in between case*/                              {                                lat = cas_lat-corrected_overlap;                                srow_flag = true;   
                             in_between_case++;                                partial_overlapping++;                              }                          }                        else                          {                            /*worst case*/                            lat = SD_T_READ_READ_SROW;                                srow_flag = true;                            worst_case++;                          }                        same_row_read_access++;                        srr_after_read++;                      }                    else/*write*/                      {/*no option case*/                        lat = SD_T_WRITE_WRITE_SROW;                        srow_flag = true;                        same_row_write_access++;                        srw_after_write++;                        worst_case++;                      }                  }                else /*other row in same bank*/                  {                        /* Page miss */                    if (cmdIsRead)                      {                        if (corrected_overlap > 0)/*overlapping*/                          {                            if (corrected_overlap >= pre_lat)/*best case*/                              {                                lat = SD_BEST_T_READ_READ_SBANK;                                best_case++;
dram.cc - 源码说明

本页面展示了「M5,一个功能强大的多处理器系统模拟器.很多针对处理器架构,性能的研究都使用它作为模拟平台」中的 dram.cc 源码文件，采用 C++ 编程语言编写（.cc 为 C++ 源文件扩展名），共 1,471 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与多处理器相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?