📄 iq_segmented.cc
字号:
/* * Copyright (c) 2001, 2002, 2003, 2004, 2005 * The Regents of The University of Michigan * All Rights Reserved * * This code is part of the M5 simulator, developed by Nathan Binkert, * Erik Hallnor, Steve Raasch, and Steve Reinhardt, with contributions * from Ron Dreslinski, Dave Greene, Lisa Hsu, Kevin Lim, Ali Saidi, * and Andrew Schultz. * * Permission is granted to use, copy, create derivative works and * redistribute this software and such derivative works for any * purpose, so long as the copyright notice above, this grant of * permission, and the disclaimer below appear in all copies made; and * so long as the name of The University of Michigan is not used in * any advertising or publicity pertaining to the use or distribution * of this software without specific, written prior authorization. * * THIS SOFTWARE IS PROVIDED AS IS, WITHOUT REPRESENTATION FROM THE * UNIVERSITY OF MICHIGAN AS TO ITS FITNESS FOR ANY PURPOSE, AND * WITHOUT WARRANTY BY THE UNIVERSITY OF MICHIGAN OF ANY KIND, EITHER * EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE. THE REGENTS OF THE UNIVERSITY OF MICHIGAN SHALL NOT BE * LIABLE FOR ANY DAMAGES, INCLUDING DIRECT, SPECIAL, INDIRECT, * INCIDENTAL, OR CONSEQUENTIAL DAMAGES, WITH RESPECT TO ANY CLAIM * ARISING OUT OF OR IN CONNECTION WITH THE USE OF THE SOFTWARE, EVEN * IF IT HAS BEEN OR IS HEREAFTER ADVISED OF THE POSSIBILITY OF SUCH * DAMAGES. 
*/

#include <iomanip>
#include <sstream>

#include "base/cprintf.hh"
#include "base/statistics.hh"
#include "encumbered/cpu/full/cpu.hh"
#include "encumbered/cpu/full/create_vector.hh"
#include "encumbered/cpu/full/dep_link.hh"
#include "encumbered/cpu/full/iq/iqueue.hh"
#include "encumbered/cpu/full/iq/segmented/iq_segmented.hh"
#include "encumbered/cpu/full/iq/segmented/seg_chain.hh"
#include "encumbered/cpu/full/ls_queue.hh"
#include "encumbered/cpu/full/reg_info.hh"
#include "mem/mem_interface.hh"
#include "sim/builder.hh"
#include "sim/eventq.hh"
#include "sim/stats.hh"

using namespace std;

//  Compile-time configuration switches for the segmented IQ model
#define DEBUG_PROMOTION 0
#define USE_NEW_SELF_TIME_CODE 1
#define SANITY_CHECKING 1
#define use_mod_pushdown 1
#define use_mod_bypassing 1
#define bypass_slot_checking 0

// Always begin self-timing when the chain head issues
#define load_chain_st 0

// Begin self-timing only if Segment 0 is less than half-full
// of not-ready instructions
#define s0_st_limit 0

#define DUMP_CHINFO 0

//==========================================================================
//
//  The "segmented" instruction queue implementation
//
//==========================================================================

//
//  The constructor
//
//  Copies the configuration parameters into members, allocates the
//  per-segment bookkeeping arrays and the master instruction list, and
//  zero-initializes the statistics counters.  Per-segment objects are
//  NOT built here -- that happens in init(), once the CPU pointer (and
//  therefore max_chains) is available.
//
SegmentedIQ::SegmentedIQ(string n,
                         unsigned _num_segments,
                         unsigned _max_chain_depth,
                         unsigned _segment_size,
                         unsigned _segment_thresh,
                         bool en_pri,
                         bool _use_bypassing,
                         bool _use_pushdown,
                         bool _use_pipelined_prom)
    : BaseIQ(n)
{
    string s;

    //
    //  Save params
    //
    num_segments = _num_segments;
    max_chain_depth = _max_chain_depth;
    segment_size = _segment_size;
    segment_thresh = _segment_thresh;
    enable_priority = en_pri;
    use_bypassing = _use_bypassing;
    use_pushdown = _use_pushdown;
    use_pipelined_prom = _use_pipelined_prom;

    // The two self-timing heuristics are mutually exclusive
    if (s0_st_limit && load_chain_st)
        fatal("SegmentedIQ: You really don't want to use both ld_chain_st "
              "AND s0_st_limit");

    last_new_chain = 0;

    //  Total queue capacity is the sum of all segment capacities
    total_size = num_segments * segment_size;
    set_size(total_size);

    last_segment = num_segments - 1;
    //  Sentinel value: one past any valid segment index
    deadlock_seg_flag = num_segments + 1;

    //
    //  Allocate the IQStations
    //
    //  [total_size elements, allocated, doesn't grow]
    //
    active_instructions = new iq_list(total_size, true, 0);

    //
    //  Initialize now that we have the parameters
    //
    seg_thresholds = new unsigned[num_segments];
    free_slot_info = new unsigned[num_segments];

    //  An array of pointers to segment_t
    queue_segment = new segment_ptr_t[num_segments];

    //  Initialize Deadlock
    deadlock_recovery_mode = false;
    deadlock_slot = 0;
    dedlk_promotion_count = 0;
    dedlk_issue_count = 0;

    iq_heads = 0;

    //    head_count = 0;

    //
    //  Statistics
    //
    //  Push-down stats are per segment *boundary*, hence num_segments - 1
    pushdown_events = new Stats::Scalar<>[num_segments - 1];
    pushdown_count = new Stats::Scalar<>[num_segments - 1];

    total_pd_events = 0;
    total_pd_count = 0;

    deadlock_events = 0;
    deadlock_cycles = 0;
    last_deadlock = 0;

    total_ready_count = 0;
    cum_delay = 0;
    st_limit_events = 0;
    rob_chain_heads = 0;
    seg0_prom_early_count = 0;
}

//
//  Second-phase initialization, called once the CPU is known.
//
//  Reads max_chains from the CPU, builds the per-segment objects (each
//  with its cumulative promotion threshold), and prints the
//  configuration banner to cerr.
//
//  _cpu: owning CPU;  dw/iw/qn: forwarded to BaseIQ::init()
//
void
SegmentedIQ::init(FullCPU *_cpu, unsigned dw, unsigned iw, unsigned qn)
{
    unsigned cum_thresh = 0;

    //  Call the base-class init
    BaseIQ::init(_cpu, dw, iw, qn);

    num_chains = cpu->max_chains;

    total_insts = 0;
    for (int i = 0; i < cpu->number_of_threads; ++i)
        insts[i] = 0;

    for (unsigned i = 0; i < num_segments; ++i) {
        stringstream s;

        //  Thresholds accumulate: segment i promotes at i * segment_thresh
        cum_thresh += segment_thresh;
        seg_thresholds[i] = cum_thresh;

        //  Segment name is "<queue-name>:NN" (zero-padded index)
        s << name() << ":" << setw(2) << setfill('0') << i << ends;

        queue_segment[i] = new segment_t(this, s.str(), i, num_segments,
                                         segment_size, num_chains, cum_thresh,
                                         use_pipelined_prom, enable_priority);
    }

    total_thresh = cum_thresh;

    ccprintf(cerr,
             "******************************************\n"
             " IQ Model : Segmented\n"
             " %6u Segments\n"
             " (size=%u, delta-thresh=%u)\n"
             " (%u Total slots)\n"
             " %6u Chains (maximum)\n"
             " %6u Maximum chain length\n"
             "*****************************************\n"
             "\n",
             num_segments, segment_size, segment_thresh, total_size,
             num_chains, max_chain_depth);
}

//
//  Destructor: releases the arrays and per-segment objects allocated in
//  the constructor and in init().
//
//  NOTE(review): hm_predictor is allocated once in buildSharedInfo() and
//  handed to every cluster via setSharedInfo(); deleting it here looks
//  like a double-free when more than one cluster exists, and the
//  similarly-shared lr_predictor is not deleted at all -- confirm
//  intended ownership before changing.
//
SegmentedIQ::~SegmentedIQ()
{
    delete[] pushdown_events;
    delete[] pushdown_count;

    for (int i = 0; i < num_segments; ++i)
        delete queue_segment[i];
    delete[] queue_segment;

    delete[] free_slot_info;
    delete[] seg_thresholds;
    delete active_instructions;
    delete hm_predictor;
}

//============================================================================
//
//  Shared information structure...
//
//  This structure holds information which must be shared between all
//  clusters, or between the clusters and the dispatch stage.
//
//  The buildSharedInfo() method is called for IQ[0], then the structure
//  address is passed to any remaining clusters via the setSharedInfo()
//  method
//
ClusterSharedInfo *
SegmentedIQ::buildSharedInfo()
{
    ClusterSharedInfo *rv = new ClusterSharedInfo;

    //
    //  Build the Chain Info Table
    //
    rv->ci_table = new SegChainInfoTable(num_chains, cpu->numIQueues,
                                         num_segments, use_pipelined_prom);
    //  Hit/miss predictor: saturating counters (12-bit index, 4-bit
    //  counters, range 0..13, threshold 1)
    rv->hm_predictor = new SaturatingCounterPred(name() + ":HMP", "miss",
                                                 "hit", 12, 4, 0, 1, 13);
    //  Left/right predictor: 10-bit index, default counter geometry
    rv->lr_predictor = new SaturatingCounterPred(name() + ":LRP", "0", "1",
                                                 10);
    rv->total_chains = num_chains;

    //  make a copy of the info locally
    setSharedInfo(rv);

    return rv;
}

//
//  Adopt a shared-info structure built by another cluster (or by our
//  own buildSharedInfo()).  Caches the table/predictor pointers locally
//  for convenience and propagates them to every segment.
//
void
SegmentedIQ::setSharedInfo(ClusterSharedInfo *p)
{
    shared_info = p;

    //  for local use...
    reg_info_table = p->ri_table;
    chain_info = static_cast<SegChainInfoTable *>(p->ci_table);
    hm_predictor = static_cast<SaturatingCounterPred *>(p->hm_predictor);
    lr_predictor = static_cast<SaturatingCounterPred *>(p->lr_predictor);

    //  tell all the segments where this info is...
    for (int i = 0; i < num_segments; ++i) {
        queue_segment[i]->reg_info_table = reg_info_table;
        queue_segment[i]->chain_info = chain_info;
    }
}

//============================================================================
//
//  Add an instruction to the queue:
//
//  All instructions "live" in the "active_instructions" list.  We pass
//  iterators to these entries around instead of moving the data.
This// also makes it possible to walk dependence chains at writeback time//SegmentedIQ::iq_iteratorSegmentedIQ::add_impl(DynInst *inst, InstSeqNum seq, ROBStation *rob, RegInfoElement *ri, NewChainInfo *new_chain){ IQStation rs; // We'll fill in these fields then copy the object unsigned follows_chain = 1000; unsigned n_chains = 0; // // If we're in the process of recovering from a deadlock condition, // then disable instruction dispatch // if (deadlock_recovery_mode) return 0; rs.inst = inst; rs.in_LSQ = false; rs.ea_comp = inst->isMemRef(); rs.seq = seq; // The dispatch sequence, not fetch sequence rs.queued = false; rs.squashed = false; //rs.blocked = false; rs.dispatch_timestamp = curTick; rs.lsq_entry = 0; // may be changed by dispatch() rs.tag = inst->fetch_seq; rs.rob_entry = rob; rob->head_of_chain = false; assert(!new_chain->out_of_chains); // Now that we're sure we'll add this instruction... ++total_insts; ++insts[inst->thread_number]; // Insert this info into instruction record for (int i = 0; i < TheISA::MaxInstSrcRegs; ++i) rs.idep_info[i] = new_chain->idep_info[i]; rs.head_of_chain = new_chain->head_of_chain; rs.head_chain = new_chain->head_chain; rs.pred_last_op_index = new_chain->pred_last_op_index; rs.lr_prediction = new_chain->lr_prediction; // // Add this instruction to the active list so we have someplace to link // ideps, etc // iq_iterator p = active_instructions->add_tail(rs); p->hm_prediction = new_chain->hm_prediction; rob->hm_prediction = p->hm_prediction; // // Determine which segment this instruction will be placed in // // Result is determined by bypassing mode, etc. 
// unsigned destination = choose_dest_segment(p); // // Now, link into producing instructions and predict issue/writeback // unsigned op_lat = cpu->FUPools[0]->getLatency(rs.opClass()); Tick max_wb = 0; Tick max_chained_op_rdy_time = 0; unsigned max_depth = 0, max_delay = 0; Tick max_op_ready_time = 0; // Adjust the instruction latency if this is the EA-Comp portion of // a LOAD instruciton if (rs.inst->isLoad()) { op_lat += cache_hit_latency; } // // This hunk of code not only links the input deps, but also returns // the expected cycle that that dep will write-back. We use these // values to determine the latest WB time. The latest WB time will // be used to determine the earliest Issue cycle, and all other time // calculations follow from there. // StaticInstPtr<TheISA> si = rs.inst->staticInst; StaticInstPtr<TheISA> eff_si = rs.ea_comp ? si->eaCompInst() : si; p->num_ideps = 0; for (int i = 0; i < eff_si->numSrcRegs(); ++i) { Tick this_wb = link_idep(p, eff_si->srcRegIdx(i)); // Find the latest predicted ops-ready time from all inputs // (we'll over-write this later with the chained value, if // we decide that we're chained) if (this_wb > max_op_ready_time) max_op_ready_time = this_wb; // If this op is chained & not-ready, we have to look at it if (!p->idep_ready[i] && new_chain->idep_info[i].chained) { ++n_chains; if (this_wb > max_chained_op_rdy_time) { follows_chain = new_chain->idep_info[i].follows_chain; max_depth = new_chain->idep_info[i].chain_depth; max_delay = new_chain->idep_info[i].delay; max_chained_op_rdy_time = this_wb; } } } // This shouldn't be necessary, since we should never look past // num_ideps in the array, but there are too many loops that go // all the way to TheISA::MaxNumSrcRegs. 
for (int i = p->num_ideps; i < TheISA::MaxInstSrcRegs; ++i) { p->idep_ptr[i] = 0; p->idep_reg[i] = 0; p->idep_ready[i] = true; } // If we have ANY chains, we want to use the chained time if (n_chains) max_op_ready_time = max_chained_op_rdy_time; // Now, calculate the predicted issue cycle... // Note that we can't issue until the cycle AFTER the instruction arrives // in segment zero. if (max_wb < curTick + destination + 1) p->pred_issue_cycle = curTick + destination + 1; else p->pred_issue_cycle = max_wb; // ... and store it rob->pred_issue_cycle = p->pred_issue_cycle; // Use that to calculate the predicted WB cycle rob->pred_wb_cycle = p->pred_issue_cycle + op_lat; p->pred_ready_time = max_op_ready_time; // // Decide whether we want the output of this inst to be chained from // an incoming chain... Wierdness due to the possiblity of following // multiple chains (this forces an instruction following multiple // chains to be a "head")... // bool chained = false; if (n_chains > 1) { // We'd better be the head of a new chain! } else if (n_chains == 1) { chained = true; } else { // Operands are self-timed (or ready) // ==> The result register should be marked as self-timed // (ie. not chained) } inst_depth_dist.sample(max_depth); inst_depth_lat_dist.sample(max_delay); // // "pred_wb_time" is an prediction of when this instructions // RESULT VALUES will be ready... We put this value into the // register-info table. // Tick pred_wb_time; if (p->ops_ready()) { p->ready_timestamp = curTick; delay_at_ops_rdy_dist.sample(0); } pred_wb_time = rob->pred_wb_cycle; // // If this instruction is the head of a chain // if (p->head_of_chain) { ++iq_heads; // decremented when head leaves the IQ/LSQ // Make sure the creator seq number matches correctly... 
if (!rs.ea_comp) { (*chain_info)[p->head_chain].creator = seq; } else { // the LSQ portion of this (must be a store) will generate value (*chain_info)[p->head_chain].creator = seq + 1; } (*chain_info)[p->head_chain].created_ts = curTick; (*chain_info)[p->head_chain].head_level = destination; // // The ROB element is what actually does the writeback, since the // instruction will have been removed from the queue at issue... // rob->head_of_chain = true; rob->head_chain = p->head_chain; // // Set the chain depth to one // (*chain_info)[p->head_chain].chain_depth = 0; max_depth = 0; // Put this plus one into the reg_info struct } for (int i = 0; i < TheISA::MaxInstSrcRegs; ++i) { if (new_chain->idep_info[i].chained) { // Let's see just how long this chain is... // if we're using a register created near the head of the chain, // our depth may not be the deepest of all chained instructions SegChainInfoEntry &info = (*chain_info)[new_chain->idep_info[i].follows_chain]; if (max_depth >= info.chain_depth) info.chain_depth = max_depth + 1; } }#if 0 // we don't actually set hmp_func anywhere... // // HMP: Add latency to the result of this load if we predict a load miss // if (use_hm_predictor && hmp_func == HMP_LATENCY && inst->isLoad()) { // If we predict this to be a miss if (p->hm_prediction == MA_CACHE_MISS) pred_wb_time += MISS_ADDITIONAL_LATENCY; } // // Special handling if we're doing BOTH... // (prediction has already been made and stored in ROB... don't // want to make _another_ one -- first in choose_chain() ) // if (use_hm_predictor && hmp_func == HMP_BOTH && inst->isLoad()) { // If we predict this to be a miss if (p->hm_prediction == MA_CACHE_MISS) pred_wb_time += MISS_ADDITIONAL_LATENCY; }#endif#if DUMP_CHINFO cout << "@ " << curTick << ": #" << seq << " head of C#"; if (rs.head_of_chain) cout << rs.head_chain;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -