⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dispatch.cc

📁 linux下基于c++的处理器仿真平台。具有处理器流水线
💻 CC
📖 第 1 页 / 共 4 页
字号:
    //    // for load/stores:    // idep #0     - store operand (value that is store'ed)    // idep #1, #2 - eff addr computation inputs (addr of access)    //    // resulting IQ/LSQ operation pair:    // IQ (effective address computation operation):    // idep #0, #1 - eff addr computation inputs (addr of access)    // LSQ (memory access operation):    // idep #0     - operand input (value that is store'd)    // idep #1     - eff addr computation result (from IQ op)    //    // effective address computation is transfered via the reserved    // name DTMP    //    RegInfoElement reginfo[TheISA::MaxInstDestRegs];    ////////////////////////////////////////////////////////////////    //    //   Allocate an ROB entry for this instruction    //    //////////////////////////////////////////////////////////////////    ROBStation *rob = ROB.new_tail(thread);    rob->init(inst, dispatch_seq, numIQueues);    //////////////////////////////////////////////////////////////////    //    //  Determine the chaining information for this instruction    //    //    //    //////////////////////////////////////////////////////////////////    NewChainInfo new_chain;    if (clusterSharedInfo->ci_table != 0) {	new_chain = choose_chain(inst, iq_idx);	if (new_chain.suggested_cluster >= 0)	    iq_idx = new_chain.suggested_cluster;	// DPRINTF(Dispatch, "DISP:     chain_info: (clust %d) (head %d)\n",	//	iq_idx, new_chain.head_of_chain);    } else {	//  if we're not using chains, don't let the lack of them be        //  a problem...	new_chain.out_of_chains = false;    }    if (new_chain.out_of_chains) {	ROB.remove(rob);	++chains_insuf[thread];	return 0;    }    //////////////////////////////////////////////////////////////////    //    //  Place the instruction into the IQ    //    //  The ROB has been (almost) completely initialized    //    //////////////////////////////////////////////////////////////////    BaseIQ::iterator rs = 0;    // Send the instruction to the Instruction Queue    // (memory barriers excepted: they go to LSQ only)    if (!inst->isMemBarrier()) {	rs = IQ[iq_idx]->add(inst, dispatch_seq, rob, reginfo, &new_chain);	rs->dispatch_timestamp = curTick;	if (rs.isnull()) {	    // de-allocate the ROB & LSQ entries...	    ROB.remove(rob);	    //  we're done for this cycle	    return 0;	}    }    rob->iq_entry = rs;    //////////////////////////////////////////////////////////////////    //    //  Add this instruction to the LSQ, as necessary    //    //////////////////////////////////////////////////////////////////    if (inst->isMemRef() || inst->isMemBarrier()) {	//  Remember to link in the iq_entry!!	//	BaseIQ::iterator lsq = LSQ->add(inst, dispatch_seq + 1, rob, 0, 0);	//  Check for resource allocation failure	if (lsq.isnull()) {	    if (rs.notnull()) {		// we have to clean-up dep-links...		for (int i = 0; i < rs->num_ideps; ++i) {		    if (rs->idep_ptr[i]) {			delete rs->idep_ptr[i];			rs->idep_ptr[i] = 0;		    }		}	    }	    //  de-allocate the ROB entry	    ROB.remove(rob);	    //  de-allocate the IQ entry	    if (rs.notnull())		IQ[iq_idx]->squash(rs);	    //  We're done for this cycle	    return 0;	}	lsq->dispatch_timestamp = curTick;	lsq->iq_entry = rs;	if (rs.notnull()) {	    IQ[iq_idx]->registerLSQ(rs, lsq);	    //	rs->lsq_entry = lsq;	}	//  Mark this ROB entry as being a memory operation	//  (changes the ROB-entry sequence number to match the LSQ entry)	rob->setMemOp(lsq);	// memory barriers don't require an EA computatiaon	if (inst->isMemBarrier()) {	    rob->eaCompPending = false;	}	//  We know this instruction has dispatched... add one to	//  the sequence counter (for the LSQ entry)	++dispatch_seq;    }    //  We've dispatched... count one for the IQ    ++dispatch_seq;    //---------------------------------------------------------    //    //  Now that we know that we're going to USE the specified    //  chain...    //    if (new_chain.head_of_chain) {	++chain_heads[thread];	++chain_heads_in_rob;	clusterSharedInfo->ci_table->claim(new_chain.head_chain,					   thread, rob->seq);	if (chainWires != 0) {	    chainWires->allocateWire(iq_idx, new_chain.head_chain);	}    }    //  Annotate the ROB entry    rob->queue_num = iq_idx;    //    //  Inform all other clusters that an instruction has dispatched    //    for (unsigned i = 0; i < numIQueues; ++i)	if (i != iq_idx)	    IQ[i]->inform_dispatch(rs);    //    //  1) install outputs after inputs to prevent self reference    //  2) Update the register information table    //    rob->num_outputs = inst->numDestRegs();    for (int i = 0; i < rob->num_outputs; ++i) {	TheISA::RegIndex reg = inst->destRegIdx(i);	rob->onames[i] = reg;	create_vector[thread].set_entry(reg, rob, i, inst->spec_mode);	reginfo[i].setCluster(iq_idx);	(*clusterSharedInfo->ri_table)[thread][reg] = reginfo[i];    }    //    //  Store off the use_spec_cv bitmap and the spec_create    //  vector entries    //    if (inst->recover_inst) {	// rob->spec_state = new CreateVecSpecState(thread);	rob->spec_state = state_list.get(&create_vector[thread]);    }    ////////////////////////////////////////////////////////////    //    //  Now that we know that this instruction has made    //  it into the IQ/LSQ/ROB... count it as dispatched    //  NOTE: we include EA-comp instructions in the distribution    //    ++dispatch_count[thread];    ++dispatch_count_stat[thread];    ++dispatched_ops[thread];    if (inst->isMemRef())	++dispatched_ops[thread];    if (inst->isSerializing())	++dispatched_serializing[thread];    //    //  Add to the pipetrace...    //    if (ptrace)	ptrace->moveInst(inst, PipeTrace::Dispatch, 0, 0, 0);    /*     *  Physical registers...     */    unsigned num_fp_regs = inst->numFPDestRegs();    unsigned num_int_regs = inst->numIntDestRegs();    free_fp_physical_regs -= num_fp_regs;    free_int_physical_regs -= num_int_regs;    used_fp_physical_regs[thread] += num_fp_regs;    used_int_physical_regs[thread] += num_int_regs;    return rob;}////  Return the number of a thread which can decode instructions into the//  Decode/Dispatch queue. This requires that the thread have instructions//  in the fetch queue and that there is space available for these//  instructions in the decode queue//intFullCPU::choose_decode_thread(){    int rv = -1;    //  Use a Round-Robin approach to decide where to start    unsigned t = first_decode_thread;    first_decode_thread = ++first_decode_thread % number_of_threads;    unsigned first = t;    unsigned low_count = UINT_MAX;    switch (fetch_policy) {      case IC:	first = 0;	t = 0;	do {	    unsigned cnt = decodeQueue->count(t) + IQNumInstructions(t);	    if (ifq[t].num_available()) {		if (cnt < low_count) {		    low_count = cnt;		    rv = t;		}	    }	    t = (t+1) % number_of_threads;	} while (first != t);	break;      default:	do {	    if (ifq[t].num_available()) {		rv = t;		break;	    }	    else {		t = (t+1) % number_of_threads;	    }	} while (first != t);	break;    }    return rv;}voidFullCPU::start_decode(){    //  if we don't have a place to put new instructions, bail    if (!decodeQueue->loadable())	return;    int thread = choose_decode_thread();    if (thread < 0) {	// if we can't decode anything this cycle...        return;    }    FetchQueue *fq = &(ifq[thread]);    //  as long as there are instructions, and we have bandwidth    while (decodeQueue->addBW(thread)	   && (fq->num_valid + fq->num_squashed) > 0)    {	DynInst *inst = fq->pull();	if (inst) {	    decodeQueue->add(inst);	    if (inst->btb_miss() && inst->recover_inst)		fixup_btb_miss(inst);	} else {	    //  instruction was squashed earlier...	    //  drop it on the floor	}    }}voidFullCPU::fixup_btb_miss(DynInst *inst){    //  For absolute and PC-relative (i.e. direct, not indirect)    //  control instructions that were predicted taken, the BTB    //  may have predicted the target address incorrectly or not    //  at all.  Since these addresses by definition can be    //  calculated without executing the instruction, fix that up    //  here.    //    //  Note that indirect jumps (that jump to addresses stored in    //  registers) need to be executed to get the target, so we    //  can't fix those up yet.    //    //  The F_DIRJMP flag indicates a direct control transfer instruction.    //    int thread_number = inst->thread_number;    //  if we had a BTB miss that put us onto the wrong path    if (inst->isDirectCtrl() && inst->btb_miss() && inst->recover_inst) {	assert(inst->xc->spec_mode > 0);	fetch_squash(thread_number);	inst->recover_inst = false;	inst->xc->spec_mode--;	//  Correct the PC for the BTB miss	inst->xc->regs.pc = inst->branchTarget();	//	//  If we've trasferred completely out of spec-mode...	//	if (inst->xc->spec_mode == 0) {	    // reset use_spec_? reg maps and speculative memory state	    inst->xc->reset_spec_state();	}	// Make sure that we don't apply the fixup on THIS cycle:	// have to schedule event since fetch is simulated after dispatch	// within each cycle	fetch_stall[thread_number] |= BRANCH_STALL;	Event *ev =	    new ClearFetchStallEvent(this, thread_number, BRANCH_STALL);	ev->schedule(curTick + cycles(1));	fid_cause[thread_number] = FLOSS_FETCH_BRANCH_RECOVERY;    }}voidFullCPU::dispatch_init(){    if (IQ[0]->type() == BaseIQ::Segmented)	chainWires = new ChainWireInfo(max_chains, max_wires, numIQueues,				       chainWirePolicy);    else	chainWires = 0;}voidFullCPU::dispatchRegStats(){    using namespace Stats;    dispatch_count.resize(number_of_threads);    for (int i = 0; i < number_of_threads; ++i)	dispatch_count[i] = 0;    dispatch_count_stat	.init(number_of_threads)	.name(name() + ".DIS:count")	.desc("cumulative count of dispatched insts")	.flags(total)	;    dispatched_serializing	.init(number_of_threads)	.name(name() + ".DIS:serializing_insts")	.desc("count of serializing insts dispatched")	.flags(total)	;    dispatch_serialize_stall_cycles	.init(number_of_threads)	.name(name() + ".DIS:serialize_stall_cycles")	.desc("count of cycles dispatch stalled for serializing inst")	.flags(total)	;    //    //  Chaining stats    //    chain_heads	.init(number_of_threads)	.name(name() + ".DIS:chain_heads")	.desc("number insts that are chain heads")	.flags(total)	;    chains_insuf	.init(number_of_threads)	.name(name() + ".DIS:chains_insuf")	.desc("number of times thread had insuf chains")	.flags(total)	;    dispatched_ops	.init(number_of_threads)	.name(name() + ".DIS:op_count")	.desc("number of operations dispatched")	.flags(total)	;    rob_cap_events	.init(number_of_threads)	.name(name() + ".ROB:cap_events")	.desc("number of cycles where ROB cap was active")	.flags(total)	;    rob_cap_inst_count	.init(number_of_threads)	.name(name() + ".ROB:cap_inst")	.desc("number of instructions held up by ROB cap")	.flags(total)	;    iq_cap_events	.init(number_of_threads)	.name(name() +".IQ:cap_events" )	.desc("number of cycles where IQ cap was active")	.flags(total)	;    iq_cap_inst_count	.init(number_of_threads)	.name(name() + ".IQ:cap_inst")	.desc("number of instructions held up by IQ cap")	.flags(total)	;    mod_n_disp_stalls.init(number_of_threads);    mod_n_disp_stall_free.init(number_of_threads);    if (dispatch_policy == MODULO_N) {	mod_n_disp_stalls	    .name(name() + ".DIS:mod_n_stalls")	    .desc("cycles where dispatch stalled due to mod-n")	    .flags(total)	    ;	mod_n_disp_stall_free	    .name(name() + ".DIS:mod_n_stall_free")	    .desc("free slots when dispatch stalled due to mod-n")	    .flags(total)	    ;    }    reg_int_full	.name(name() + ".REG:int:full")	.desc("number of cycles where there were no INT registers")	;    reg_fp_full	.name(name() + ".REG:fp:full")	.desc("number of cycles where there were no FP registers")	;    insufficient_chains	.name(name() + ".DIS:insufficient_chains")	.desc("Number of instances where dispatch stopped")	;    secondChoiceCluster	.name(name() + ".DIS:second_choice_clust")	.desc("Number of instructions dispatched to second-choice cluster");    secondChoiceStall	.name(name() + ".DIS:second_choice_stall")	.desc("Number of instructions stalled when first choice not available");    //    //  Two input instruction stats    //    two_op_inst_count	.init(number_of_threads)	.name(name() + ".DIS:two_input_insts")	.desc("Number of two input instructions queued")	.flags(total)	;    one_rdy_inst_count	.init(number_of_threads)	.name(name() + ".DIS:one_rdy_insts")	.desc("number of 2-op insts w/ one rdy op")	.flags(total)	;    chain_create_dist	.init(NUM_CHAIN_CR_CLASSES)	.name(name() + ".DIS:chain_creation")	.desc("Reason that chain head was created")	.flags(pdf | dist)	;    for (int i=0; i < NUM_CHAIN_CR_CLASSES; ++i) {	chain_create_dist.subname(i, chain_cr_class_desc[i]);    }    inst_class_dist	.init(NUM_INSN_CLASSES)	.name(name() + "inst_class_dist")	.desc("Operand status at dispatch")	.flags(pdf | dist)	;    for (int i=0; i < NUM_INSN_CLASSES; ++i) {	inst_class_dist.subname(i, dispatchInstClassDesc[i]);    }}voidFullCPU::dispatchRegFormulas(){    using namespace Stats;    chain_head_frac	.name(name() + ".DIS:chain_head_frac")	.desc("fraction of insts that are chain heads")	.flags(total)	;    chain_head_frac = 100 * chain_heads / dispatch_count_stat;    chains_insuf_rate	.name(name() + ".DIS:chains_insuf_rate")	.desc("rate that thread had insuf chains")	.flags(total)	;    chains_insuf_rate = chains_insuf / numCycles;    dispatched_op_rate	.name(name() + ".DIS:op_rate")	.desc("dispatched operations per cycle")	.flags(total)	;    dispatched_op_rate = dispatched_ops / numCycles;    dispatch_rate	.name(name() + ".DIS:rate")	.desc("dispatched_insts per cycle")	.flags(total)	;    dispatch_rate = dispatch_count_stat / numCycles;    if (dispatch_policy == MODULO_N) {	mod_n_stall_avg_free	    .name(name() + ".DIS:mod_n_stall_avg_free")	    .desc("avg free slots per cycle")	    .flags(total)	    ;	mod_n_stall_avg_free = mod_n_disp_stall_free / mod_n_disp_stalls;	mod_n_stall_frac	    .name(name() + ".DIS:mod_n_stall_frac")	    .desc("avg stalls per cycle")	    .flags(total)	    ;	mod_n_stall_frac = mod_n_disp_stalls / numCycles;    }    reg_int_occ_rate	.name(name() + ".REG:int:occ_rate")	.desc("Average INT register usage")	.flags(total)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -